Skip to content

Commit

Permalink
Add abstract impl for FusedNBitRowwiseQuantizedSBHalfToFloatOrHalf (#3231)

Browse files Browse the repository at this point in the history

Summary:

X-link: facebookresearch/FBGEMM#329

Reviewed By: frank-wei

Differential Revision: D63925369
  • Loading branch information
qxy11 authored and facebook-github-bot committed Oct 8, 2024
1 parent b5d2d3e commit 6e11b17
Show file tree
Hide file tree
Showing 3 changed files with 160 additions and 57 deletions.
50 changes: 50 additions & 0 deletions fbgemm_gpu/fbgemm_gpu/sparse_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -969,6 +969,48 @@ def hfp8_quantized_to_float(input: Tensor, ebits: int, exponent_bias: int) -> Te
return torch.empty_like(input, dtype=torch.float32)


def float_or_half_to_fused_nbit_rowwise_quantized_sbhalf(
input_t: Tensor,
bit_rate: int,
) -> Tensor:
input_sizes = input_t.size()
torch._check(len(input_sizes) == 2)
nrows = input_sizes[0]
ncols = input_sizes[1]
num_elem_per_byte = 8 // bit_rate

torch._check(ncols % (2 * num_elem_per_byte) == 0)
output_columns = (ncols + num_elem_per_byte - 1) // num_elem_per_byte + 2 * 2
output = torch.empty(
(nrows, output_columns), device=input_t.device, dtype=torch.uint8
)
return output


def fused_nbit_rowwise_quantized_sb_half_to_float_or_half(
    input_t: Tensor,
    bit_rate: int,
    output_dtype: int = 0,
) -> Tensor:
    """Abstract (meta) implementation of
    ``fbgemm::FusedNBitRowwiseQuantizedSBHalfToFloatOrHalf``.

    Inverse shape computation of the quantize op: drops the ``2 * 2``
    trailing metadata bytes per row and expands each remaining byte into
    ``8 // bit_rate`` elements.

    Args:
        input_t: 2-D quantized tensor (``uint8`` or a packed quint dtype).
        bit_rate: bits per quantized element; must divide 8.
        output_dtype: ``SparseType.FP32`` or ``SparseType.FP16`` as int.

    Returns:
        Empty ``float32`` or ``float16`` tensor of shape
        ``(nrows, (ncols_bytes - 4) * elems_per_byte)`` on ``input_t.device``.
    """
    torch._check(
        output_dtype in [SparseType.FP32.as_int(), SparseType.FP16.as_int()]
    )
    nrows, ncols = input_t.size(0), input_t.size(1)
    # Packed quantized dtypes report logical element counts; convert the
    # column count back to bytes before the shape arithmetic.
    if input_t.dtype == torch.quint2x4:
        ncols = (ncols + 3) // 4
    elif input_t.dtype == torch.quint4x2:
        ncols = (ncols + 1) // 2
    elems_per_byte = 8 // bit_rate
    # Strip the 2 * 2 metadata bytes, then expand bytes to elements.
    output_columns = (ncols - 2 * 2) * elems_per_byte
    out_dtype = (
        torch.float32
        if output_dtype == SparseType.FP32.as_int()
        else torch.float16  # validated above: must be SparseType.FP16
    )
    return torch.empty(
        (nrows, output_columns), dtype=out_dtype, device=input_t.device
    )


def _setup() -> None:
# pyre-ignore[16]
_setup.done = getattr(_setup, "done", False)
Expand Down Expand Up @@ -1103,6 +1145,14 @@ def impl_autograd(op_name, fn, setup_context: Optional[Callable] = None) -> None
"fbgemm::HFP8QuantizedToFloat",
hfp8_quantized_to_float,
)
impl_abstract(
"fbgemm::FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf",
float_or_half_to_fused_nbit_rowwise_quantized_sbhalf,
)
impl_abstract(
"fbgemm::FusedNBitRowwiseQuantizedSBHalfToFloatOrHalf",
fused_nbit_rowwise_quantized_sb_half_to_float_or_half,
)

_setup.done = True

Expand Down
159 changes: 104 additions & 55 deletions fbgemm_gpu/test/quantize/failures_dict_fast.json
Original file line number Diff line number Diff line change
@@ -1,58 +1,107 @@
{
"_description": "This is a dict containing failures for tests autogenerated by generate_opcheck_tests. For more details, please see https://docs.google.com/document/d/1Pj5HRZvdOq3xpFpbEjUZp2hBovhy7Wnxw14m6lF2154/edit",
"_version": 1,
"data": {
"fbgemm::FloatToFused8BitRowwiseQuantized": {
"SplitTableBatchedEmbeddingsTest.test_faketensor__test_forward_cpu_int8": {
"comment": "",
"status": "xfail"
},
"SplitTableBatchedEmbeddingsTest.test_faketensor__test_forward_fused_pooled_emb_quant": {
"comment": "",
"status": "xfail"
},
"SplitTableBatchedEmbeddingsTest.test_faketensor__test_forward_gpu_no_cache_int8": {
"comment": "",
"status": "xfail"
},
"SplitTableBatchedEmbeddingsTest.test_faketensor__test_forward_gpu_uvm_cache_int8": {
"comment": "",
"status": "xfail"
},
"SplitTableBatchedEmbeddingsTest.test_faketensor__test_nbit_forward_fused_pooled_emb_quant": {
"comment": "",
"status": "xfail"
}
},
"fbgemm::FloatToFusedNBitRowwiseQuantizedSBHalf": {
"SplitTableBatchedEmbeddingsTest.test_faketensor__test_nbit_forward_fused_pooled_emb_quant": {
"comment": "",
"status": "xfail"
}
},
"fbgemm::FloatToHFP8Quantized": {},
"fbgemm::Fused8BitRowwiseQuantizedToFloat": {
"SplitTableBatchedEmbeddingsTest.test_faketensor__test_forward_cpu_int8": {
"comment": "",
"status": "xfail"
},
"SplitTableBatchedEmbeddingsTest.test_faketensor__test_forward_fused_pooled_emb_quant": {
"comment": "",
"status": "xfail"
},
"SplitTableBatchedEmbeddingsTest.test_faketensor__test_forward_gpu_no_cache_int8": {
"comment": "",
"status": "xfail"
},
"SplitTableBatchedEmbeddingsTest.test_faketensor__test_forward_gpu_uvm_cache_int8": {
"comment": "",
"status": "xfail"
},
"SplitTableBatchedEmbeddingsTest.test_faketensor__test_nbit_forward_fused_pooled_emb_quant": {
"comment": "",
"status": "xfail"
}
},
"fbgemm::HFP8QuantizedToFloat": {}
"_description": "This is a dict containing failures for tests autogenerated by generate_opcheck_tests. For more details, please see https://docs.google.com/document/d/1Pj5HRZvdOq3xpFpbEjUZp2hBovhy7Wnxw14m6lF2154/edit",
"_version": 1,
"data": {
"fbgemm::FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf": {},
"fbgemm::FloatToFused8BitRowwiseQuantized": {
"SplitTableBatchedEmbeddingsTest.test_faketensor__test_forward_cpu_int8": {
"comment": "",
"status": "xfail"
},
"SplitTableBatchedEmbeddingsTest.test_faketensor__test_forward_fused_pooled_emb_quant": {
"comment": "",
"status": "xfail"
},
"SplitTableBatchedEmbeddingsTest.test_faketensor__test_forward_gpu_no_cache_int8": {
"comment": "",
"status": "xfail"
},
"SplitTableBatchedEmbeddingsTest.test_faketensor__test_forward_gpu_uvm_cache_int8": {
"comment": "",
"status": "xfail"
},
"SplitTableBatchedEmbeddingsTest.test_faketensor__test_nbit_forward_fused_pooled_emb_quant": {
"comment": "",
"status": "xfail"
}
},
"fbgemm::FloatToFusedNBitRowwiseQuantizedSBHalf": {
"SplitTableBatchedEmbeddingsTest.test_faketensor__test_nbit_forward_fused_pooled_emb_quant": {
"comment": "",
"status": "xfail"
},
"TestFusedNBitRowwiseQuantizationConversion.test_faketensor__test_quantize_and_dequantize_op": {
"comment": "",
"status": "xfail"
},
"TestFusedNBitRowwiseQuantizationConversion.test_faketensor__test_quantize_and_dequantize_op_cuda_large_nrows": {
"comment": "",
"status": "xfail"
},
"TestFusedNBitRowwiseQuantizationConversion.test_faketensor__test_quantize_op": {
"comment": "",
"status": "xfail"
}
},
"fbgemm::FloatToHFP8Quantized": {},
"fbgemm::Fused8BitRowwiseQuantizedToFloat": {
"SplitTableBatchedEmbeddingsTest.test_faketensor__test_forward_cpu_int8": {
"comment": "",
"status": "xfail"
},
"SplitTableBatchedEmbeddingsTest.test_faketensor__test_forward_fused_pooled_emb_quant": {
"comment": "",
"status": "xfail"
},
"SplitTableBatchedEmbeddingsTest.test_faketensor__test_forward_gpu_no_cache_int8": {
"comment": "",
"status": "xfail"
},
"SplitTableBatchedEmbeddingsTest.test_faketensor__test_forward_gpu_uvm_cache_int8": {
"comment": "",
"status": "xfail"
},
"SplitTableBatchedEmbeddingsTest.test_faketensor__test_nbit_forward_fused_pooled_emb_quant": {
"comment": "",
"status": "xfail"
}
},
"fbgemm::FusedNBitRowwiseQuantizedSBHalfToFloat": {
"TestFusedNBitRowwiseQuantizationConversion.test_faketensor__test_quantize_and_dequantize_op": {
"comment": "",
"status": "xfail"
},
"TestFusedNBitRowwiseQuantizationConversion.test_faketensor__test_quantize_and_dequantize_op_cuda_large_nrows": {
"comment": "",
"status": "xfail"
}
},
"fbgemm::FusedNBitRowwiseQuantizedSBHalfToFloatOrHalf": {
"TestFusedNBitRowwiseQuantizationConversion.test_faketensor__test_quantize_and_dequantize_op": {
"comment": "",
"status": "xfail"
},
"TestFusedNBitRowwiseQuantizationConversion.test_faketensor__test_quantize_and_dequantize_op_cpu_and_cuda": {
"comment": "",
"status": "xfail"
}
},
"fbgemm::FusedNBitRowwiseQuantizedSBHalfToHalf": {
"TestFusedNBitRowwiseQuantizationConversion.test_faketensor__test_quantize_and_dequantize_op": {
"comment": "",
"status": "xfail"
}
},
"fbgemm::HFP8QuantizedToFloat": {},
"fbgemm::HalfToFusedNBitRowwiseQuantizedSBHalf": {
"TestFusedNBitRowwiseQuantizationConversion.test_faketensor__test_quantize_and_dequantize_op": {
"comment": "",
"status": "xfail"
},
"TestFusedNBitRowwiseQuantizationConversion.test_faketensor__test_quantize_op": {
"comment": "",
"status": "xfail"
}
}
}
}
8 changes: 6 additions & 2 deletions fbgemm_gpu/test/quantize/fused_nbit_rowwise_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,18 @@

if open_source:
# pyre-ignore[21]
from test_utils import gpu_available
from test_utils import gpu_available, optests
else:
from fbgemm_gpu.test.test_utils import gpu_available
from fbgemm_gpu.test.test_utils import gpu_available, optests

torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")

torch.ops.import_module("fbgemm_gpu.sparse_ops")

no_long_tests: bool = False


@optests.generate_opcheck_tests(fast=True)
class TestFusedNBitRowwiseQuantizationConversion(unittest.TestCase):
# pyre-ignore [56]: Invalid decoration, was not able to infer the type of argument
@given(
Expand Down

0 comments on commit 6e11b17

Please sign in to comment.