Bring back CI to a normal state (#82)
* Fix invalid dtype checks

* Fix invalid gemma quantization import

* Fix dtype again

* Quality

* Update transformers requirements to support gemma

* Add a few more to .dockerignore

* Remove unused test workflow
mfuntowicz authored Feb 27, 2024
1 parent bb13d65 commit 9601738
Showing 6 changed files with 19 additions and 30 deletions.
3 changes: 3 additions & 0 deletions .dockerignore
@@ -1,3 +1,6 @@
.env*/
third-party/*

**/*.engine
**/*.pyc
**/*.egg-info
15 changes: 0 additions & 15 deletions .github/workflows/test.yml

This file was deleted.

4 changes: 2 additions & 2 deletions .gitignore
@@ -120,8 +120,8 @@ celerybeat.pid
*.sage.py

# Environments
-.env
-.venv
+.env*/
+.venv/
env/
venv/
ENV/
2 changes: 1 addition & 1 deletion setup.py
@@ -35,7 +35,7 @@
"numpy >= 1.22.0",
"onnx >= 1.12.0",
"optimum >= 1.13.0",
"transformers >= 4.32.1",
"transformers >= 4.38.1",
"pynvml"
]

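Gemma model support first shipped in transformers 4.38, which is why the floor in setup.py moves from 4.32.1 to 4.38.1. A minimal sketch, not part of this commit, of how a runtime guard for that floor could look:

# Illustrative only: fail fast when the installed transformers predates the
# 4.38.1 floor declared in setup.py (the Gemma classes do not exist before 4.38).
from packaging.version import Version

import transformers

if Version(transformers.__version__) < Version("4.38.1"):
    raise RuntimeError(
        f"transformers {transformers.__version__} is installed, "
        "but >= 4.38.1 is required for Gemma support"
    )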
2 changes: 1 addition & 1 deletion src/optimum/nvidia/models/gemma.py
@@ -20,14 +20,14 @@
import numpy as np
import tensorrt_llm
import torch
-from quantization import QuantMode
from tensorrt_llm import Mapping, str_dtype_to_torch
from tensorrt_llm._utils import numpy_to_torch, pad_vocab_size, torch_to_numpy
from tensorrt_llm.layers import MoeConfig
from tensorrt_llm.models import PretrainedConfig, PretrainedModel
from tensorrt_llm.models.gemma.model import GemmaForCausalLM as TrtGemmaForCausalLM
from tensorrt_llm.models.gemma.weight import dup_kv_weight, extract_layer_idx, split
from tensorrt_llm.plugin import PluginConfig
+from tensorrt_llm.quantization import QuantMode
from tensorrt_llm.runtime.lora_manager import LoraConfig
from transformers import GemmaForCausalLM as TransformersGemmaForCausalLM
from transformers import PretrainedConfig as TransformersPretrainedConfig
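The only change to gemma.py is where QuantMode comes from: the bare import was invalid, since QuantMode is provided by the tensorrt_llm package rather than a top-level quantization module. Side by side, as a plain Python snippet:

# Parent commit (broken): there is no top-level `quantization` module to import from
# from quantization import QuantMode

# This commit (fixed): QuantMode comes from the tensorrt_llm package
from tensorrt_llm.quantization import QuantMode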
23 changes: 12 additions & 11 deletions tests/test_dtype.py
@@ -24,15 +24,16 @@
@pytest.mark.parametrize(
"literal_dtype,dtype",
[
("int64", torch.int16),
("float32", torch.float32),
("bool", torch.bool),
("uint8", torch.uint8),
("int8", torch.int8),
# ("int16", torch.int16),
("int32", torch.int32),
("int64", torch.int64),
("float8", torch.float8_e4m3fn), # Change this when supported
("float16", torch.float16),
("bfloat16", torch.bfloat16),
("float8", torch.float8_e4m3fn), # Change this when supported
("int8", torch.int8),
("uint8", torch.uint8),
("bool", torch.bool),
("float32", torch.float32),
],
)
def test_convert_str_to_torch(literal_dtype: str, dtype):
@@ -42,14 +43,14 @@ def test_convert_str_to_torch(literal_dtype: str, dtype):
@pytest.mark.parametrize(
"literal_dtype,dtype",
[
("uint8", trt.uint8),
("int8", trt.int8),
("int32", trt.int32),
("int64", trt.int64),
("float32", trt.float32),
("float8", trt.fp8),
("float16", trt.float16),
("bfloat16", trt.bfloat16),
("int32", trt.int32),
("float8", trt.fp8),
("int8", trt.int8),
("uint8", trt.uint8),
("float32", trt.float32),
],
)
def test_convert_str_to_tensorrt(literal_dtype: str, dtype):
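Both tests exercise a string-to-dtype conversion helper, once targeting torch and once targeting TensorRT. The helper itself is not part of this diff; purely as an illustration, a lookup table consistent with the parametrized cases above could be sketched as:

# Illustrative sketch only -- the real conversion helper lives elsewhere in
# optimum-nvidia and is not shown in this commit. The tables mirror the
# parametrized test cases above.
import tensorrt as trt
import torch

_STR_TO_TORCH = {
    "bool": torch.bool,
    "uint8": torch.uint8,
    "int8": torch.int8,
    "int32": torch.int32,
    "int64": torch.int64,
    "float8": torch.float8_e4m3fn,  # requires torch >= 2.1
    "float16": torch.float16,
    "bfloat16": torch.bfloat16,
    "float32": torch.float32,
}

_STR_TO_TRT = {
    "uint8": trt.uint8,
    "int8": trt.int8,
    "int32": trt.int32,
    "int64": trt.int64,  # requires a TensorRT build that exposes int64
    "float8": trt.fp8,
    "float16": trt.float16,
    "bfloat16": trt.bfloat16,
    "float32": trt.float32,
}


def str_to_torch_dtype(literal: str) -> torch.dtype:
    # Resolve a literal such as "bfloat16" to the matching torch dtype.
    return _STR_TO_TORCH[literal]


def str_to_trt_dtype(literal: str) -> trt.DataType:
    # Resolve a literal such as "bfloat16" to the matching TensorRT dtype.
    return _STR_TO_TRT[literal]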
