fix conflict
ceci3 committed Jan 24, 2025
1 parent 55c6e8c commit e1125e1
Showing 3 changed files with 10 additions and 9 deletions.
9 changes: 5 additions & 4 deletions flagscale/compress/algo/rtn.py
@@ -21,6 +21,11 @@ def __init__(self, name, layer, enable_fake_quant=False):
         if self.input_observer_args and self.input_observer_args.dynamic:
             self.input_observer_args.observer = "minmax"
             self.input_observer = Observer.load_from_registry(self.input_observer_args.get_observer(), quantization_args=self.input_observer_args)
+            if self.weights_observer_args:
+                # origin_weight = self.layer.weight.clone()
+                W = fake_quantize(self.layer.weight, self.layer.weight_scale, self.layer.weight_zero_point, self.weights_observer_args)
+                update_parameter_data(self.layer, W, f"weight")
+                del W
         else:
             if self.weights_observer_args and not self.weights_observer_args.dynamic:
                 self.weight_observer = Observer.load_from_registry(self.weights_observer_args.get_observer(), quantization_args=self.weights_observer_args)
@@ -60,10 +65,6 @@ def forward(self, inp, **kwargs):
                 del tmp_inp, error
             else:
                 inp = fake_quantize(inp, self.layer.input_scale, self.layer.input_zero_point, self.input_observer_args)
-        if self.weights_observer_args:
-            W = fake_quantize(self.layer.weight, self.layer.weight_scale, self.layer.weight_zero_point, self.weights_observer_args)
-            update_parameter_data(self.layer, W, f"weight")
-            del W
         out = self.layer(inp, **kwargs)
         # if self._enable_fake_quant and self.output_observer:
         #     out = fake_quantize(out, self.layer.output_scale, self.layer.output_zero_point, self.output_observer.quantization_args)
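
The net effect of this file's change is that the weight fake-quantization round trip moves from forward() into __init__(), so the quantize-dequantize error is baked into the stored weight once instead of being recomputed on every call. A minimal, hypothetical sketch of that idea (the repo's fake_quantize takes a precomputed scale, zero point, and quantization args; fake_quantize_minmax below is an illustrative stand-in):

import torch

def fake_quantize_minmax(w: torch.Tensor, num_bits: int = 8) -> torch.Tensor:
    # Symmetric per-tensor min-max scale: quantize, then dequantize.
    qmax = 2 ** (num_bits - 1) - 1
    scale = w.abs().amax() / qmax
    q = torch.clamp(torch.round(w / scale), min=-qmax - 1, max=qmax)
    return q * scale

# Doing the round trip once at construction bakes the quantization error
# into the stored weight; forward() then pays no per-call overhead.
layer = torch.nn.Linear(16, 16)
with torch.no_grad():
    layer.weight.copy_(fake_quantize_minmax(layer.weight))
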
8 changes: 4 additions & 4 deletions flagscale/compress/compressor.py
@@ -66,7 +66,10 @@ def __init__(self, cfg, model=None, dataset=None):
 
     def compress(self):
         self.tokenizer = None
-        self.model_path = self.cfg.model.pop("model_path")
+        self.model_path = None
+        if self.model is None:
+            assert self.cfg.model is not None
+            self.model_path = self.cfg.model.pop("model_path")
         if self.cfg.data.tokenzier_args is not None:
             self.tokenizer = AutoTokenizer.from_pretrained(self.cfg.data.tokenzier_args.pop("tokenizer_path"), **self.cfg.data.tokenzier_args)
         if self.model is None:
@@ -80,8 +83,6 @@ def compress(self):
         for algo_args in recipe:
             algo_args = OmegaConf.to_container(algo_args)
             algo_args["dataset"] = self.dataset
-            print("algo_args: ", algo_args)
-            # import pdb; pdb.set_trace()
             algo_args["num_calibration_steps"] = self.cfg.data.get("num_calibration_steps", 384)
             adapter = LLMCompressorAdapter(model=self.model, **algo_args)
             ### modify model inplace
@@ -94,7 +95,6 @@ def save_pretrained(self, save_compressed=True):
         if self.tokenizer is not None:
             self.tokenizer.save_pretrained(self.cfg.system.save_dir)
         copy_rest_file(self.model_path, cfg.system.save_dir)
-        import pdb; pdb.set_trace()
 
     @torch.no_grad()
     def convert(self, model):
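
The compressor.py change makes model_path optional when a live model object is injected: the config is only consulted (and model_path popped) when the model must be loaded from disk. A hypothetical stand-alone sketch of that guard (resolve_model_path and the cfg layout are illustrative, not the repo's API):

from omegaconf import OmegaConf

def resolve_model_path(cfg, model=None):
    # Caller supplied a ready model: no checkpoint path is needed.
    if model is not None:
        return None
    assert cfg.model is not None, "cfg.model must be set when no model is passed"
    # pop() consumes the key so later code cannot accidentally reload the model.
    return cfg.model.pop("model_path")

cfg = OmegaConf.create({"model": {"model_path": "/tmp/ckpt"}})
print(resolve_model_path(cfg))                  # -> /tmp/ckpt
print(resolve_model_path(cfg, model=object()))  # -> None
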
2 changes: 1 addition & 1 deletion flagscale/compress/compressor_emu3.py
@@ -62,7 +62,7 @@ def prepare_dataset(cfg):
     cmp = Compressor(cfg, dataset=dataset)
     cmp.compress()
     model = cmp.convert(cmp.model)
-
+    ### test code
     with torch.no_grad():
         from llmcompressor.pytorch.utils import tensors_to_device
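
Taken together, the three files share one driver flow: build a Compressor from the config, run compress() to apply the recipe, then convert() to materialize the compressed model. A hypothetical condensed sketch of that flow (Compressor and tensors_to_device appear in the diffs above; the batch handling and tensors_to_device(batch, device) signature are assumptions for illustration):

import torch
from flagscale.compress.compressor import Compressor

def run_compression(cfg, dataset):
    cmp = Compressor(cfg, dataset=dataset)  # model/recipe resolved from cfg
    cmp.compress()                          # apply each algo in the recipe
    model = cmp.convert(cmp.model)          # materialize compressed weights

    ### test code: single forward pass as a smoke test
    with torch.no_grad():
        from llmcompressor.pytorch.utils import tensors_to_device
        batch = tensors_to_device(next(iter(dataset)), "cuda")
        model(**batch)
    return model
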
