diff --git a/runtime/gpu/model_repo/scoring/1/model.py b/runtime/gpu/model_repo/scoring/1/model.py
index 98b331ad5d..25c5d24284 100644
--- a/runtime/gpu/model_repo/scoring/1/model.py
+++ b/runtime/gpu/model_repo/scoring/1/model.py
@@ -75,7 +75,7 @@ def initialize(self, args):
     def init_ctc_rescore(self, parameters):
         num_processes = multiprocessing.cpu_count()
         cutoff_prob = 0.9999
-        blank_id = 0
+
         alpha = 2.0
         beta = 1.0
         bidecoder = 0
@@ -104,8 +104,13 @@ def init_ctc_rescore(self, parameters):
 
         self.num_processes = num_processes
         self.cutoff_prob = cutoff_prob
-        self.blank_id = blank_id
-        _, vocab = self.load_vocab(vocab_path)
+
+        ret = self.load_vocab(vocab_path)
+        id2vocab, vocab, space_id, blank_id, sos_eos = ret
+        self.space_id = space_id if space_id else -1
+        self.blank_id = blank_id if blank_id else 0
+        self.eos = self.sos = sos_eos if sos_eos else len(vocab) - 1
+
         if lm_path and os.path.exists(lm_path):
             self.lm = Scorer(alpha, beta, lm_path, vocab)
             print("Successfully load language model!")
@@ -125,24 +130,31 @@ def init_ctc_rescore(self, parameters):
         )
         self.vocabulary = vocab
         self.bidecoder = bidecoder
-        sos = eos = len(vocab) - 1
-        self.sos = sos
-        self.eos = eos
+
+
+
 
     def load_vocab(self, vocab_file):
         """
         load lang_char.txt
         """
         id2vocab = {}
+        space_id, blank_id, sos_eos = None, None, None
         with open(vocab_file, "r", encoding="utf-8") as f:
             for line in f:
                 line = line.strip()
                 char, id = line.split()
                 id2vocab[int(id)] = char
+                if char == " ":
+                    space_id = int(id)
+                elif char == "<blank>":
+                    blank_id = int(id)
+                elif char == "<sos/eos>":
+                    sos_eos = int(id)
         vocab = [0] * len(id2vocab)
         for id, char in id2vocab.items():
             vocab[id] = char
-        return id2vocab, vocab
+        return (id2vocab, vocab, space_id, blank_id, sos_eos)
 
     def load_hotwords(self, hotwords_file):
         """
diff --git a/wenet/bin/export_onnx_gpu.py b/wenet/bin/export_onnx_gpu.py
index 9b46208027..ab6c1dbe05 100644
--- a/wenet/bin/export_onnx_gpu.py
+++ b/wenet/bin/export_onnx_gpu.py
@@ -1200,7 +1200,7 @@ def export_rescoring_decoder(model, configs, args, logger, decoder_onnx_path,
         configs['cmvn_conf'] = {}
     else:
         assert configs['cmvn'] == "global_cmvn"
-        assert configs['cmvn']['cmvn_conf'] is not None
+        assert configs['cmvn_conf'] is not None
     configs['cmvn_conf']["cmvn_file"] = args.cmvn_file
     if (args.reverse_weight != -1.0
             and "reverse_weight" in configs["model_conf"]):
diff --git a/wenet/bin/recognize_onnx_gpu.py b/wenet/bin/recognize_onnx_gpu.py
index 3fb0d8bbba..640572bc38 100644
--- a/wenet/bin/recognize_onnx_gpu.py
+++ b/wenet/bin/recognize_onnx_gpu.py
@@ -122,6 +122,7 @@ def main():
         configs = override_config(configs, args.override_config)
 
     reverse_weight = configs["model_conf"].get("reverse_weight", 0.0)
+    special_tokens=configs.get('tokenizer_conf',{}).get('special_tokens', None)
     test_conf = copy.deepcopy(configs['dataset_conf'])
     test_conf['filter_conf']['max_length'] = 102400
     test_conf['filter_conf']['min_length'] = 0
@@ -138,6 +139,8 @@ def main():
     test_conf['fbank_conf']['dither'] = 0.0
     test_conf['batch_conf']['batch_type'] = "static"
     test_conf['batch_conf']['batch_size'] = args.batch_size
+
+
     tokenizer = init_tokenizer(configs)
 
     test_dataset = Dataset(args.data_type,
@@ -165,13 +168,21 @@ def main():
     # Load dict
     vocabulary = []
     char_dict = {}
+
+
     with open(args.dict, 'r') as fin:
         for line in fin:
             arr = line.strip().split()
             assert len(arr) == 2
             char_dict[int(arr[1])] = arr[0]
             vocabulary.append(arr[0])
-    eos = sos = len(char_dict) - 1
+
+    vocab_size = len(char_dict)
+    sos = (vocab_size - 1 if special_tokens is None else
+           special_tokens.get("<sos>", vocab_size - 1))
+    eos = (vocab_size - 1 if special_tokens is None else
+           special_tokens.get("<eos>", vocab_size - 1))
+
     with torch.no_grad(), open(args.result_file, 'w') as fout:
         for _, batch in enumerate(test_data_loader):
             keys, feats, _, feats_lengths, _ = batch
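
Note: below is a minimal, standalone sketch of the special-token resolution these hunks introduce, so the lookup and fallback logic can be sanity-checked outside Triton/WeNet. The sample vocab lines and the helper name `resolve_special_ids` are hypothetical; the token names follow the `<blank>` / `<sos/eos>` convention of `lang_char.txt`, and the fallbacks (blank to 0, sos/eos to the last vocabulary entry) mirror the patch.

```python
# Hedged sketch, not part of the patch: reimplements the id lookup in
# load_vocab() and the sos/eos fallbacks added above.

def resolve_special_ids(lines):
    """Parse "<token> <id>" lines and pick out special-token ids."""
    id2vocab = {}
    blank_id, sos_eos = None, None
    for line in lines:
        char, idx = line.strip().split()
        id2vocab[int(idx)] = char
        if char == "<blank>":
            blank_id = int(idx)
        elif char == "<sos/eos>":
            sos_eos = int(idx)
    vocab = [id2vocab[i] for i in sorted(id2vocab)]
    # Same fallbacks as the patch: blank -> 0, sos/eos -> last entry.
    blank_id = blank_id if blank_id else 0
    sos_eos = sos_eos if sos_eos else len(vocab) - 1
    return vocab, blank_id, sos_eos

sample = ["<blank> 0", "a 1", "b 2", "<sos/eos> 3"]
_, blank_id, sos_eos = resolve_special_ids(sample)
assert (blank_id, sos_eos) == (0, 3)

# Without the special tokens, both fall back as before the patch.
_, blank_id, sos_eos = resolve_special_ids(["a 0", "b 1", "c 2"])
assert (blank_id, sos_eos) == (0, 2)

# The recognize_onnx_gpu.py hunk resolves sos/eos from config instead,
# with the same vocab_size - 1 fallback (hypothetical config values):
special_tokens = {"<sos>": 3, "<eos>": 3}
vocab_size = 4
sos = (vocab_size - 1 if special_tokens is None else
       special_tokens.get("<sos>", vocab_size - 1))
assert sos == 3
```

One quirk worth noting: the `x if x else default` fallback treats an id of 0 as unset. That is harmless for `<blank>` (its fallback is also 0), but a `<sos/eos>` token placed at id 0 would silently fall back to `len(vocab) - 1`.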