optimize(vLLM): move PostModel out
fumiama committed Aug 25, 2024
1 parent 8bc721c commit 2d8fde1
Showing 4 changed files with 6 additions and 74 deletions.
3 changes: 1 addition & 2 deletions ChatTTS/model/gpt.py
@@ -1,9 +1,8 @@
-import os, platform
+import platform
 from dataclasses import dataclass
 import logging
 from typing import Union, List, Optional, Tuple, Callable
 import gc
 from pathlib import Path
 
 import torch
 import torch.nn as nn
1 change: 0 additions & 1 deletion ChatTTS/model/velocity/__init__.py
@@ -1,3 +1,2 @@
 from .llm import LLM
-from .post_model import PostModel
 from .sampling_params import SamplingParams
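With the re-export removed, PostModel is no longer importable from the velocity package. A minimal before/after sketch, assuming the class now lives at ChatTTS/model/embed.py under the name Embed (inferred from the relative import added to model_runner.py below):

# Before (removed by this commit):
# from ChatTTS.model.velocity import PostModel
# After: the same module is imported from its new location as Embed.
from ChatTTS.model.embed import Embed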
5 changes: 3 additions & 2 deletions ChatTTS/model/velocity/model_runner.py
@@ -22,7 +22,8 @@
     SequenceOutput,
 )
 from vllm.utils import in_wsl
-from .post_model import PostModel, Sampler
+from ..embed import Embed
+from .sampler import Sampler
 from safetensors.torch import safe_open
 
 logger = init_logger(__name__)
@@ -78,7 +79,7 @@ def __init__(

     def load_model(self) -> None:
         self.model = get_model(self.model_config)
-        self.post_model = PostModel(
+        self.post_model = Embed(
             self.model_config.get_hidden_size(),
             self.model_config.num_audio_tokens,
             self.model_config.num_text_tokens,
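For orientation, a minimal sketch of the new wiring, assuming Embed keeps PostModel's constructor signature (hidden_size, num_audio_tokens, num_text_tokens, num_vq) as the call above suggests, and that Sampler is built as in the renamed module below; the sizes are placeholders, and the real load_model also loads weights via safe_open:

from ChatTTS.model.embed import Embed
from ChatTTS.model.velocity.sampler import Sampler

# Placeholder sizes for illustration; real values come from model_config.
hidden_size, num_audio_tokens, num_text_tokens, num_vq = 768, 626, 21178, 4

post_model = Embed(hidden_size, num_audio_tokens, num_text_tokens, num_vq)
sampler = Sampler(post_model, num_audio_tokens, num_vq)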
71 changes: 2 additions & 69 deletions ChatTTS/model/velocity/{post_model.py → sampler.py}
@@ -1,78 +1,11 @@
-import os
-
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
-"""
-https://stackoverflow.com/questions/62691279/how-to-disable-tokenizers-parallelism-true-false-warning
-"""
-
 import torch
 import torch.nn as nn
 from torch.functional import F
-from torch.nn.utils.parametrizations import weight_norm
 from typing import List, Callable
-
-
-class PostModel(nn.Module):
-    def __init__(
-        self, hidden_size: int, num_audio_tokens: int, num_text_tokens: int, num_vq=4
-    ):
-        super().__init__()
-
-        self.num_vq = num_vq
-        self.num_audio_tokens = num_audio_tokens
-
-        self.model_dim = hidden_size
-        self.emb_code = nn.ModuleList(
-            [nn.Embedding(num_audio_tokens, self.model_dim) for _ in range(num_vq)],
-        )
-        self.emb_text = nn.Embedding(num_text_tokens, self.model_dim)
-
-        self.head_text = weight_norm(
-            nn.Linear(self.model_dim, num_text_tokens, bias=False),
-            name="weight",
-        )
-        self.head_code = nn.ModuleList(
-            [
-                weight_norm(
-                    nn.Linear(self.model_dim, num_audio_tokens, bias=False),
-                    name="weight",
-                )
-                for _ in range(self.num_vq)
-            ],
-        )
-
-    def forward(self, input_ids: torch.Tensor, text_mask: torch.Tensor) -> torch.Tensor:
-        """
-        get_emb
-        """
-        device = next(self.parameters()).device
-        emb_text: torch.Tensor = self.emb_text(
-            input_ids[text_mask].narrow(1, 0, 1).squeeze_(1).to(device)
-        )
-
-        text_mask_inv = text_mask.logical_not().to(device)
-        masked_input_ids: torch.Tensor = input_ids[text_mask_inv].to(device)
-
-        emb_code = [
-            self.emb_code[i](masked_input_ids[:, i]) for i in range(self.num_vq)
-        ]
-        emb_code = torch.stack(emb_code, 2).sum(2)
-
-        emb = torch.zeros(
-            (input_ids.shape[:-1]) + (emb_text.shape[-1],),
-            device=emb_text.device,
-            dtype=emb_text.dtype,
-        )
-        emb[text_mask] = emb_text
-        emb[text_mask_inv] = emb_code.to(emb.dtype)
-
-        del emb_text, emb_code, text_mask_inv
-
-        return emb
-
+from ..embed import Embed
 
 class Sampler:
-    def __init__(self, post_model: PostModel, num_audio_tokens: int, num_vq: int):
+    def __init__(self, post_model: Embed, num_audio_tokens: int, num_vq: int):
         self.post_model = post_model
         self.device = next(self.post_model.parameters()).device
         self.num_audio_tokens = num_audio_tokens
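The forward method deleted above survives in Embed: text positions are embedded from channel 0 of input_ids via emb_text, audio positions sum the num_vq per-channel code embeddings, and both are scattered back into one [batch, seq, hidden] tensor. A hedged usage sketch with toy shapes, assuming the moved code is otherwise unchanged:

import torch
from ChatTTS.model.embed import Embed

embed = Embed(hidden_size=768, num_audio_tokens=626, num_text_tokens=21178, num_vq=4)

# input_ids packs num_vq code channels per position; text positions use
# channel 0 only, audio positions carry one code id per quantizer channel.
B, T = 1, 8
input_ids = torch.zeros(B, T, 4, dtype=torch.long)
text_mask = torch.zeros(B, T, dtype=torch.bool)
text_mask[:, :4] = True  # first half text, second half audio code frames

emb = embed(input_ids, text_mask)  # -> [1, 8, 768], per the forward above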
