Skip to content

Commit

Permalink
address #28
Browse files: browse the repository at this point in the history
  • Loading branch information
lucidrains committed Aug 27, 2024
1 parent e0326d0 commit b39fdbe
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 5 deletions.
14 changes: 10 additions & 4 deletions e2_tts_pytorch/e2_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,22 +248,24 @@ def __init__(
cond_audio_to_text = True
):
super().__init__()
self.text_to_audio = nn.Linear(dim_text, dim, bias = False)
self.text_to_audio = nn.Linear(dim_text + dim, dim, bias = False)
nn.init.zeros_(self.text_to_audio.weight)

self.cond_audio_to_text = cond_audio_to_text

if cond_audio_to_text:
self.audio_to_text = nn.Linear(dim, dim_text, bias = False)
self.audio_to_text = nn.Linear(dim + dim_text, dim_text, bias = False)
nn.init.zeros_(self.audio_to_text.weight)

def forward(
self,
audio: Float['b n d'],
text: Float['b n dt']
):
text_cond = self.text_to_audio(text)
audio_cond = self.audio_to_text(audio) if self.cond_audio_to_text else 0.
audio_text, _ = pack((audio, text), 'b n *')

text_cond = self.text_to_audio(audio_text)
audio_cond = self.audio_to_text(audio_text) if self.cond_audio_to_text else 0.

return audio + text_cond, text + audio_cond

Expand Down Expand Up @@ -742,6 +744,10 @@ def transformer_with_pred_head(
x = self.proj_in(x)
cond = self.cond_proj_in(cond)

# add the condition, following the voicebox-like scheme

x = x + cond

# whether to use a text embedding

text_embed = None
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "e2-tts-pytorch"
version = "0.6.2"
version = "0.6.3"
description = "E2-TTS in Pytorch"
authors = [
{ name = "Phil Wang", email = "[email protected]" }
Expand Down

0 comments on commit b39fdbe

Please sign in to comment.