From 271f77d06a8cb71a46b850d6588e49f2073ecfb0 Mon Sep 17 00:00:00 2001 From: takagada480 Date: Thu, 2 Jan 2025 07:05:44 +0200 Subject: [PATCH] Delete examples/mms/tts/tutorial directory H --- .../tutorial/MMS_TTS_Inference_Colab.ipynb | 555 ------------------ 1 file changed, 555 deletions(-) delete mode 100644 examples/mms/tts/tutorial/MMS_TTS_Inference_Colab.ipynb diff --git a/examples/mms/tts/tutorial/MMS_TTS_Inference_Colab.ipynb b/examples/mms/tts/tutorial/MMS_TTS_Inference_Colab.ipynb deleted file mode 100644 index 22b7f815be..0000000000 --- a/examples/mms/tts/tutorial/MMS_TTS_Inference_Colab.ipynb +++ /dev/null @@ -1,555 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "okQdUOf2ovBS" - }, - "source": [ - "#Running MMS-TTS inference in Colab\n", - "In this notebook, we give an example on how to run text-to-speech inference using MMS TTS models. \n", - "\n", - "By default, we run inference on a GPU. If you want to perform CPU inference, go to \"Runtiime\" menu -> \"Change runtime type\" and set \"Hardware accelerator\" to \"None\" before running." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "XK2jXLmEpgK5" - }, - "source": [ - "## 1. Preliminaries\n", - "This section installs necessary python packages for the other sections. Run it first." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "vGyb3dGWpmks", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "outputId": "9825fea8-d247-48d9-b33b-dbff36e905fa" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Cloning into 'vits'...\n", - "remote: Enumerating objects: 81, done.\u001b[K\n", - "remote: Total 81 (delta 0), reused 0 (delta 0), pack-reused 81\u001b[K\n", - "Unpacking objects: 100% (81/81), 3.33 MiB | 2.44 MiB/s, done.\n", - "Python 3.10.11\n", - "/content/vits\n", - "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", - "Collecting Cython==0.29.21\n", - " Downloading Cython-0.29.21-py2.py3-none-any.whl (974 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m974.2/974.2 kB\u001b[0m \u001b[31m27.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hInstalling collected packages: Cython\n", - " Attempting uninstall: Cython\n", - " Found existing installation: Cython 0.29.34\n", - " Uninstalling Cython-0.29.34:\n", - " Successfully uninstalled Cython-0.29.34\n", - "Successfully installed Cython-0.29.21\n", - "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", - "Collecting librosa==0.8.0\n", - " Downloading librosa-0.8.0.tar.gz (183 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m183.9/183.9 kB\u001b[0m \u001b[31m15.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: audioread>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from librosa==0.8.0) (3.0.0)\n", - "Requirement already satisfied: numpy>=1.15.0 in /usr/local/lib/python3.10/dist-packages (from librosa==0.8.0) (1.22.4)\n", - "Requirement already satisfied: scipy>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from librosa==0.8.0) (1.10.1)\n", - "Requirement already satisfied: scikit-learn!=0.19.0,>=0.14.0 in /usr/local/lib/python3.10/dist-packages (from librosa==0.8.0) (1.2.2)\n", - "Requirement already satisfied: joblib>=0.14 in /usr/local/lib/python3.10/dist-packages (from librosa==0.8.0) (1.2.0)\n", - "Requirement already satisfied: decorator>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from librosa==0.8.0) (4.4.2)\n", - "Collecting resampy>=0.2.2 (from librosa==0.8.0)\n", - " Downloading resampy-0.4.2-py3-none-any.whl (3.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m101.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: numba>=0.43.0 in /usr/local/lib/python3.10/dist-packages (from librosa==0.8.0) (0.56.4)\n", - "Requirement already satisfied: soundfile>=0.9.0 in /usr/local/lib/python3.10/dist-packages (from librosa==0.8.0) (0.12.1)\n", - "Requirement already satisfied: pooch>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa==0.8.0) (1.6.0)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba>=0.43.0->librosa==0.8.0) (0.39.1)\n", - "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from numba>=0.43.0->librosa==0.8.0) (67.7.2)\n", - "Requirement already satisfied: appdirs>=1.3.0 in /usr/local/lib/python3.10/dist-packages (from pooch>=1.0->librosa==0.8.0) (1.4.4)\n", - "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from pooch>=1.0->librosa==0.8.0) (23.1)\n", - "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from pooch>=1.0->librosa==0.8.0) (2.27.1)\n", - "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn!=0.19.0,>=0.14.0->librosa==0.8.0) (3.1.0)\n", - "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.10/dist-packages (from soundfile>=0.9.0->librosa==0.8.0) (1.15.1)\n", - "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0->soundfile>=0.9.0->librosa==0.8.0) (2.21)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa==0.8.0) (1.26.15)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa==0.8.0) (2022.12.7)\n", - "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa==0.8.0) (2.0.12)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa==0.8.0) (3.4)\n", - "Building wheels for collected packages: librosa\n", - " Building wheel for librosa (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for librosa: filename=librosa-0.8.0-py3-none-any.whl size=201378 sha256=c299b7ae3d6d527a4889716009ab27ca4018546d04f0e4de1019ea919311c0dc\n", - " Stored in directory: /root/.cache/pip/wheels/bf/b7/85/2f8044306ccec014930aea23ad4852fca9e2584e21c6972bc6\n", - "Successfully built librosa\n", - "Installing collected packages: resampy, librosa\n", - " Attempting uninstall: librosa\n", - " Found existing installation: librosa 0.10.0.post2\n", - " Uninstalling librosa-0.10.0.post2:\n", - " Successfully uninstalled librosa-0.10.0.post2\n", - "Successfully installed librosa-0.8.0 resampy-0.4.2\n", - "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", - "Collecting phonemizer==2.2.1\n", - " Downloading phonemizer-2.2.1-py3-none-any.whl (49 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.0/49.0 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from phonemizer==2.2.1) (1.2.0)\n", - "Collecting segments (from phonemizer==2.2.1)\n", - " Downloading segments-2.2.1-py2.py3-none-any.whl (15 kB)\n", - "Requirement already satisfied: attrs>=18.1 in /usr/local/lib/python3.10/dist-packages (from phonemizer==2.2.1) (23.1.0)\n", - "Collecting clldutils>=1.7.3 (from segments->phonemizer==2.2.1)\n", - " Downloading clldutils-3.19.0-py2.py3-none-any.whl (1.7 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m84.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting csvw>=1.5.6 (from segments->phonemizer==2.2.1)\n", - " Downloading csvw-3.1.3-py2.py3-none-any.whl (56 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.7/56.7 kB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: regex in /usr/local/lib/python3.10/dist-packages (from segments->phonemizer==2.2.1) (2022.10.31)\n", - "Requirement already satisfied: python-dateutil in /usr/local/lib/python3.10/dist-packages (from clldutils>=1.7.3->segments->phonemizer==2.2.1) (2.8.2)\n", - "Requirement already satisfied: tabulate>=0.7.7 in /usr/local/lib/python3.10/dist-packages (from clldutils>=1.7.3->segments->phonemizer==2.2.1) (0.8.10)\n", - "Collecting colorlog (from clldutils>=1.7.3->segments->phonemizer==2.2.1)\n", - " Downloading colorlog-6.7.0-py2.py3-none-any.whl (11 kB)\n", - "Collecting pylatexenc (from clldutils>=1.7.3->segments->phonemizer==2.2.1)\n", - " Downloading pylatexenc-2.10.tar.gz (162 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m162.6/162.6 kB\u001b[0m \u001b[31m24.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: markdown in /usr/local/lib/python3.10/dist-packages (from clldutils>=1.7.3->segments->phonemizer==2.2.1) (3.4.3)\n", - "Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from clldutils>=1.7.3->segments->phonemizer==2.2.1) (4.9.2)\n", - "Requirement already satisfied: markupsafe in /usr/local/lib/python3.10/dist-packages (from clldutils>=1.7.3->segments->phonemizer==2.2.1) (2.1.2)\n", - "Requirement already satisfied: babel in /usr/local/lib/python3.10/dist-packages (from csvw>=1.5.6->segments->phonemizer==2.2.1) (2.12.1)\n", - "Collecting colorama (from csvw>=1.5.6->segments->phonemizer==2.2.1)\n", - " Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)\n", - "Collecting isodate (from csvw>=1.5.6->segments->phonemizer==2.2.1)\n", - " Downloading isodate-0.6.1-py2.py3-none-any.whl (41 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.7/41.7 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: jsonschema in /usr/local/lib/python3.10/dist-packages (from csvw>=1.5.6->segments->phonemizer==2.2.1) (4.3.3)\n", - "Collecting language-tags (from csvw>=1.5.6->segments->phonemizer==2.2.1)\n", - " Downloading language_tags-1.2.0-py3-none-any.whl (213 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m213.4/213.4 kB\u001b[0m \u001b[31m29.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting rdflib (from csvw>=1.5.6->segments->phonemizer==2.2.1)\n", - " Downloading rdflib-6.3.2-py3-none-any.whl (528 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m528.1/528.1 kB\u001b[0m \u001b[31m57.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from csvw>=1.5.6->segments->phonemizer==2.2.1) (2.27.1)\n", - "Collecting rfc3986<2 (from csvw>=1.5.6->segments->phonemizer==2.2.1)\n", - " Downloading rfc3986-1.5.0-py2.py3-none-any.whl (31 kB)\n", - "Requirement already satisfied: uritemplate>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from csvw>=1.5.6->segments->phonemizer==2.2.1) (4.1.1)\n", - "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from isodate->csvw>=1.5.6->segments->phonemizer==2.2.1) (1.16.0)\n", - "Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema->csvw>=1.5.6->segments->phonemizer==2.2.1) (0.19.3)\n", - "Requirement already satisfied: pyparsing<4,>=2.1.0 in /usr/local/lib/python3.10/dist-packages (from rdflib->csvw>=1.5.6->segments->phonemizer==2.2.1) (3.0.9)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->csvw>=1.5.6->segments->phonemizer==2.2.1) (1.26.15)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->csvw>=1.5.6->segments->phonemizer==2.2.1) (2022.12.7)\n", - "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->csvw>=1.5.6->segments->phonemizer==2.2.1) (2.0.12)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->csvw>=1.5.6->segments->phonemizer==2.2.1) (3.4)\n", - "Building wheels for collected packages: pylatexenc\n", - " Building wheel for pylatexenc (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for pylatexenc: filename=pylatexenc-2.10-py3-none-any.whl size=136820 sha256=e99eecd0f55e1827ac73565fc43f5565f432aca243434ea921e0a31c5827331d\n", - " Stored in directory: /root/.cache/pip/wheels/d3/31/8b/e09b0386afd80cfc556c00408c9aeea5c35c4d484a9c762fd5\n", - "Successfully built pylatexenc\n", - "Installing collected packages: rfc3986, pylatexenc, language-tags, isodate, colorlog, colorama, rdflib, clldutils, csvw, segments, phonemizer\n", - "Successfully installed clldutils-3.19.0 colorama-0.4.6 colorlog-6.7.0 csvw-3.1.3 isodate-0.6.1 language-tags-1.2.0 phonemizer-2.2.1 pylatexenc-2.10 rdflib-6.3.2 rfc3986-1.5.0 segments-2.2.1\n", - "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", - "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (1.10.1)\n", - "Requirement already satisfied: numpy<1.27.0,>=1.19.5 in /usr/local/lib/python3.10/dist-packages (from scipy) (1.22.4)\n", - "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (1.22.4)\n", - "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", - "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.0.1+cu118)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch) (3.12.0)\n", - "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch) (4.5.0)\n", - "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.11.1)\n", - "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.1)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.2)\n", - "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch) (2.0.0)\n", - "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch) (3.25.2)\n", - "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch) (16.0.5)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (2.1.2)\n", - "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch) (1.3.0)\n", - "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", - "Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (0.15.2+cu118)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from torchvision) (1.22.4)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from torchvision) (2.27.1)\n", - "Requirement already satisfied: torch==2.0.1 in /usr/local/lib/python3.10/dist-packages (from torchvision) (2.0.1+cu118)\n", - "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.10/dist-packages (from torchvision) (8.4.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch==2.0.1->torchvision) (3.12.0)\n", - "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch==2.0.1->torchvision) (4.5.0)\n", - "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch==2.0.1->torchvision) (1.11.1)\n", - "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch==2.0.1->torchvision) (3.1)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch==2.0.1->torchvision) (3.1.2)\n", - "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch==2.0.1->torchvision) (2.0.0)\n", - "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch==2.0.1->torchvision) (3.25.2)\n", - "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch==2.0.1->torchvision) (16.0.5)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision) (1.26.15)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision) (2022.12.7)\n", - "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision) (2.0.12)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision) (3.4)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch==2.0.1->torchvision) (2.1.2)\n", - "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch==2.0.1->torchvision) (1.3.0)\n", - "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", - "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (3.7.1)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (1.0.7)\n", - "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (0.11.0)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (4.39.3)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (1.4.4)\n", - "Requirement already satisfied: numpy>=1.20 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (1.22.4)\n", - "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (23.1)\n", - "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (8.4.0)\n", - "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (3.0.9)\n", - "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (2.8.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib) (1.16.0)\n", - "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", - "Collecting Unidecode==1.1.1\n", - " Downloading Unidecode-1.1.1-py2.py3-none-any.whl (238 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m238.3/238.3 kB\u001b[0m \u001b[31m18.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hInstalling collected packages: Unidecode\n", - "Successfully installed Unidecode-1.1.1\n", - "/content/vits/monotonic_align\n", - "Compiling core.pyx because it changed.\n", - "[1/1] Cythonizing core.pyx\n", - "/usr/local/lib/python3.10/dist-packages/Cython/Compiler/Main.py:369: FutureWarning: Cython directive 'language_level' not set, using 2 for now (Py2). This will change in a later release! File: /content/vits/monotonic_align/core.pyx\n", - " tree = Parsing.p_module(s, pxd, full_module_name)\n", - "\u001b[01m\u001b[Kcore.c:\u001b[m\u001b[K In function ‘\u001b[01m\u001b[K__Pyx_InitGlobals\u001b[m\u001b[K’:\n", - "\u001b[01m\u001b[Kcore.c:16766:1:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K‘\u001b[01m\u001b[KPyEval_InitThreads\u001b[m\u001b[K’ is deprecated [\u001b[01;35m\u001b[K-Wdeprecated-declarations\u001b[m\u001b[K]\n", - "16766 | \u001b[01;35m\u001b[KPyEval_InitThreads\u001b[m\u001b[K();\n", - " | \u001b[01;35m\u001b[K^~~~~~~~~~~~~~~~~~\u001b[m\u001b[K\n", - "In file included from \u001b[01m\u001b[K/usr/include/python3.10/Python.h:130\u001b[m\u001b[K,\n", - " from \u001b[01m\u001b[Kcore.c:16\u001b[m\u001b[K:\n", - "\u001b[01m\u001b[K/usr/include/python3.10/ceval.h:122:37:\u001b[m\u001b[K \u001b[01;36m\u001b[Knote: \u001b[m\u001b[Kdeclared here\n", - " 122 | Py_DEPRECATED(3.9) PyAPI_FUNC(void) \u001b[01;36m\u001b[KPyEval_InitThreads\u001b[m\u001b[K(void);\n", - " | \u001b[01;36m\u001b[K^~~~~~~~~~~~~~~~~~\u001b[m\u001b[K\n", - "/content/vits\n" - ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "'/content/vits'" - ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "string" - } - }, - "metadata": {}, - "execution_count": 1 - } - ], - "source": [ - "%pwd\n", - "!git clone https://github.com/jaywalnut310/vits.git\n", - "!python --version\n", - "%cd vits/\n", - "\n", - "!pip install Cython==0.29.21\n", - "!pip install librosa==0.8.0\n", - "!pip install phonemizer==2.2.1\n", - "!pip install scipy\n", - "!pip install numpy\n", - "!pip install torch\n", - "!pip install torchvision\n", - "!pip install matplotlib\n", - "!pip install Unidecode==1.1.1\n", - "\n", - "%cd monotonic_align/\n", - "%mkdir monotonic_align\n", - "!python3 setup.py build_ext --inplace\n", - "%cd ../\n", - "%pwd" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "KuBzieKbuJKN" - }, - "source": [ - "## 2. Choose a language and download its checkpoint\n", - "Find the ISO code for your target language [here](https://dl.fbaipublicfiles.com/mms/tts/all-tts-languages.html). You can find more details about the languages we currently support for TTS in this [table](https://dl.fbaipublicfiles.com/mms/misc/language_coverage_mms.html)." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "id": "UtEeQcmwuUaG", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "2adfb7eb-b9a2-44c3-8571-72fbc4b60aff" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Download model for language: eng\n", - "Model checkpoints in ./eng: ['G_100000.pth', 'config.json', 'vocab.txt']\n" - ] - } - ], - "source": [ - "import os\n", - "import subprocess\n", - "import locale\n", - "locale.getpreferredencoding = lambda: \"UTF-8\"\n", - "\n", - "def download(lang, tgt_dir=\"./\"):\n", - " lang_fn, lang_dir = os.path.join(tgt_dir, lang+'.tar.gz'), os.path.join(tgt_dir, lang)\n", - " cmd = \";\".join([\n", - " f\"wget https://dl.fbaipublicfiles.com/mms/tts/{lang}.tar.gz -O {lang_fn}\",\n", - " f\"tar zxvf {lang_fn}\"\n", - " ])\n", - " print(f\"Download model for language: {lang}\")\n", - " subprocess.check_output(cmd, shell=True)\n", - " print(f\"Model checkpoints in {lang_dir}: {os.listdir(lang_dir)}\")\n", - " return lang_dir\n", - "\n", - "LANG = \"eng\"\n", - "ckpt_dir = download(LANG)" - ] - }, - { - "cell_type": "markdown", - "source": [ - "## 3. Load the checkpoint" - ], - "metadata": { - "id": "zexlezYiSWMb" - } - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "id": "Sxi3CXmGqH6r", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "24710ada-6f04-4f29-c5f2-000458784ed8" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Run inference with cuda\n", - "load ./eng/G_100000.pth\n" - ] - } - ], - "source": [ - "from IPython.display import Audio\n", - "import os\n", - "import re\n", - "import glob\n", - "import json\n", - "import tempfile\n", - "import math\n", - "import torch\n", - "from torch import nn\n", - "from torch.nn import functional as F\n", - "from torch.utils.data import DataLoader\n", - "import numpy as np\n", - "import commons\n", - "import utils\n", - "import argparse\n", - "import subprocess\n", - "from data_utils import TextAudioLoader, TextAudioCollate, TextAudioSpeakerLoader, TextAudioSpeakerCollate\n", - "from models import SynthesizerTrn\n", - "from scipy.io.wavfile import write\n", - "\n", - "def preprocess_char(text, lang=None):\n", - " \"\"\"\n", - " Special treatement of characters in certain languages\n", - " \"\"\"\n", - " print(lang)\n", - " if lang == 'ron':\n", - " text = text.replace(\"ț\", \"ţ\")\n", - " return text\n", - "\n", - "class TextMapper(object):\n", - " def __init__(self, vocab_file):\n", - " self.symbols = [x.replace(\"\\n\", \"\") for x in open(vocab_file, encoding=\"utf-8\").readlines()]\n", - " self.SPACE_ID = self.symbols.index(\" \")\n", - " self._symbol_to_id = {s: i for i, s in enumerate(self.symbols)}\n", - " self._id_to_symbol = {i: s for i, s in enumerate(self.symbols)}\n", - "\n", - " def text_to_sequence(self, text, cleaner_names):\n", - " '''Converts a string of text to a sequence of IDs corresponding to the symbols in the text.\n", - " Args:\n", - " text: string to convert to a sequence\n", - " cleaner_names: names of the cleaner functions to run the text through\n", - " Returns:\n", - " List of integers corresponding to the symbols in the text\n", - " '''\n", - " sequence = []\n", - " clean_text = text.strip()\n", - " for symbol in clean_text:\n", - " symbol_id = self._symbol_to_id[symbol]\n", - " sequence += [symbol_id]\n", - " return sequence\n", - "\n", - " def uromanize(self, text, uroman_pl):\n", - " iso = \"xxx\"\n", - " with tempfile.NamedTemporaryFile() as tf, \\\n", - " tempfile.NamedTemporaryFile() as tf2:\n", - " with open(tf.name, \"w\") as f:\n", - " f.write(\"\\n\".join([text]))\n", - " cmd = f\"perl \" + uroman_pl\n", - " cmd += f\" -l {iso} \"\n", - " cmd += f\" < {tf.name} > {tf2.name}\"\n", - " os.system(cmd)\n", - " outtexts = []\n", - " with open(tf2.name) as f:\n", - " for line in f:\n", - " line = re.sub(r\"\\s+\", \" \", line).strip()\n", - " outtexts.append(line)\n", - " outtext = outtexts[0]\n", - " return outtext\n", - "\n", - " def get_text(self, text, hps):\n", - " text_norm = self.text_to_sequence(text, hps.data.text_cleaners)\n", - " if hps.data.add_blank:\n", - " text_norm = commons.intersperse(text_norm, 0)\n", - " text_norm = torch.LongTensor(text_norm)\n", - " return text_norm\n", - "\n", - " def filter_oov(self, text):\n", - " val_chars = self._symbol_to_id\n", - " txt_filt = \"\".join(list(filter(lambda x: x in val_chars, text)))\n", - " print(f\"text after filtering OOV: {txt_filt}\")\n", - " return txt_filt\n", - "\n", - "def preprocess_text(txt, text_mapper, hps, uroman_dir=None, lang=None):\n", - " txt = preprocess_char(txt, lang=lang)\n", - " is_uroman = hps.data.training_files.split('.')[-1] == 'uroman'\n", - " if is_uroman:\n", - " with tempfile.TemporaryDirectory() as tmp_dir:\n", - " if uroman_dir is None:\n", - " cmd = f\"git clone git@github.com:isi-nlp/uroman.git {tmp_dir}\"\n", - " print(cmd)\n", - " subprocess.check_output(cmd, shell=True)\n", - " uroman_dir = tmp_dir\n", - " uroman_pl = os.path.join(uroman_dir, \"bin\", \"uroman.pl\")\n", - " print(f\"uromanize\")\n", - " txt = text_mapper.uromanize(txt, uroman_pl)\n", - " print(f\"uroman text: {txt}\")\n", - " txt = txt.lower()\n", - " txt = text_mapper.filter_oov(txt)\n", - " return txt\n", - "\n", - "if torch.cuda.is_available():\n", - " device = torch.device(\"cuda\")\n", - "else:\n", - " device = torch.device(\"cpu\")\n", - "\n", - "print(f\"Run inference with {device}\")\n", - "vocab_file = f\"{ckpt_dir}/vocab.txt\"\n", - "config_file = f\"{ckpt_dir}/config.json\"\n", - "assert os.path.isfile(config_file), f\"{config_file} doesn't exist\"\n", - "hps = utils.get_hparams_from_file(config_file)\n", - "text_mapper = TextMapper(vocab_file)\n", - "net_g = SynthesizerTrn(\n", - " len(text_mapper.symbols),\n", - " hps.data.filter_length // 2 + 1,\n", - " hps.train.segment_size // hps.data.hop_length,\n", - " **hps.model)\n", - "net_g.to(device)\n", - "_ = net_g.eval()\n", - "\n", - "g_pth = f\"{ckpt_dir}/G_100000.pth\"\n", - "print(f\"load {g_pth}\")\n", - "\n", - "_ = utils.load_checkpoint(g_pth, net_g, None)" - ] - }, - { - "cell_type": "markdown", - "source": [ - "## 4. Generate an audio given text\n", - "Specify the sentence you want to synthesize and generate the audio" - ], - "metadata": { - "id": "fIiwaWl6SiVy" - } - }, - { - "cell_type": "code", - "source": [ - "txt = \"Expanding the language coverage of speech technology has the potential to improve access to information for many more people\"\n", - "\n", - "print(f\"text: {txt}\")\n", - "txt = preprocess_text(txt, text_mapper, hps, lang=LANG)\n", - "stn_tst = text_mapper.get_text(txt, hps)\n", - "with torch.no_grad():\n", - " x_tst = stn_tst.unsqueeze(0).to(device)\n", - " x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).to(device)\n", - " hyp = net_g.infer(\n", - " x_tst, x_tst_lengths, noise_scale=.667,\n", - " noise_scale_w=0.8, length_scale=1.0\n", - " )[0][0,0].cpu().float().numpy()\n", - "\n", - "print(f\"Generated audio\") \n", - "Audio(hyp, rate=hps.data.sampling_rate)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 165 - }, - "id": "mpSvjfSCGBDm", - "outputId": "142581f8-e9ec-4d17-d4da-413176e3cee3" - }, - "execution_count": 4, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "text: Expanding the language coverage of speech technology has the potential to improve access to information for many more people\n", - "eng\n", - "text after filtering OOV: expanding the language coverage of speech technology has the potential to improve access to information for many more people\n", - "Generated audio\n" - ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ], - "text/html": [ - "\n", - " \n", - " " - ] - }, - "metadata": {}, - "execution_count": 4 - } - ] - } - ], - "metadata": { - "colab": { - "provenance": [], - "gpuType": "T4" - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python" - }, - "accelerator": "GPU" - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file