Merge branch 'feat/add_tts_wakenet' into 'master'

Feat/add tts wakenet See merge request speech-recognition-framework/esp-sr!79
espressif · Nov 27, 2023 · 162de62 · 162de62
2 parents 34381ef + 395ee90
commit 162de62
Show file tree

Hide file tree

Showing 10 changed files with 33 additions and 21 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,7 @@ If you can not map model partition successfully, please check the left free stor
 ## unreleased
 - Add Chinese MultiNet7 models
 - Add first Noise Suppression model: nsnet1
+- Add WakeNet models trained on TTS samples: Jarvis and Computer
 
 ## 1.5.1
 - Reduce Internal RAM of multinet7

diff --git a/Kconfig.projbuild b/Kconfig.projbuild
@@ -107,6 +107,14 @@ choice SR_WN_MODEL_LOAD
     config SR_WN_WN9_NIHAOXIAOZHI
         bool "nihaoxiaozhi (wn9_nihaoxiaozhi)"
         depends on IDF_TARGET_ESP32S3
+
+    config SR_WN_WN9_JARVIS_TTS
+        bool "jarvis (wn9_jarvis_tts)"
+        depends on IDF_TARGET_ESP32S3
+
+    config SR_WN_WN9_COMPUTER_TTS
+        bool "computer (wn9_computer_tts)"
+        depends on IDF_TARGET_ESP32S3
 
     config SR_WN_WN9_CUSTOMWORD
         bool "customized word (wn9_customword)"

diff --git a/README.md b/README.md
@@ -25,7 +25,22 @@ The new algorithms will no longer support ESP32 chips.
 
 Espressif wake word engine **WakeNet** is specially designed to provide a high performance and low memory footprint wake word detection algorithm for users, which enables devices always listen to wake words, such as “Alexa”, “Hi,lexin” and “Hi,ESP”.
 
-Currently, Espressif has not only provided an official wake word "Hi,Lexin","Hi,ESP" to the public for free, but also allows customized wake words. For details on how to customize your own wake words, please see [Espressif Speech Wake Words Customization Process](https://docs.espressif.com/projects/esp-sr/en/latest/esp32s3/wake_word_engine/ESP_Wake_Words_Customization.html).
+Currently, Espressif not only provides the official wake words "Hi,Lexin" and "Hi,ESP" to the public for free, but also allows customized wake words. For details on how to customize your own wake words, please see [Espressif Speech Wake Words Customization Process](https://docs.espressif.com/projects/esp-sr/en/latest/esp32s3/wake_word_engine/ESP_Wake_Words_Customization.html).  
+
+The following wake words are supported in esp-sr:
+
+|wake words       |                 ESP32                 |         ESP32-S3       | 
+|:--------------- | :------------------------------------:| :---------------------:| 
+|Hi,乐鑫           |  wn5_hilexin, wn5_hilexinX3           | wn9_hilexin            | 
+|你好小智          |  wn5_nihaoxiaozhi,wn5_nihaoxiaozhiX3   | wn9_nihaoxiaozhi       |
+|Hi,ESP           |                                        | wn9_hiesp              | 
+|Hi,M Five        |                                        | wn9_himfive            | 
+|Alexa            |                                        | wn9_alexa              | 
+|小爱同学          |                                        | wn9_xiaoaitongxue      | 
+|Jarvis           |                                        | wn9_jarvis_tts         | 
+|Computer         |                                        | wn9_computer_tts       | 
+
+*NOTE:* The `_tts` suffix means the WakeNet model was trained on TTS samples.  
 
 ## Speech Command Recognition
 

diff --git a/model/movemodel.py b/model/movemodel.py
@@ -51,26 +51,10 @@ def copy_wakenet_from_sdkconfig(model_path, sdkconfig_path, target_path):
         models.append('wn9_himfive')
     if "CONFIG_SR_WN_WN9_NIHAOXIAOZHI" in models_string:
         models.append('wn9_nihaoxiaozhi')
-    if "CONFIG_SR_WN_WN9_CUSTOMWORD" in models_string:
-        models.append('wn9_customword')
-
-    for item in models:
-        shutil.copytree(model_path + '/wakenet_model/' + item, target_path+'/'+item)
-
-def copy_multinet_from_sdkconfig(model_path, sdkconfig_path, target_path):
-    """
-    Copy multinet model from model_path to target_path based on sdkconfig
-    """
-    with io.open(sdkconfig_path, "r") as f:
-        models_string = ''
-        for label in f:
-            label = label.strip("\n")
-            if 'CONFIG_SR_MN' in label and label[0] != '#':
-                models_string += label
-
-    models = []
-    if "CONFIG_SR_WN_WN9_NIHAOXIAOZHI" in models_string:
-        models.append('wn9_nihaoxiaozhi')
+    if "CONFIG_SR_WN_WN9_JARVIS_TTS" in models_string:
+        models.append('wn9_jarvis_tts')
+    if "CONFIG_SR_WN_WN9_COMPUTER_TTS" in models_string:
+        models.append('wn9_computer_tts')
     if "CONFIG_SR_WN_WN9_CUSTOMWORD" in models_string:
         models.append('wn9_customword')
 

diff --git a/model/wakenet_model/wn9_computer_tts/_MODEL_INFO_ b/model/wakenet_model/wn9_computer_tts/_MODEL_INFO_
@@ -0,0 +1,2 @@
+# (neural network type)_(model data version)_(label1_detection window length_threshold for 90%_threshold for 95%)_(label2 ...)_...
+wakenet9l_tts1h8_computer_3_0.648_0.650
diff --git a/model/wakenet_model/wn9_computer_tts/wn9_data b/model/wakenet_model/wn9_computer_tts/wn9_data
diff --git a/model/wakenet_model/wn9_computer_tts/wn9_index b/model/wakenet_model/wn9_computer_tts/wn9_index
diff --git a/model/wakenet_model/wn9_jarvis_tts/_MODEL_INFO_ b/model/wakenet_model/wn9_jarvis_tts/_MODEL_INFO_
@@ -0,0 +1,2 @@
+# (neural network type)_(model data version)_(label1_detection window length_threshold for 90%_threshold for 95%)_(label2 ...)_...
+wakenet9l_tts1h8_jarvis_3_0.627_0.632
diff --git a/model/wakenet_model/wn9_jarvis_tts/wn9_data b/model/wakenet_model/wn9_jarvis_tts/wn9_data
diff --git a/model/wakenet_model/wn9_jarvis_tts/wn9_index b/model/wakenet_model/wn9_jarvis_tts/wn9_index