Merge pull request #6 from hmirin/add-openai

Add openai
hmirin · Feb 20, 2024 · eab62c2 · eab62c2
2 parents a5047e6 + 7807248
commit eab62c2
Show file tree

Hide file tree

Showing 7 changed files with 225 additions and 148 deletions.
diff --git a/README.md b/README.md
@@ -6,8 +6,8 @@ See [project page](https://hmirin.github.io/speechy/).
 ## Features
 
 Available
-- Read selected text aloud from context menu  
-- Change voice speed from extension icon menu
+- Read selected text aloud from context menu
+- [Google] Change voice speed from extension icon menu
 
 Planned
 - Stop voice from extension menu.
@@ -16,7 +16,4 @@ Planned
 
 Available
 - Google Cloud Text-to-Speech API
-
-Planned
-- Azure
-
+- OpenAI Text-to-Speech API
diff --git a/docs/index.md b/docs/index.md
@@ -4,14 +4,14 @@ layout: default
 
 # What's this?
 
-Speechy provides an easy interface for paid Text-to-Speech (TTS) APIs.
+Speechy provides an easy interface for **paid** Text-to-Speech (TTS) APIs.
 Select text and click Speechy from the context menu. That's it. Your browser reads the text aloud with a high-quality voice compared to your OS's default TTS engine or free online APIs.
 
 ![Screenshot_2](/images/screenshot_2.png)
 
 # Paid API?
 
-You will find many other chrome extensions that use free TTS APIs or OS’s default TTS engine. Voices generated by these engines are low quality and not comfortable to hear. In contrast, paid APIs generate reasonable voice. Especially, Google’s Cloud Text-to-Speech API can generate unbelievably high-quality voice with cutting-edge Wavenet technology. The author wanted to use this API in Chrome with easy interface. So this extension was made. If you don’t want to pay for TTS engine, this extension is not for you. However, these APIs won’t cost you so much. For example, Google’s Cloud Text-to-Speech API costs you $16.00 USD / 1 million characters after a 1 million character free tier. So, the author is happy if you give it a try!
+You will find many other chrome extensions that use free TTS APIs or OS’s default TTS engine. Voices generated by these engines are low quality and not comfortable to hear. In contrast, paid APIs generate reasonable voice. Especially, Google’s Cloud Text-to-Speech API and OpenAI Speech API can generate high-quality voice with deep learning technology. The author wanted to use this API in Chrome with easy interface. So this extension was made. If you don’t want to pay for TTS engine, this extension is not for you. However, these APIs won’t cost you so much. For example, Google’s Cloud Text-to-Speech API costs you $16.00 USD / 1 million characters after a 1 million character free tier. So, the author is happy if you give it a try!
 
 # Help and Setup
 See this [page](https://hmirin.github.io/speechy/installed)

diff --git a/docs/installed.md b/docs/installed.md
@@ -7,17 +7,26 @@ layout: pages
 
 ## Setup
 
-1. You need to get your API key for the API provider you choose. Currently, Speechy supports only this API.
+1. You need to get your API key for the API provider you choose. Currently, Speechy supports:
     - [Google Cloud Text-to-Speech API](https://cloud.google.com/text-to-speech/)
         - Description: This API provides WaveNet (Deep Learning based high quality voice synthesis algorithm) voice. Highly recommended!
         - Follow the instruction on [this link](https://support.google.com/cloud/answer/6158862) to get your API key.
             - We recommend you to restrict the API key only to Cloud Text-to-speech API for security.
+    - [OpenAI Text-to-Speech API](https://platform.openai.com/docs/guides/text-to-speech)
+        - Description: This API provides a high-quality voice synthesis algorithm. It is also a good choice.
+        - Follow the instruction on [this link](https://www.howtogeek.com/885918/how-to-get-an-openai-api-key/) to get your API key.
+            - We recommend restricting the API key to "Model capabilities" only, for security.
 
 2. Click this extension's icon on the address bar and choose API and add API key and push save.
     ![Screenshot_1](/images/screenshot_1.png)
 3. Choose voice you like.
-    - Choose voice whose language matches the text.
-    - We recommend you to use the voice named with "wavenet".
+  - For Google Cloud Text-to-Speech API:
+    - Choose voice whose language matches the text. 
+      - en-US-Wavenet-D is for en-US and the speaker ID is D.
+    - We recommend using a voice whose name contains "Wavenet".
+  - For OpenAI Text-to-Speech API:
+    - All voices are multi-lingual. Choose voice you like.
+    - However, the voices are still immature for non-English text. We recommend using the Google Cloud Text-to-Speech API for non-English text if it is available for your language.
 
 ## Usage
 

diff --git a/js/background.js b/js/background.js
@@ -81,15 +81,30 @@ function getSelectionText() {
 
 function to_voice(text) {
     chrome.storage.sync.get({
-        api_provider: "",
-        apikey: "",
-        chosen_provider_options: {}
+        api_provider: "Google",
+        openai_apikey: "",
+        google_apikey: "",
+        openai_voice: "alloy",
+        google_voice: "en-US-Wavenet-D",
+        google_speed: 1,
     }, function (items) {
         var api_provider = items.api_provider;
-        var api_key = items.apikey;
-        var chosen_provider_options = items.chosen_provider_options;
+        if (api_provider == "Google") {
+            chosen_provider_options = {
+                voice: items.google_voice,
+                speed: items.google_speed
+            };
+            api_key = items.google_apikey;
+        } else if (api_provider == "OpenAI") {
+            chosen_provider_options = {
+                voice: items.openai_voice
+            };
+            api_key = items.openai_apikey;
+        }
         if (api_provider == "Google") {
             google_cloud_tts(text, chosen_provider_options, api_key);
+        } else if (api_provider == "OpenAI") {
+            openai_tts(text, chosen_provider_options, api_key);
         } else {
             chrome.notifications.create({
                 type: 'basic',
@@ -157,6 +172,61 @@ function google_cloud_tts_error_handler(err) {
     console.error(err);
 }
 
+/**
+ * Synthesize `text` with the OpenAI Text-to-Speech API and play the result.
+ *
+ * Sends a POST request to the audio/speech endpoint using the "tts-1" model.
+ * On success the returned audio blob is converted to a data URL and its
+ * base64 payload is handed to playvoice(); on an HTTP error the parsed JSON
+ * error body is passed to openai_tts_error_handler().
+ *
+ * @param {string} text - Text to read aloud.
+ * @param {{voice: (string|undefined)}} chosen_provider_options - Provider
+ *     options; `voice` falls back to "alloy" when missing or empty.
+ * @param {string} api_key - OpenAI API key, sent as a Bearer token.
+ */
+function openai_tts(text, chosen_provider_options, api_key) {
+    var endpoint = "https://api.openai.com/v1/audio/speech";
+    var voice = chosen_provider_options.voice || "alloy"; // Default voice if not specified
+    fetch(endpoint, {
+        method: "POST",
+        headers: {
+            "Authorization": "Bearer " + api_key,
+            "Content-Type": "application/json"
+        },
+        body: JSON.stringify({
+            "model": "tts-1",
+            "input": text,
+            // Send the voice chosen in the options rather than a fixed one.
+            "voice": voice
+        }),
+    })
+    .then((res) => {
+        if (!res.ok) {
+            // Returned so that a failure while parsing the error body reaches .catch() below.
+            return res.json().then(openai_tts_error_handler);
+        }
+        return res.blob().then((blob) => {
+            var reader = new FileReader();
+            // Attach the handler before the read is started.
+            reader.onloadend = function() {
+                var base64data = reader.result;
+                if (typeof base64data !== "string") {
+                    // loadend also fires after a failed read, in which case there is no result.
+                    console.error(reader.error);
+                    return;
+                }
+                // The result is a data URL; only the part after the comma is passed on.
+                playvoice(base64data.split(',')[1]);
+            };
+            reader.readAsDataURL(blob);
+        });
+    })
+    .catch(function (err) {
+        console.error(err);
+        alert("Network error, see console.")
+    });
+}
+
+/**
+ * Report an error response from the OpenAI Text-to-Speech API to the user.
+ *
+ * Shows a notification containing the API's error message and logs the raw
+ * error to the console. If the message cannot be read (for example when
+ * `err` is null), a generic notification is shown instead.
+ *
+ * @param {Object} err - Parsed JSON error body. OpenAI nests the details
+ *     under `err.error`, so `err.error.message` is preferred and
+ *     `err.message` is used as a fallback.
+ */
+function openai_tts_error_handler(err) {
+    try {
+        // OpenAI error bodies have the shape {"error": {"message": "..."}}.
+        var detail = (err.error && err.error.message) || err.message;
+        chrome.notifications.create({
+            type: 'basic',
+            iconUrl: '/images/icon128.png',
+            title: 'Speechy',
+            message: "Error from OpenAI Text-to-Speech API\nMessage: " + detail + "\nPlease check the options."
+        });
+    } catch (e) {
+        chrome.notifications.create({
+            type: 'basic',
+            iconUrl: '/images/icon128.png',
+            title: 'Speechy',
+            message: "Something went wrong. Please check settings."
+        });
+    }
+    console.error(err);
+}
+
+
 chrome.runtime.onInstalled.addListener(function (details) {
     if (details.reason == "install") {
         chrome.tabs.create({ url: "https://hmirin.github.io/speechy/installed" });

diff --git a/js/popup.js b/js/popup.js
@@ -1,128 +1,115 @@
-var default_api_provider = "Google";
-
-function save_api_options() {
-    var api_provider = document.getElementById('api_provider').value;
-    var apikey = document.getElementById('apikey').value;
-    var chosen_provider_options = get_chosen_provider_options(api_provider);
-    chrome.storage.sync.set({
-        api_provider: api_provider,
-        apikey: apikey,
-        chosen_provider_options: chosen_provider_options
-    }, function () {
-        enable_api_edit_mode(false)
-        show_provider_options(api_provider, true);
-    });
-}
-
-document.getElementById('save_api_options').addEventListener('click', save_api_options);
-
-function save_provider_options() {
-    var api_provider = document.getElementById('api_provider').value;
-    var chosen_provider_options = get_chosen_provider_options(api_provider);
-    chrome.storage.sync.set({
-        chosen_provider_options: chosen_provider_options
-    }, function () {
-    });
-}
-
-var divsToMark = document.getElementsByClassName("provider_settings_form_inputs");
-for (var i = 0; i < divsToMark.length; i++) {
-    divsToMark[i].addEventListener('change', save_provider_options);
-}
-
-
+// Options are given as follows:
+// api_provider: "Google" or "OpenAI"
+// openai_apikey: string or ""
+// google_apikey: string or ""
+// openai_voice: string || "alloy"
+// google_voice: string || "en-US-Wavenet-D"
+// google_speed: number || 1
+// chosen_provider_options (deprecated): that has the following keys:
+//     api_key (deprecated): string || "" (If this is set, the value must be transferred to google_apikey)
+//     speed (deprecated): number || 1 (If this is set, the value must be transferred to google_speed)
+//     voice (deprecated): string || "en-US-Wavenet-D" (If this is set, the value must be transferred to google_voice)
+
+// First, restore the options from storage and assign them to the form elements.
+// If deprecated options are set, transfer the values to the new options.
+// On that occasion, save the options to the storage again and remove the deprecated options.
 function restore_options() {
-    // Use default value color = 'red' and likesColor = true.
+    // Load the saved options (with defaults), migrate settings written by
+    // older versions to the per-provider keys, then fill in the popup form.
     chrome.storage.sync.get({
-        api_provider: default_api_provider,
-        apikey: "",
+        api_provider: "Google",
+        openai_apikey: "",
+        google_apikey: "",
+        openai_voice: "alloy",
+        google_voice: "en-US-Wavenet-D",
+        google_speed: 1,
+        // Legacy keys, requested only so that they can be migrated below.
+        apikey: "",
         chosen_provider_options: {}
     }, function (items) {
-        document.getElementById('api_provider').value = items.api_provider;
-        document.getElementById('apikey').value = items.apikey;
-        if (items.apikey != "") {
-            enable_api_edit_mode(false);
-            set_chosen_provider_options(items.api_provider, items.chosen_provider_options);
-            show_provider_options(items.api_provider, true);
-        }
-    });
-}
-
-document.addEventListener('DOMContentLoaded', restore_options);
-
-function get_chosen_provider_options(api_provider) {
-    if (api_provider == "Google") {
-        return {
-            voice: document.getElementById("voice").value,
-            speed: document.getElementById("speed").value
+        var legacy = items.chosen_provider_options || {};
+        // The key may be stored inside the deprecated object or under the
+        // old top-level "apikey" name.
+        var legacy_apikey = (legacy.api_key !== void 0) ? legacy.api_key : items.apikey;
+        // Migrate only when a legacy value is actually stored; with the
+        // defaults above ("" and {}) there is nothing to transfer.
+        if (legacy_apikey !== "" || Object.keys(legacy).length > 0) {
+            if (legacy_apikey !== void 0 && legacy_apikey !== "") {
+                items.google_apikey = legacy_apikey;
+            }
+            if (legacy.speed !== void 0) {
+                items.google_speed = legacy.speed;
+            }
+            if (legacy.voice !== void 0) {
+                items.google_voice = legacy.voice;
+            }
+            chrome.storage.sync.set({
+                api_provider: items.api_provider,
+                openai_apikey: items.openai_apikey,
+                google_apikey: items.google_apikey,
+                openai_voice: items.openai_voice,
+                google_voice: items.google_voice,
+                google_speed: items.google_speed
+            }, function () {
+                // Drop the deprecated keys once the new ones are stored.
+                chrome.storage.sync.remove(["apikey", "chosen_provider_options"]);
+            });
         }
+        // set the values to the form elements
+        document.getElementById(items.api_provider.toLowerCase()).checked = true;
+        document.getElementById('openai_apikey').value = items.openai_apikey;
+        document.getElementById('google_apikey').value = items.google_apikey;
+        document.getElementById('openai_voice').value = items.openai_voice;
+        document.getElementById('google_voice').value = items.google_voice;
+        document.getElementById('google_speed').value = items.google_speed;
+        sync_speed(items.google_speed);
+        switch_api_options(items.api_provider);
     }
+    );
 }
+document.addEventListener('DOMContentLoaded', restore_options);
 
-function set_chosen_provider_options(api_provider, chosen_provider_options) {
-    if (api_provider == "Google") {
-        document.getElementById("voice").value = chosen_provider_options.voice;
-        sync_speed(chosen_provider_options.speed)
-    }
-}
-
-function change_api_key() {
-    enable_api_edit_mode(true);
-}
-
-document.getElementById('change_api_key').addEventListener('click', change_api_key);
-
-function enable_api_edit_mode(status) {
-    if (status == true) {
-        document.getElementById('change_api_key').style.display = "none";
-        document.getElementById('save_api_options').style.display = "";
-        var divsToHide = document.getElementsByClassName("api_settings_form_inputs"); //divsToHide is an array
-        for (var i = 0; i < divsToHide.length; i++) {
-            divsToHide[i].removeAttribute("disabled");
-        }
-    } else {
-        document.getElementById('change_api_key').style.display = "";
-        document.getElementById('save_api_options').style.display = "none";
-        var divsToHide = document.getElementsByClassName("api_settings_form_inputs"); //divsToHide is an array
-        for (var i = 0; i < divsToHide.length; i++) {
-            divsToHide[i].disabled = "disabled";
-        }
-    }
+/**
+ * Persist the current popup form state to chrome.storage.sync.
+ *
+ * Reads the selected provider radio button, both API key fields, both voice
+ * selections and the Google speed control, and stores them under the keys
+ * api_provider, openai_apikey, google_apikey, openai_voice, google_voice and
+ * google_speed.
+ */
+function save_api_options() {
+    var checked_provider = document.querySelector('input[name="api_provider"]:checked');
+    chrome.storage.sync.set({
+        // Fall back to "Google" when no radio button is checked.
+        api_provider: checked_provider ? checked_provider.value : "Google",
+        openai_apikey: document.getElementById('openai_apikey').value,
+        google_apikey: document.getElementById('google_apikey').value,
+        openai_voice: document.getElementById('openai_voice').value,
+        google_voice: document.getElementById('google_voice').value,
+        // The speed control is an input element: its state is in .value, and it has no inner HTML.
+        google_speed: document.getElementById('google_speed').value
+    });
+}
 
-function show_provider_options(api_provider, status) {
-    if (status == true) {
-        if (api_provider == "Google") {
-            document.getElementById('provider_settings_form').style.display = "";
-        }
-    } else {
-        if (api_provider == "Google") {
-            document.getElementById('provider_settings_form').style.display = "none";
+/**
+ * Show the settings that belong to the selected provider and hide the others.
+ *
+ * For every known provider, all elements whose class name is the lower-cased
+ * provider name get display "block" when that provider equals `api_provider`
+ * and display "none" otherwise.
+ *
+ * @param {string} api_provider - Selected provider, "Google" or "OpenAI".
+ */
+function switch_api_options(api_provider) {
+    var api_providers = ["Google", "OpenAI"];
+    for (var i = 0; i < api_providers.length; i++) {
+        var provider = api_providers[i];
+        var elements = document.getElementsByClassName(provider.toLowerCase());
+        for (var j = 0; j < elements.length; j++) {
+            elements[j].style.display = (provider === api_provider) ? "block" : "none";
         }
     }
 }
 
-
+// sync_speed is called when the input element with the id of "google_speed" is changed
 function sync_speed(value) {
-    if (value === void 0 || !isFinite(value)) {
-        value = document.getElementById("speed").value;
-    } else if (value < 0.25 || value > 4) {
-        value = 1;
-    }
+    // Mirror the current value of the "google_speed" control into the
+    // "speedometer" element. The argument is overwritten here, so whatever
+    // the caller passes is ignored.
+    value = document.getElementById("google_speed").value;
     document.getElementById("speedometer").innerHTML = value;
-    document.getElementById("speed").value = value;
 }
 
-document.addEventListener('DOMContentLoaded', function () {
-    document.getElementById("speed").addEventListener("change", sync_speed);
-}
-)
 
 document.addEventListener('DOMContentLoaded', function () {
-    document.getElementById("help_link").addEventListener("click", openIndex);
-}
-)
-
-function openIndex() {
-    chrome.tabs.create({ active: true, url: "https://hmirin.github.io/speechy/installed#usage" });
-}
+    // Wire up the popup controls once the DOM is ready.
+    // Refresh the displayed speed when the speed control reports a change.
+    document.getElementById("google_speed").addEventListener("change", function() {
+        sync_speed(this.value);
+    });    
+    // Open the usage section of the setup page in a new, active tab.
+    document.getElementById("help_link").addEventListener("click", function() {
+        chrome.tabs.create({ active: true, url: "https://hmirin.github.io/speechy/installed#usage" });
+    });
+    // Choosing a provider saves the form and switches the visible settings to that provider.
+    var radios = document.querySelectorAll('input[name="api_provider"]');
+    for (var i = 0; i < radios.length; i++) {
+        radios[i].addEventListener('change', function() {
+            save_api_options();
+            switch_api_options(this.value);
+        });
+    }
+    // Every edit to a key, voice or speed field is saved immediately.
+    document.getElementById('openai_apikey').addEventListener('input', save_api_options);
+    document.getElementById('google_apikey').addEventListener('input', save_api_options);
+    document.getElementById('openai_voice').addEventListener('input', save_api_options);
+    document.getElementById('google_voice').addEventListener('input', save_api_options);
+    document.getElementById('google_speed').addEventListener('input', save_api_options);
+});