diff --git a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/speech/SpeakerEmotionInference.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/speech/SpeakerEmotionInference.scala index 9c5adfe390..442cc21118 100644 --- a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/speech/SpeakerEmotionInference.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/speech/SpeakerEmotionInference.scala @@ -29,7 +29,7 @@ class SpeakerEmotionInference(override val uid: String) setDefault( locale -> Left("en-US"), - voiceName -> Left("en-US-JennyNeural"), + voiceName -> Left("en-US-JaneNeural"), text -> Left(this.uid + "_text")) def urlPath: String = "cognitiveservices/v1" @@ -54,7 +54,7 @@ class SpeakerEmotionInference(override val uid: String) override protected def prepareEntity: Row => Option[AbstractHttpEntity] = { row => val body: String = s"" + + s" xml:lang='en-US'>" + s"${getValue(row, text)}" Some(new StringEntity(body)) } diff --git a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/speech/SpeakerEmotionInferenceSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/speech/SpeakerEmotionInferenceSuite.scala index a27a0642af..42906dec8e 100644 --- a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/speech/SpeakerEmotionInferenceSuite.scala +++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/speech/SpeakerEmotionInferenceSuite.scala @@ -20,42 +20,55 @@ class SpeakerEmotionInferenceSuite extends TransformerFuzzing[SpeakerEmotionInfe .setLocation("eastus") .setSubscriptionKey(cognitiveKey) .setLocale("en-US") - .setVoiceName("en-US-JennyNeural") + .setVoiceName("en-US-JaneNeural") .setTextCol("text") .setOutputCol("ssml") val testData: Map[String, String] = Map[String, String]( ("\"A\" \"B\" \"C\"", "" + + "xml:lang='en-US'>" + "\"A\" " + "\"B\" " + "\"C\"\n"), ("\"I'm shouting excitedly!\" she shouted excitedly.", "" + + "xmlns:mstts='https://www.w3.org/2001/mstts' xml:lang='en-US'>" + "\"I'm shouting excitedly!\" she shouted " + "excitedly.\n"), ("This text has no quotes in it, so isValid should be false", "" + + "xmlns:mstts='https://www.w3.org/2001/mstts' xml:lang='en-US'>" + "This text has no quotes in it, so isValid should be false\n"), ("\"This is an example of a sentence with unmatched quotes,\" she said.\"", "" + + "xmlns:mstts='https://www.w3.org/2001/mstts' xml:lang='en-US'>" + "\"This is an example of a sentence with unmatched quotes,\"" + " she said.\"\n")) lazy val df: DataFrame = testData.keys.toSeq.toDF("text") + def normalizeSSML(ssml: String): String = { + val ignoredAttributes: List[String] = List("name", "style", "role") + ignoredAttributes.foldLeft(ssml)((acc, attr) => + acc.replaceAll(s"""\\s+$attr='[^']*'""", s"$attr=")) + } + + /* + We're testing the structure of the returned call not the quality of the api, so ignore specifics like role and style + */ + def assertFuzzyEquals(actualSSML: String, expectedSSML: String): Unit = { + assert(normalizeSSML(expectedSSML).equals(normalizeSSML(actualSSML))) + } + test("basic") { val transformed = ssmlGenerator.transform(df) transformed.show(truncate = false) - transformed.collect().map(row => { + transformed.collect().foreach { row => val actual = testData.getOrElse(row.getString(0), "") val expected = row.getString(2) - assert(actual.equals(expected)) - }) + assertFuzzyEquals(actual, expected) + } } test("arbitrary df size") { @@ -65,9 +78,9 @@ class SpeakerEmotionInferenceSuite extends TransformerFuzzing[SpeakerEmotionInfe val actual = row.getString(5) val expected = """""" + + """xmlns:mstts='https://www.w3.org/2001/mstts' xml:lang='en-US'>""" + s"""Hello\n""" - assert(actual.equals(expected)) + assertFuzzyEquals(actual, expected) }) } @@ -77,7 +90,7 @@ class SpeakerEmotionInferenceSuite extends TransformerFuzzing[SpeakerEmotionInfe SSMLConversation(5, 8, """"B"""", "male", "calm"), SSMLConversation(10, 13, """"C"""", "male", "calm")))) -> ("""""" + + """xmlns:mstts='https://www.w3.org/2001/mstts' xml:lang='en-US'>""" + """"A", """ + """"B", """ + """"C"""" + "\n")), @@ -86,7 +99,7 @@ class SpeakerEmotionInferenceSuite extends TransformerFuzzing[SpeakerEmotionInfe SSMLConversation(5, 8, """"B"""", "male", "calm"), SSMLConversation(9, 12, """"C"""", "male", "calm")))) -> ("""Z""" + + """xmlns:mstts='https://www.w3.org/2001/mstts' xml:lang='en-US'>Z""" + """"A"Z"B"Z"C"""" + """Z""" + "\n")), @@ -96,7 +109,7 @@ class SpeakerEmotionInferenceSuite extends TransformerFuzzing[SpeakerEmotionInfe SSMLConversation(6, 9, """"C"""", "male", "calm")))) -> ("""""" + - """"A"""" + + """"A"""" + """"B"""" + """"C"""" + "\n"))) @@ -105,7 +118,7 @@ class SpeakerEmotionInferenceSuite extends TransformerFuzzing[SpeakerEmotionInfe val result = ssmlGenerator.formatSSML( test._1._1, "en-US", - "en-US-JennyNeural", + "en-US-JaneNeural", test._1._2) assertResult(test._2)(result) }) diff --git a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/speech/SpeechToTextSDKSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/speech/SpeechToTextSDKSuite.scala index 8b9f84fe2d..581b2ab4e6 100644 --- a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/speech/SpeechToTextSDKSuite.scala +++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/speech/SpeechToTextSDKSuite.scala @@ -232,9 +232,9 @@ class SpeechToTextSDKSuite extends TransformerFuzzing[SpeechToTextSDK] with Spee } test("SAS URL based access") { - val sasURL = "https://mmlspark.blob.core.windows.net/datasets/Speech/audio2.wav?sv=2019-12-12" + - "?sv=2021-10-04&st=2024-02-28T16%3A17%3A55Z&se=2026-03-30T15%3A33%3A00Z" + - "&sr=c&sp=rl&sig=5Oy6pEaF4hN3lj8uo6daLN%2F%2BiV9VD6XFNSy%2FZ8Upeeg%3D" + val sasURL = "https://mmlspark.blob.core.windows.net/datasets/Speech/audio2.wav" + + "?sp=r&st=2024-03-18T20:17:56Z&se=9999-03-19T04:17:56Z&spr=https&sv=2022-11-02" + + "&sr=b&sig=JUU1ojKzTbb45bSP7rOAVXajwrUEp9Ux20oCiD8%2Bb%2FM%3D" tryWithRetries(Array(100, 500)) { () => //For handling flaky build machines val uriDf = Seq(Tuple1(sasURL)) @@ -429,8 +429,8 @@ class ConversationTranscriptionSuite extends TransformerFuzzing[ConversationTran test("SAS URL based access") { val sasURL = "https://mmlspark.blob.core.windows.net/datasets/Speech/audio2.wav" + - "?sv=2021-10-04&st=2024-02-28T16%3A17%3A55Z&se=2026-03-30T15%3A33%3A00Z" + - "&sr=c&sp=rl&sig=5Oy6pEaF4hN3lj8uo6daLN%2F%2BiV9VD6XFNSy%2FZ8Upeeg%3D" + "?sp=r&st=2024-03-18T20:17:56Z&se=9999-03-19T04:17:56Z&spr=https&sv=2022-11-02" + + "&sr=b&sig=JUU1ojKzTbb45bSP7rOAVXajwrUEp9Ux20oCiD8%2Bb%2FM%3D" tryWithRetries(Array(100, 500)) { () => //For handling flaky build machines val uriDf = Seq(Tuple1(sasURL)) diff --git a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/speech/TextToSpeechSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/speech/TextToSpeechSuite.scala index 32b0cb9310..a73402739b 100644 --- a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/speech/TextToSpeechSuite.scala +++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/speech/TextToSpeechSuite.scala @@ -43,7 +43,7 @@ class TextToSpeechSuite extends TransformerFuzzing[TextToSpeech] with CognitiveK """""" + - """""" + + """""" + """This is how I sound right now.""", new File(saveDir, "test1.mp3").toString)).toDF("text", "filename")