diff --git a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/speech/SpeakerEmotionInference.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/speech/SpeakerEmotionInference.scala
index 9c5adfe390..442cc21118 100644
--- a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/speech/SpeakerEmotionInference.scala
+++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/speech/SpeakerEmotionInference.scala
@@ -29,7 +29,7 @@ class SpeakerEmotionInference(override val uid: String)
setDefault(
locale -> Left("en-US"),
- voiceName -> Left("en-US-JennyNeural"),
+ voiceName -> Left("en-US-JaneNeural"),
text -> Left(this.uid + "_text"))
def urlPath: String = "cognitiveservices/v1"
@@ -54,7 +54,7 @@ class SpeakerEmotionInference(override val uid: String)
override protected def prepareEntity: Row => Option[AbstractHttpEntity] = { row =>
val body: String =
s"" +
+ s" xml:lang='en-US'>" +
s"${getValue(row, text)}"
Some(new StringEntity(body))
}
diff --git a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/speech/SpeakerEmotionInferenceSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/speech/SpeakerEmotionInferenceSuite.scala
index a27a0642af..42906dec8e 100644
--- a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/speech/SpeakerEmotionInferenceSuite.scala
+++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/speech/SpeakerEmotionInferenceSuite.scala
@@ -20,42 +20,55 @@ class SpeakerEmotionInferenceSuite extends TransformerFuzzing[SpeakerEmotionInfe
.setLocation("eastus")
.setSubscriptionKey(cognitiveKey)
.setLocale("en-US")
- .setVoiceName("en-US-JennyNeural")
+ .setVoiceName("en-US-JaneNeural")
.setTextCol("text")
.setOutputCol("ssml")
val testData: Map[String, String] = Map[String, String](
("\"A\" \"B\" \"C\"",
"" +
+ "xml:lang='en-US'>" +
"\"A\" " +
"\"B\" " +
"\"C\"\n"),
("\"I'm shouting excitedly!\" she shouted excitedly.",
"" +
+ "xmlns:mstts='https://www.w3.org/2001/mstts' xml:lang='en-US'>" +
"\"I'm shouting excitedly!\" she shouted " +
"excitedly.\n"),
("This text has no quotes in it, so isValid should be false",
"" +
+ "xmlns:mstts='https://www.w3.org/2001/mstts' xml:lang='en-US'>" +
"This text has no quotes in it, so isValid should be false\n"),
("\"This is an example of a sentence with unmatched quotes,\" she said.\"",
"" +
+ "xmlns:mstts='https://www.w3.org/2001/mstts' xml:lang='en-US'>" +
"\"This is an example of a sentence with unmatched quotes,\"" +
" she said.\"\n"))
lazy val df: DataFrame = testData.keys.toSeq.toDF("text")
+ def normalizeSSML(ssml: String): String = { // returns ssml with the values of the ignored attributes removed
+ val ignoredAttributes: List[String] = List("name", "style", "role") // attributes whose values are not compared
+ ignoredAttributes.foldLeft(ssml)((acc, attr) => // apply one replacement per ignored attribute, in list order
+ acc.replaceAll(s"""\\s+$attr='[^']*'""", s"$attr=")) // whitespace + attr='value' becomes attr= (single quotes only)
+ }
+
+ /*
+ We test the structure of the returned SSML, not the quality of the API, so specifics like role and style are ignored.
+ */
+ def assertFuzzyEquals(actualSSML: String, expectedSSML: String): Unit = {
+ assert(normalizeSSML(expectedSSML).equals(normalizeSSML(actualSSML))) // both sides are normalized before comparing
+ }
+
test("basic") {
val transformed = ssmlGenerator.transform(df)
transformed.show(truncate = false)
- transformed.collect().map(row => {
+ transformed.collect().foreach { row => // foreach: the loop body is run only for its assertion
val actual = testData.getOrElse(row.getString(0), "")
val expected = row.getString(2)
- assert(actual.equals(expected))
- })
+ assertFuzzyEquals(actual, expected) // NOTE(review): 'actual' is the testData lookup, 'expected' is read from the row - names look swapped
+ }
}
test("arbitrary df size") {
@@ -65,9 +78,9 @@ class SpeakerEmotionInferenceSuite extends TransformerFuzzing[SpeakerEmotionInfe
val actual = row.getString(5)
val expected =
"""""" +
+ """xmlns:mstts='https://www.w3.org/2001/mstts' xml:lang='en-US'>""" +
s"""Hello\n"""
- assert(actual.equals(expected))
+ assertFuzzyEquals(actual, expected)
})
}
@@ -77,7 +90,7 @@ class SpeakerEmotionInferenceSuite extends TransformerFuzzing[SpeakerEmotionInfe
SSMLConversation(5, 8, """"B"""", "male", "calm"),
SSMLConversation(10, 13, """"C"""", "male", "calm")))) ->
("""""" +
+ """xmlns:mstts='https://www.w3.org/2001/mstts' xml:lang='en-US'>""" +
""""A", """ +
""""B", """ +
""""C"""" + "\n")),
@@ -86,7 +99,7 @@ class SpeakerEmotionInferenceSuite extends TransformerFuzzing[SpeakerEmotionInfe
SSMLConversation(5, 8, """"B"""", "male", "calm"),
SSMLConversation(9, 12, """"C"""", "male", "calm")))) ->
("""Z""" +
+ """xmlns:mstts='https://www.w3.org/2001/mstts' xml:lang='en-US'>Z""" +
""""A"Z"B"Z"C"""" +
"""Z""" + "\n")),
@@ -96,7 +109,7 @@ class SpeakerEmotionInferenceSuite extends TransformerFuzzing[SpeakerEmotionInfe
SSMLConversation(6, 9, """"C"""", "male", "calm")))) ->
("""""" +
- """"A"""" +
+ """"A"""" +
""""B"""" +
""""C"""" + "\n")))
@@ -105,7 +118,7 @@ class SpeakerEmotionInferenceSuite extends TransformerFuzzing[SpeakerEmotionInfe
val result = ssmlGenerator.formatSSML(
test._1._1,
"en-US",
- "en-US-JennyNeural",
+ "en-US-JaneNeural",
test._1._2)
assertResult(test._2)(result)
})
diff --git a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/speech/SpeechToTextSDKSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/speech/SpeechToTextSDKSuite.scala
index 8b9f84fe2d..581b2ab4e6 100644
--- a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/speech/SpeechToTextSDKSuite.scala
+++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/speech/SpeechToTextSDKSuite.scala
@@ -232,9 +232,9 @@ class SpeechToTextSDKSuite extends TransformerFuzzing[SpeechToTextSDK] with Spee
}
test("SAS URL based access") {
- val sasURL = "https://mmlspark.blob.core.windows.net/datasets/Speech/audio2.wav?sv=2019-12-12" +
- "?sv=2021-10-04&st=2024-02-28T16%3A17%3A55Z&se=2026-03-30T15%3A33%3A00Z" +
- "&sr=c&sp=rl&sig=5Oy6pEaF4hN3lj8uo6daLN%2F%2BiV9VD6XFNSy%2FZ8Upeeg%3D"
+ val sasURL = "https://mmlspark.blob.core.windows.net/datasets/Speech/audio2.wav" +
+ "?sp=r&st=2024-03-18T20:17:56Z&se=9999-03-19T04:17:56Z&spr=https&sv=2022-11-02" +
+ "&sr=b&sig=JUU1ojKzTbb45bSP7rOAVXajwrUEp9Ux20oCiD8%2Bb%2FM%3D"
tryWithRetries(Array(100, 500)) { () => //For handling flaky build machines
val uriDf = Seq(Tuple1(sasURL))
@@ -429,8 +429,8 @@ class ConversationTranscriptionSuite extends TransformerFuzzing[ConversationTran
test("SAS URL based access") {
val sasURL = "https://mmlspark.blob.core.windows.net/datasets/Speech/audio2.wav" +
- "?sv=2021-10-04&st=2024-02-28T16%3A17%3A55Z&se=2026-03-30T15%3A33%3A00Z" +
- "&sr=c&sp=rl&sig=5Oy6pEaF4hN3lj8uo6daLN%2F%2BiV9VD6XFNSy%2FZ8Upeeg%3D"
+ "?sp=r&st=2024-03-18T20:17:56Z&se=9999-03-19T04:17:56Z&spr=https&sv=2022-11-02" +
+ "&sr=b&sig=JUU1ojKzTbb45bSP7rOAVXajwrUEp9Ux20oCiD8%2Bb%2FM%3D"
tryWithRetries(Array(100, 500)) { () => //For handling flaky build machines
val uriDf = Seq(Tuple1(sasURL))
diff --git a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/speech/TextToSpeechSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/speech/TextToSpeechSuite.scala
index 32b0cb9310..a73402739b 100644
--- a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/speech/TextToSpeechSuite.scala
+++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/speech/TextToSpeechSuite.scala
@@ -43,7 +43,7 @@ class TextToSpeechSuite extends TransformerFuzzing[TextToSpeech] with CognitiveK
"""""" +
- """""" +
+ """""" +
"""This is how I sound right now.""",
new File(saveDir, "test1.mp3").toString)).toDF("text", "filename")