-
Notifications
You must be signed in to change notification settings - Fork 384
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Feat: Create Speech [tts-1, tts-1-hd]
- Loading branch information
Ihor Makhnyk
committed
Nov 13, 2023
1 parent
41b3515
commit dc2f24e
Showing
4 changed files
with
186 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
// | ||
// AudioSpeechQuery.swift | ||
// | ||
// | ||
// Created by Ihor Makhnyk on 13.11.2023. | ||
// | ||
|
||
import Foundation | ||
|
||
/// A request describing the text, voice, model, output format and speed for speech generation.
///
/// Learn more: [OpenAI Speech – Documentation](https://platform.openai.com/docs/api-reference/audio/createSpeech)
public struct AudioSpeechQuery: Codable, Equatable {

    /// Encapsulates the voices available for audio generation.
    ///
    /// To get acquainted with each of the voices and listen to the samples visit:
    /// [OpenAI Text-to-Speech – Voice Options](https://platform.openai.com/docs/guides/text-to-speech/voice-options)
    public enum AudioSpeechVoice: String, Codable {
        case alloy
        case echo
        case fable
        case onyx
        case nova
        case shimmer
    }

    /// Encapsulates the response formats available for audio data.
    ///
    /// **Formats:**
    /// - mp3
    /// - opus
    /// - aac
    /// - flac
    public enum AudioSpeechResponseFormat: String, Codable {
        case mp3
        case opus
        case aac
        case flac
    }

    /// Lowest speed accepted by the initializer without clamping.
    private static let minimumSpeed: Double = 0.25
    /// Highest speed accepted by the initializer without clamping.
    private static let maximumSpeed: Double = 4.0
    /// Speed used when the caller passes `nil` (or a NaN value).
    private static let defaultSpeed: Double = 1.0

    /// One of the available TTS models: tts-1 or tts-1-hd
    public let model: Model
    /// The text to generate audio for. The maximum length is 4096 characters.
    public let input: String?
    /// The voice to use when generating the audio. Supported voices are alloy, echo, fable, onyx, nova, and shimmer.
    public let voice: AudioSpeechVoice
    /// The format to return the audio in. Supported formats are mp3, opus, aac, and flac.
    public let response_format: AudioSpeechResponseFormat
    /// The speed of the generated audio, stored as text. A value between **0.25** and **4.0**. Default: **1.0**
    public let speed: String?

    /// Creates a speech query, normalizing the model and the speed.
    ///
    /// - Parameters:
    ///   - model: The TTS model. `nil`, or anything other than `tts-1` / `tts-1-hd`, falls back to `tts-1`.
    ///   - input: The text to generate audio for.
    ///   - voice: The voice to use when generating the audio.
    ///   - response_format: The audio format of the response. Defaults to `.mp3`.
    ///   - speed: The playback speed. `nil` becomes `1.0`; values outside `0.25...4.0` are
    ///     clamped to the closest bound.
    public init(model: Model?,
                input: String,
                voice: AudioSpeechVoice,
                response_format: AudioSpeechResponseFormat = .mp3,
                speed: Double?) {

        self.model = Self.normalizedModel(model)
        self.input = input
        self.voice = voice
        self.speed = Self.normalizedSpeed(speed)
        self.response_format = response_format
    }

    /// Returns `model` when it is a text-to-speech model, otherwise `tts-1`.
    private static func normalizedModel(_ model: Model?) -> Model {
        guard let model else { return .tts_1 }
        let isSupported = model == .tts_1 || model == .tts_1_hd
        guard isSupported else {
            NSLog("[AudioSpeech] 'AudioSpeechQuery' must have a valid Text-To-Speech model, 'tts-1' or 'tts-1-hd'. Setting model to 'tts-1'.")
            return .tts_1
        }
        return model
    }

    /// Returns the textual form of `speed`, clamped into the supported range.
    private static func normalizedSpeed(_ speed: Double?) -> String {
        guard let speed else { return "\(defaultSpeed)" }
        // A value is out of bounds when it is below the minimum OR above the maximum;
        // the bounds themselves are valid. NaN is never contained in the range.
        guard (minimumSpeed...maximumSpeed).contains(speed) else {
            NSLog("[AudioSpeech] Speed value must be between 0.25 and 4.0. Setting value to closest valid.")
            // NaN has no "closest" bound, so use the default instead.
            if speed.isNaN { return "\(defaultSpeed)" }
            return speed < minimumSpeed ? "\(minimumSpeed)" : "\(maximumSpeed)"
        }
        return "\(speed)"
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
// | ||
// AudioSpeechResult.swift | ||
// | ||
// | ||
// Created by Ihor Makhnyk on 13.11.2023. | ||
// | ||
|
||
import Foundation | ||
import AVFoundation | ||
|
||
/// The result of a speech generation request: the raw audio bytes, plus helpers to save or play them.
public struct AudioSpeechResult {

    /// Audio data for one of the following formats: `mp3`, `opus`, `aac`, `flac`
    public let audioData: Data?

    /// Saves the audio data to a file inside the given directory.
    ///
    /// The file is written to `path/<name>.<format raw value>`, e.g. `speech.mp3`.
    ///
    /// - Parameters:
    ///   - name: The name for the file, without extension.
    ///   - format: The format of the audio data, as defined in **`AudioSpeechQuery.AudioSpeechResponseFormat`**. For example: **`.mp3`**
    ///   - path: The destination directory as a URL.
    /// - Throws: An `NSError` with code `1` if there is no audio data, or the error raised while
    ///   writing the data to the resulting file URL.
    public func saveAs(_ name: String, format: AudioSpeechQuery.AudioSpeechResponseFormat, to path: URL) throws {
        guard let data = audioData else {
            throw NSError(
                // The main bundle may have no identifier (command-line tools, some test hosts);
                // fall back to a fixed domain instead of crashing on a force-unwrap.
                domain: Bundle.main.bundleIdentifier ?? "AudioSpeechResult",
                code: 1,
                userInfo: [NSLocalizedDescriptionKey: "No audio data"]
            )
        }
        let filename = "\(name).\(format.rawValue)"
        let fileURL = path.appendingPathComponent(filename)
        try data.write(to: fileURL)
    }

    /// Gets an `AVAudioPlayer` instance configured with the audio data.
    ///
    /// - Returns: An `AVAudioPlayer` instance, or nil if there is no audio data or if there is an issue initializing an `AVAudioPlayer`.
    /// - Note: Import **AVFoundation**
    public func getAudioPlayer() -> AVAudioPlayer? {
        guard let data = audioData else {
            NSLog("No audio data")
            return nil
        }

        do {
            return try AVAudioPlayer(data: data)
        } catch {
            // Pass the message as an argument, not as the format string, so a '%' in the
            // error description is never interpreted as a format specifier.
            NSLog("%@", "Error initializing audio player: \(error)")
            return nil
        }
    }

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters