From dc2f24ef19d8a8b6703e933fb84f13af19ff49d6 Mon Sep 17 00:00:00 2001 From: Ihor Makhnyk Date: Tue, 14 Nov 2023 01:22:00 +0200 Subject: [PATCH] Feat: Create Speech [tts-1, tts-1-hd] --- Sources/OpenAI/OpenAI.swift | 48 +++++++++++- .../Public/Models/AudioSpeechQuery.swift | 78 +++++++++++++++++++ .../Public/Models/AudioSpeechResult.swift | 55 +++++++++++++ .../OpenAI/Public/Models/Models/Models.swift | 7 ++ 4 files changed, 186 insertions(+), 2 deletions(-) create mode 100644 Sources/OpenAI/Public/Models/AudioSpeechQuery.swift create mode 100644 Sources/OpenAI/Public/Models/AudioSpeechResult.swift diff --git a/Sources/OpenAI/OpenAI.swift b/Sources/OpenAI/OpenAI.swift index 720f0f23..718d60b4 100644 --- a/Sources/OpenAI/OpenAI.swift +++ b/Sources/OpenAI/OpenAI.swift @@ -111,13 +111,20 @@ final public class OpenAI: OpenAIProtocol { public func audioTranslations(query: AudioTranslationQuery, completion: @escaping (Result) -> Void) { performRequest(request: MultipartFormDataRequest(body: query, url: buildURL(path: .audioTranslations)), completion: completion) } + + public func audioCreateSpeech(query: AudioSpeechQuery, completion: @escaping (Result) -> Void) { + performSpeechRequest(request: JSONRequest(body: query, url: buildURL(path: .audioSpeech)), completion: completion) + } + } extension OpenAI { func performRequest(request: any URLRequestBuildable, completion: @escaping (Result) -> Void) { do { - let request = try request.build(token: configuration.token, organizationIdentifier: configuration.organizationIdentifier, timeoutInterval: configuration.timeoutInterval) + let request = try request.build(token: configuration.token, + organizationIdentifier: configuration.organizationIdentifier, + timeoutInterval: configuration.timeoutInterval) let task = session.dataTask(with: request) { data, _, error in if let error = error { completion(.failure(error)) @@ -153,7 +160,9 @@ extension OpenAI { func performSteamingRequest(request: any URLRequestBuildable, onResult: 
@escaping (Result) -> Void, completion: ((Error?) -> Void)?) { do { - let request = try request.build(token: configuration.token, organizationIdentifier: configuration.organizationIdentifier, timeoutInterval: configuration.timeoutInterval) + let request = try request.build(token: configuration.token, + organizationIdentifier: configuration.organizationIdentifier, + timeoutInterval: configuration.timeoutInterval) let session = StreamingSession(urlRequest: request) session.onReceiveContent = {_, object in onResult(.success(object)) @@ -171,6 +180,40 @@ extension OpenAI { completion?(error) } } + + func performSpeechRequest(request: any URLRequestBuildable, completion: @escaping (Result) -> Void) { + do { + let request = try request.build(token: configuration.token, + organizationIdentifier: configuration.organizationIdentifier, + timeoutInterval: configuration.timeoutInterval) + + let task = session.dataTask(with: request) { data, _, error in + if let error = error { + completion(.failure(error)) + return + } + guard let data = data else { + completion(.failure(OpenAIError.emptyData)) + return + } + + var apiError: Error? 
= nil
+                if let decoded = try? JSONDecoder().decode(APIErrorResponse.self, from: data) {
+                    apiError = decoded
+                }
+
+                if let apiError = apiError {
+                    completion(.failure(apiError))
+                    return
+                }
+
+                completion(.success(AudioSpeechResult(audioData: data)))
+            }
+            task.resume()
+        } catch {
+            completion(.failure(error))
+        }
+    }
 }
 
 extension OpenAI {
@@ -194,6 +237,7 @@
     static let models = "/v1/models"
     static let moderations = "/v1/moderations"
     
+    static let audioSpeech = "/v1/audio/speech"
     static let audioTranscriptions = "/v1/audio/transcriptions"
     static let audioTranslations = "/v1/audio/translations"
 
diff --git a/Sources/OpenAI/Public/Models/AudioSpeechQuery.swift b/Sources/OpenAI/Public/Models/AudioSpeechQuery.swift
new file mode 100644
index 00000000..d1d23a83
--- /dev/null
+++ b/Sources/OpenAI/Public/Models/AudioSpeechQuery.swift
@@ -0,0 +1,78 @@
+//
+//  AudioSpeechQuery.swift
+//
+//
+//  Created by Ihor Makhnyk on 13.11.2023.
+//
+
+import Foundation
+
+/// Learn more: [OpenAI Speech – Documentation](https://platform.openai.com/docs/api-reference/audio/createSpeech)
+public struct AudioSpeechQuery: Codable, Equatable {
+    
+    /// Encapsulates the voices available for audio generation.
+    ///
+    /// To get acquainted with each of the voices and listen to the samples visit:
+    /// [OpenAI Text-to-Speech – Voice Options](https://platform.openai.com/docs/guides/text-to-speech/voice-options)
+    public enum AudioSpeechVoice: String, Codable {
+        case alloy,
+             echo,
+             fable,
+             onyx,
+             nova,
+             shimmer
+    }
+    
+    /// Encapsulates the response formats available for audio data.
+    ///
+    /// **Formats:**
+    ///   -  mp3
+    ///   -  opus
+    ///   -  aac
+    ///   -  flac
+    public enum AudioSpeechResponseFormat: String, Codable {
+        case mp3,
+             opus,
+             aac,
+             flac
+    }
+    /// One of the available TTS models: tts-1 or tts-1-hd
+    public let model: Model
+    /// The text to generate audio for. The maximum length is 4096 characters.
+    public let input: String
+    /// The voice to use when generating the audio. Supported voices are alloy, echo, fable, onyx, nova, and shimmer.
+    public let voice: AudioSpeechVoice
+    /// The format of the generated audio. Supported formats are mp3, opus, aac, and flac.
+    public let response_format: AudioSpeechResponseFormat
+    /// The speed of the generated audio. Enter a value between **0.25** and **4.0**. Default: **1.0**
+    public let speed: String?
+    
+    public init(model: Model?,
+                input: String,
+                voice: AudioSpeechVoice,
+                response_format: AudioSpeechResponseFormat = .mp3,
+                speed: Double?) {
+        
+        self.model = {
+            guard let model else { return .tts_1 }
+            let isModelOfIncorrectFormat = model != .tts_1 && model != .tts_1_hd
+            guard !isModelOfIncorrectFormat else {
+                NSLog("[AudioSpeech] 'AudioSpeechQuery' must have a valid Text-To-Speech model, 'tts-1' or 'tts-1-hd'. Setting model to 'tts-1'.")
+                return .tts_1
+            }
+            return model
+        }()
+        self.input = input
+        self.voice = voice
+        self.speed = {
+            guard let speed else { return "1.0" }
+            let isSpeedOutOfBounds = speed > 4.0 || speed < 0.25
+            guard !isSpeedOutOfBounds else {
+                NSLog("[AudioSpeech] Speed value must be between 0.25 and 4.0. Setting value to closest valid.")
+                return speed < 0.25 ? "0.25" : "4.0"
+            }
+            return String("\(speed)")
+        }()
+        self.response_format = response_format
+    }
+}
diff --git a/Sources/OpenAI/Public/Models/AudioSpeechResult.swift b/Sources/OpenAI/Public/Models/AudioSpeechResult.swift
new file mode 100644
index 00000000..dda078c8
--- /dev/null
+++ b/Sources/OpenAI/Public/Models/AudioSpeechResult.swift
@@ -0,0 +1,55 @@
+//
+//  AudioSpeechResult.swift
+//
+//
+//  Created by Ihor Makhnyk on 13.11.2023.
+//
+
+import Foundation
+import AVFoundation
+
+public struct AudioSpeechResult {
+    
+    /// Audio data for one of the following formats :`mp3`, `opus`, `aac`, `flac`
+    public let audioData: Data?
+    
+    /// Saves the audio data to a file at a specified file path.
+    ///
+    /// - Parameters:
+    ///   - name: The name for the file.
+    ///   - format: The format of the audio data, as defined in **`AudioSpeechQuery.AudioSpeechResponseFormat`**. For example: **`.mp3`**
+    ///   - path: The destination file path as a URL.
+    /// - Throws: Throws an NSError if there is an issue with writing the data to the specified file path.
+    public func saveAs(_ name: String, format: AudioSpeechQuery.AudioSpeechResponseFormat, to path: URL) throws {
+        guard let data = audioData else {
+            throw NSError(
+                domain: Bundle.main.bundleIdentifier ?? "OpenAI",
+                code: 1,
+                userInfo: [NSLocalizedDescriptionKey: "No audio data"]
+            )
+        }
+        let filename = "\(name).\(format.rawValue)"
+        let fileURL = path.appendingPathComponent(filename)
+        try data.write(to: fileURL)
+    }
+    
+    /// Gets an `AVAudioPlayer` instance configured with the audio data.
+    ///
+    /// - Returns: An `AVAudioPlayer` instance or nil if there is no audio data or if there is an issue initializing an `AVAudioPlayer`.
+    /// - Note: Import **AVFoundation**
+    public func getAudioPlayer() -> AVAudioPlayer? {
+        guard let data = audioData else {
+            NSLog("No audio data")
+            return nil
+        }
+        
+        do {
+            let audioPlayer = try AVAudioPlayer(data: data)
+            return audioPlayer
+        } catch {
+            NSLog("Error initializing audio player: \(error)")
+            return nil
+        }
+    }
+    
+}
diff --git a/Sources/OpenAI/Public/Models/Models/Models.swift b/Sources/OpenAI/Public/Models/Models/Models.swift
index 2b356889..289b7b6e 100644
--- a/Sources/OpenAI/Public/Models/Models/Models.swift
+++ b/Sources/OpenAI/Public/Models/Models/Models.swift
@@ -55,6 +55,13 @@
     static let textDavinci_001 = "text-davinci-001"
     static let codeDavinciEdit_001 = "code-davinci-edit-001"
     
+    // Speech
+    
+    /// The latest text to speech model, optimized for speed.
+    static let tts_1 = "tts-1"
+    /// The latest text to speech model, optimized for quality.
+    static let tts_1_hd = "tts-1-hd"
+    
     // Transcriptions / Translations
     
     static let whisper_1 = "whisper-1"