Skip to content

Commit

Permalink
Feat: Create Speech [tts-1, tts-1-hd]
Browse files Browse the repository at this point in the history
  • Loading branch information
Ihor Makhnyk committed Nov 13, 2023
1 parent 41b3515 commit dc2f24e
Show file tree
Hide file tree
Showing 4 changed files with 186 additions and 2 deletions.
48 changes: 46 additions & 2 deletions Sources/OpenAI/OpenAI.swift
Original file line number Diff line number Diff line change
Expand Up @@ -111,13 +111,20 @@ final public class OpenAI: OpenAIProtocol {
/// Sends an audio translation query to the `.audioTranslations` endpoint as a multipart form request.
///
/// - Parameters:
///   - query: The translation query used as the request body.
///   - completion: Receives the decoded `AudioTranslationResult` or the error produced while performing the request.
public func audioTranslations(query: AudioTranslationQuery, completion: @escaping (Result<AudioTranslationResult, Error>) -> Void) {
    let multipartRequest = MultipartFormDataRequest<AudioTranslationResult>(
        body: query,
        url: buildURL(path: .audioTranslations)
    )
    performRequest(request: multipartRequest, completion: completion)
}

/// Sends a text-to-speech query to the `.audioSpeech` endpoint as a JSON request.
///
/// - Parameters:
///   - query: The speech query used as the request body.
///   - completion: Receives an `AudioSpeechResult` or the error produced while performing the request.
public func audioCreateSpeech(query: AudioSpeechQuery, completion: @escaping (Result<AudioSpeechResult, Error>) -> Void) {
    let speechRequest = JSONRequest<AudioSpeechResult>(
        body: query,
        url: buildURL(path: .audioSpeech)
    )
    performSpeechRequest(request: speechRequest, completion: completion)
}

}

extension OpenAI {

func performRequest<ResultType: Codable>(request: any URLRequestBuildable, completion: @escaping (Result<ResultType, Error>) -> Void) {
do {
let request = try request.build(token: configuration.token, organizationIdentifier: configuration.organizationIdentifier, timeoutInterval: configuration.timeoutInterval)
let request = try request.build(token: configuration.token,
organizationIdentifier: configuration.organizationIdentifier,
timeoutInterval: configuration.timeoutInterval)
let task = session.dataTask(with: request) { data, _, error in
if let error = error {
completion(.failure(error))
Expand Down Expand Up @@ -153,7 +160,9 @@ extension OpenAI {

func performSteamingRequest<ResultType: Codable>(request: any URLRequestBuildable, onResult: @escaping (Result<ResultType, Error>) -> Void, completion: ((Error?) -> Void)?) {
do {
let request = try request.build(token: configuration.token, organizationIdentifier: configuration.organizationIdentifier, timeoutInterval: configuration.timeoutInterval)
let request = try request.build(token: configuration.token,
organizationIdentifier: configuration.organizationIdentifier,
timeoutInterval: configuration.timeoutInterval)
let session = StreamingSession<ResultType>(urlRequest: request)
session.onReceiveContent = {_, object in
onResult(.success(object))
Expand All @@ -171,6 +180,40 @@ extension OpenAI {
completion?(error)
}
}

/// Performs a speech request and hands the raw response bytes back as an `AudioSpeechResult`.
///
/// `completion` is invoked exactly once, with one of:
/// - `.failure` carrying the error thrown while building the request,
/// - `.failure` carrying the transport error reported by the data task,
/// - `.failure(OpenAIError.emptyData)` when the task delivers no data,
/// - `.failure` carrying an `APIErrorResponse` when the body decodes as one,
/// - `.success` wrapping the response bytes otherwise.
///
/// - Parameters:
///   - request: Builder that produces the `URLRequest` from the client configuration.
///   - completion: Callback receiving the outcome of the request.
func performSpeechRequest(request: any URLRequestBuildable, completion: @escaping (Result<AudioSpeechResult, Error>) -> Void) {
    do {
        let request = try request.build(token: configuration.token,
                                        organizationIdentifier: configuration.organizationIdentifier,
                                        timeoutInterval: configuration.timeoutInterval)

        let task = session.dataTask(with: request) { data, _, error in
            if let error = error {
                completion(.failure(error))
                return
            }
            guard let data = data else {
                completion(.failure(OpenAIError.emptyData))
                return
            }

            // The body is expected to be binary audio. If it instead decodes as an
            // `APIErrorResponse`, report that error rather than passing the error
            // payload off as audio data. Decoding genuine audio bytes simply fails
            // and falls through to the success path.
            if let apiError = try? JSONDecoder().decode(APIErrorResponse.self, from: data) {
                completion(.failure(apiError))
                return
            }

            completion(.success(AudioSpeechResult(audioData: data)))
        }
        task.resume()
    } catch {
        completion(.failure(error))
    }
}
}

extension OpenAI {
Expand All @@ -194,6 +237,7 @@ extension APIPath {
static let models = "/v1/models"
static let moderations = "/v1/moderations"

static let audioSpeech = "/v1/audio/speech"
static let audioTranscriptions = "/v1/audio/transcriptions"
static let audioTranslations = "/v1/audio/translations"

Expand Down
78 changes: 78 additions & 0 deletions Sources/OpenAI/Public/Models/AudioSpeechQuery.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
//
// AudioSpeechQuery.swift
//
//
// Created by Ihor Makhnyk on 13.11.2023.
//

import Foundation

/// Request body for generating speech audio from text.
///
/// Learn more: [OpenAI Speech – Documentation](https://platform.openai.com/docs/api-reference/audio/createSpeech)
public struct AudioSpeechQuery: Codable, Equatable {

    /// Encapsulates the voices available for audio generation.
    ///
    /// To get acquainted with each of the voices and listen to the samples visit:
    /// [OpenAI Text-to-Speech – Voice Options](https://platform.openai.com/docs/guides/text-to-speech/voice-options)
    public enum AudioSpeechVoice: String, Codable {
        case alloy,
             echo,
             fable,
             onyx,
             nova,
             shimmer
    }

    /// Encapsulates the response formats available for audio data.
    ///
    /// **Formats:**
    /// - mp3
    /// - opus
    /// - aac
    /// - flac
    public enum AudioSpeechResponseFormat: String, Codable {
        case mp3,
             opus,
             aac,
             flac
    }

    /// One of the available TTS models: tts-1 or tts-1-hd
    public let model: Model
    /// The text to generate audio for. The maximum length is 4096 characters.
    public let input: String?
    /// The voice to use when generating the audio. Supported voices are alloy, echo, fable, onyx, nova, and shimmer.
    public let voice: AudioSpeechVoice
    /// The format of the generated audio. Supported formats are mp3, opus, aac, and flac.
    public let response_format: AudioSpeechResponseFormat
    /// The speed of the generated audio, stored as a string. Valid values lie between **0.25** and **4.0**. Default: **1.0**
    public let speed: String?

    /// Creates a speech query, normalizing `model` and `speed` to valid values.
    ///
    /// - Parameters:
    ///   - model: `tts-1` or `tts-1-hd`. `nil` or any other model falls back to `tts-1`.
    ///   - input: The text to generate audio for.
    ///   - voice: The voice to use when generating the audio.
    ///   - response_format: The audio format to request. Defaults to `.mp3`.
    ///   - speed: Playback speed. `nil` (or NaN) becomes `1.0`; values outside `0.25...4.0`
    ///     are clamped to the closest bound.
    public init(model: Model?,
                input: String,
                voice: AudioSpeechVoice,
                response_format: AudioSpeechResponseFormat = .mp3,
                speed: Double?) {
        self.model = Self.validatedModel(model)
        self.input = input
        self.voice = voice
        self.speed = Self.normalizedSpeed(speed)
        self.response_format = response_format
    }

    /// Returns `model` when it is one of the text-to-speech models, otherwise `tts-1`.
    private static func validatedModel(_ model: Model?) -> Model {
        guard let model else { return .tts_1 }
        let isTextToSpeechModel = model == .tts_1 || model == .tts_1_hd
        guard isTextToSpeechModel else {
            NSLog("[AudioSpeech] 'AudioSpeechQuery' must have a valid Text-To-Speech model, 'tts-1' or 'tts-1-hd'. Setting model to 'tts-1'.")
            return .tts_1
        }
        return model
    }

    /// Converts `speed` to its string form, substituting the default or the closest valid bound when needed.
    private static func normalizedSpeed(_ speed: Double?) -> String {
        // NaN compares false against every bound, so treat it like a missing value.
        guard let speed, !speed.isNaN else { return "1.0" }
        // A value is out of bounds when it is below the minimum OR above the maximum.
        let isSpeedOutOfBounds = speed < 0.25 || speed > 4.0
        guard !isSpeedOutOfBounds else {
            NSLog("[AudioSpeech] Speed value must be between 0.25 and 4.0. Setting value to closest valid.")
            return speed < 0.25 ? "0.25" : "4.0"
        }
        return "\(speed)"
    }
}
55 changes: 55 additions & 0 deletions Sources/OpenAI/Public/Models/AudioSpeechResult.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
//
// AudioSpeechResult.swift
//
//
// Created by Ihor Makhnyk on 13.11.2023.
//

import Foundation
import AVFoundation

/// Wraps the audio bytes produced by a speech request and offers helpers to save or play them.
public struct AudioSpeechResult {

    /// Audio data for one of the following formats: `mp3`, `opus`, `aac`, `flac`
    public let audioData: Data?

    /// Saves the audio data to a file inside the specified directory.
    ///
    /// The file is written to `path` with the name `"<name>.<format raw value>"`.
    ///
    /// - Parameters:
    ///   - name: The name for the file, without extension.
    ///   - format: The format of the audio data, as defined in **`AudioSpeechQuery.AudioSpeechResponseFormat`**. For example: **`.mp3`**
    ///   - path: The destination directory as a URL.
    /// - Throws: An `NSError` with code `1` when there is no audio data, or the error raised while writing the data to the file.
    public func saveAs(_ name: String, format: AudioSpeechQuery.AudioSpeechResponseFormat, to path: URL) throws {
        guard let data = audioData else {
            throw NSError(
                // `bundleIdentifier` is nil for processes without a bundle identifier;
                // fall back to a fixed domain instead of crashing while reporting an error.
                domain: Bundle.main.bundleIdentifier ?? "OpenAI.AudioSpeechResult",
                code: 1,
                userInfo: [NSLocalizedDescriptionKey: "No audio data"]
            )
        }
        let filename = "\(name).\(format.rawValue)"
        let fileURL = path.appendingPathComponent(filename)
        try data.write(to: fileURL)
    }

    /// Gets an `AVAudioPlayer` instance configured with the audio data.
    ///
    /// - Returns: An `AVAudioPlayer` instance, or nil if there is no audio data or if initializing the `AVAudioPlayer` fails (the reason is logged).
    /// - Note: Import **AVFoundation**
    public func getAudioPlayer() -> AVAudioPlayer? {
        guard let data = audioData else {
            NSLog("No audio data")
            return nil
        }

        do {
            return try AVAudioPlayer(data: data)
        } catch {
            NSLog("Error initializing audio player: \(error)")
            return nil
        }
    }

}
7 changes: 7 additions & 0 deletions Sources/OpenAI/Public/Models/Models/Models.swift
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,13 @@ public extension Model {
static let textDavinci_001 = "text-davinci-001"
static let codeDavinciEdit_001 = "code-davinci-edit-001"

// Speech

/// The latest text to speech model, optimized for speed.
static let tts_1 = "tts-1"
/// The latest text to speech model, optimized for quality.
static let tts_1_hd = "tts-1-hd"

// Transcriptions / Translations

static let whisper_1 = "whisper-1"
Expand Down

0 comments on commit dc2f24e

Please sign in to comment.