Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

API: Parameter Update #169

Merged
merged 6 commits into from
Feb 15, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 17 additions & 17 deletions Demo/DemoChat/Sources/ChatStore.swift
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ public final class ChatStore: ObservableObject {
return
}

let weatherFunction = ChatFunctionDeclaration(
let weatherFunction = ChatQuery.ChatCompletionToolParam(function: .init(
name: "getWeatherData",
description: "Get the current weather in a given location",
parameters: .init(
Expand All @@ -95,38 +95,38 @@ public final class ChatStore: ObservableObject {
],
required: ["location"]
)
)
))

let functions = [weatherFunction]

let chatsStream: AsyncThrowingStream<ChatStreamResult, Error> = openAIClient.chatsStream(
query: ChatQuery(
model: model,
messages: conversation.messages.map { message in
Chat(role: message.role, content: message.content)
},
functions: functions
ChatQuery.ChatCompletionMessageParam(role: message.role, content: message.content)!
}, model: model,
tools: functions
)
)

var functionCallName = ""
var functionCallArguments = ""
var functionCalls = [(name: String, argument: String?)]()
for try await partialChatResult in chatsStream {
for choice in partialChatResult.choices {
let existingMessages = conversations[conversationIndex].messages
// Function calls are also streamed, so we need to accumulate.
if let functionCallDelta = choice.delta.functionCall {
if let nameDelta = functionCallDelta.name {
functionCallName += nameDelta
}
if let argumentsDelta = functionCallDelta.arguments {
functionCallArguments += argumentsDelta
choice.delta.tool_calls?.forEach { toolCallDelta in
if let functionCallDelta = toolCallDelta.function {
if let nameDelta = functionCallDelta.name {
functionCalls.append((nameDelta, functionCallDelta.arguments))
}
}
}
var messageText = choice.delta.content ?? ""
if let finishReason = choice.finishReason,
finishReason == "function_call" {
messageText += "Function call: name=\(functionCallName) arguments=\(functionCallArguments)"
if let finishReason = choice.finish_reason,
finishReason == .tool_calls
{
functionCalls.forEach { (name: String, argument: String?) in
messageText += "Function call: name=\(name) arguments=\(argument ?? "")\n"
}
}
let message = Message(
id: partialChatResult.id,
Expand Down
2 changes: 1 addition & 1 deletion Demo/DemoChat/Sources/ImageStore.swift
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import OpenAI
public final class ImageStore: ObservableObject {
public var openAIClient: OpenAIProtocol

@Published var images: [ImagesResult.URLResult] = []
@Published var images: [ImagesResult.Image] = []

public init(
openAIClient: OpenAIProtocol
Expand Down
2 changes: 1 addition & 1 deletion Demo/DemoChat/Sources/MiscStore.swift
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ public final class MiscStore: ObservableObject {
do {
let response = try await openAIClient.moderations(
query: ModerationsQuery(
input: message.content,
input: .init(message.content),
model: .textModerationLatest
)
)
Expand Down
2 changes: 1 addition & 1 deletion Demo/DemoChat/Sources/Models/Message.swift
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import OpenAI

struct Message {
var id: String
var role: Chat.Role
var role: ChatQuery.ChatCompletionMessageParam.Role
var content: String
var createdAt: Date
}
Expand Down
7 changes: 4 additions & 3 deletions Demo/DemoChat/Sources/SpeechStore.swift
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,16 @@ public final class SpeechStore: ObservableObject {

@MainActor
func createSpeech(_ query: AudioSpeechQuery) async {
guard let input = query.input, !input.isEmpty else { return }
let input = query.input
guard !input.isEmpty else { return }
do {
let response = try await openAIClient.audioCreateSpeech(query: query)
guard let data = response.audioData else { return }
let data = response.audio
let player = try? AVAudioPlayer(data: data)
let audioObject = AudioObject(prompt: input,
audioPlayer: player,
originResponse: response,
format: query.responseFormat.rawValue)
format: query.responseFormat?.rawValue ?? AudioSpeechQuery.AudioSpeechResponseFormat.mp3.rawValue)
audioObjects.append(audioObject)
} catch {
print(error.localizedDescription)
Expand Down
4 changes: 2 additions & 2 deletions Demo/DemoChat/Sources/UI/DetailView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ struct ChatBubble: View {
.foregroundColor(userForegroundColor)
.background(userBackgroundColor)
.clipShape(RoundedRectangle(cornerRadius: 16, style: .continuous))
case .function:
case .tool:
Text(message.content)
.font(.footnote.monospaced())
.padding(.horizontal, 16)
Expand All @@ -223,7 +223,7 @@ struct DetailView_Previews: PreviewProvider {
Message(id: "1", role: .assistant, content: "Hello, how can I help you today?", createdAt: Date(timeIntervalSinceReferenceDate: 0)),
Message(id: "2", role: .user, content: "I need help with my subscription.", createdAt: Date(timeIntervalSinceReferenceDate: 100)),
Message(id: "3", role: .assistant, content: "Sure, what seems to be the problem with your subscription?", createdAt: Date(timeIntervalSinceReferenceDate: 200)),
Message(id: "4", role: .function, content:
Message(id: "4", role: .tool, content:
"""
get_current_weather({
"location": "Glasgow, Scotland",
Expand Down
10 changes: 5 additions & 5 deletions Demo/DemoChat/Sources/UI/Images/ImageCreationView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ public struct ImageCreationView: View {

@State private var prompt: String = ""
@State private var n: Int = 1
@State private var size: String
private var sizes = ["256x256", "512x512", "1024x1024"]
@State private var size = ImagesQuery.Size.allCases.first!

private var sizes = ImagesQuery.Size.allCases

public init(store: ImageStore) {
self.store = store
Expand All @@ -37,7 +37,7 @@ public struct ImageCreationView: View {
HStack {
Picker("Size", selection: $size) {
ForEach(sizes, id: \.self) {
Text($0)
Text($0.rawValue)
}
}
}
Expand All @@ -56,7 +56,7 @@ public struct ImageCreationView: View {
}
if !$store.images.isEmpty {
Section("Images") {
ForEach($store.images, id: \.self) { image in
ForEach($store.images, id: \.url) { image in
let urlString = image.wrappedValue.url ?? ""
if let imageURL = URL(string: urlString), UIApplication.shared.canOpenURL(imageURL) {
LinkPreview(previewURL: imageURL)
Expand Down
2 changes: 1 addition & 1 deletion Demo/DemoChat/Sources/UI/Misc/ListModelsView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ public struct ListModelsView: View {

public var body: some View {
NavigationStack {
List($store.availableModels) { row in
List($store.availableModels.wrappedValue, id: \.id) { row in
Text(row.id)
}
.listStyle(.insetGrouped)
Expand Down
4 changes: 2 additions & 2 deletions Demo/DemoChat/Sources/UI/TextToSpeechView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ public struct TextToSpeechView: View {
}
if !$store.audioObjects.wrappedValue.isEmpty {
Section("Click to play, swipe to save:") {
ForEach(store.audioObjects) { object in
ForEach(store.audioObjects, id: \.id) { object in
HStack {
Text(object.prompt.capitalized)
Spacer()
Expand All @@ -122,7 +122,7 @@ public struct TextToSpeechView: View {
}
.swipeActions(edge: .trailing, allowsFullSwipe: false) {
Button {
presentUserDirectoryDocumentPicker(for: object.originResponse.audioData, filename: "GeneratedAudio.\(object.format)")
presentUserDirectoryDocumentPicker(for: object.originResponse.audio, filename: "GeneratedAudio.\(object.format)")
} label: {
Image(systemName: "square.and.arrow.down")
}
Expand Down
2 changes: 1 addition & 1 deletion Sources/OpenAI/OpenAI.swift
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ extension OpenAI {
return completion(.failure(OpenAIError.emptyData))
}

completion(.success(AudioSpeechResult(audioData: data)))
completion(.success(AudioSpeechResult(audio: data)))
}
task.resume()
} catch {
Expand Down
17 changes: 11 additions & 6 deletions Sources/OpenAI/Public/Models/AudioSpeechQuery.swift
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@

import Foundation

/// Generates audio from the input text.
/// Learn more: [OpenAI Speech – Documentation](https://platform.openai.com/docs/api-reference/audio/createSpeech)
public struct AudioSpeechQuery: Codable, Equatable {
public struct AudioSpeechQuery: Codable {

/// Encapsulates the voices available for audio generation.
///
Expand Down Expand Up @@ -36,15 +37,19 @@ public struct AudioSpeechQuery: Codable, Equatable {
case aac
case flac
}

/// The text to generate audio for. The maximum length is 4096 characters.
public let input: String
/// One of the available TTS models: tts-1 or tts-1-hd
public let model: Model
/// The text to generate audio for. The maximum length is 4096 characters.
public let input: String?
/// The voice to use when generating the audio. Supported voices are alloy, echo, fable, onyx, nova, and shimmer.
/// The voice to use when generating the audio. Supported voices are alloy, echo, fable, onyx, nova, and shimmer. Previews of the voices are available in the Text to speech guide.
/// https://platform.openai.com/docs/guides/text-to-speech/voice-options
public let voice: AudioSpeechVoice
/// The format of the generated audio. Supported formats are mp3, opus, aac, and flac.
public let responseFormat: AudioSpeechResponseFormat
/// The speed of the generated audio. Enter a value between **0.25** and **4.0**. Default: **1.0**
/// Defaults to mp3
public let responseFormat: AudioSpeechResponseFormat?
/// The speed of the generated audio. Select a value from **0.25** to **4.0**. **1.0** is the default.
/// Defaults to 1
public let speed: String?

public enum CodingKeys: String, CodingKey {
Expand Down
8 changes: 5 additions & 3 deletions Sources/OpenAI/Public/Models/AudioSpeechResult.swift
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@

import Foundation

public struct AudioSpeechResult {

/// The audio file content.
/// Learn more: [OpenAI Speech – Documentation](https://platform.openai.com/docs/api-reference/audio/createSpeech)
public struct AudioSpeechResult: Codable, Equatable {

/// Audio data in one of the following formats: `mp3`, `opus`, `aac`, `flac`
public let audioData: Data?
public let audio: Data
}
65 changes: 55 additions & 10 deletions Sources/OpenAI/Public/Models/AudioTranscriptionQuery.swift
Original file line number Diff line number Diff line change
Expand Up @@ -7,42 +7,87 @@

import Foundation

public enum AudioResponseFormat: String, Codable, Equatable {
public struct AudioTranscriptionQuery: Codable {

public enum ResponseFormat: String, Codable, Equatable, CaseIterable {
case json
case text
case verboseJson = "verbose_json"
case srt
case vtt
}

public struct AudioTranscriptionQuery: Codable, Equatable {
public typealias ResponseFormat = AudioResponseFormat

/// The audio file object (not file name) to transcribe, in one of these formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
public let file: Data
public let fileName: String
public let fileType: Self.FileType
/// ID of the model to use. Only whisper-1 is currently available.
public let model: Model
/// The format of the transcript output, in one of these options: json, text, srt, verbose_json, or vtt.
/// Defaults to json
public let responseFormat: Self.ResponseFormat?

/// An optional text to guide the model's style or continue a previous audio segment. The prompt should match the audio language.
public let prompt: String?
/// The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.
/// Defaults to 0
public let temperature: Double?
/// The language of the input audio. Supplying the input language in ISO-639-1 format will improve accuracy and latency.
/// https://platform.openai.com/docs/guides/speech-to-text/prompting
public let language: String?
public init(file: Data, fileName: String, model: Model, prompt: String? = nil, temperature: Double? = nil, language: String? = nil, responseFormat: Self.ResponseFormat? = nil) {

public init(file: Data, fileType: Self.FileType, model: Model, prompt: String? = nil, temperature: Double? = nil, language: String? = nil, responseFormat: Self.ResponseFormat? = nil) {
self.file = file
self.fileName = fileName
self.fileType = fileType
self.model = model
self.prompt = prompt
self.temperature = temperature
self.language = language
self.responseFormat = responseFormat
}

/// Audio container formats that can be attached to the query.
///
/// Each case supplies the file name and content type used for the `file`
/// entry of the multipart form body.
public enum FileType: String, Codable, Equatable, CaseIterable {
    case flac
    case mp3, mpga
    case mp4, m4a
    case mpeg
    case ogg
    case wav
    case webm

    /// Token shared by `fileName` and `contentType`.
    ///
    /// `mpga` is reported as `mp3` and `m4a` as `mp4`; every other case
    /// uses its own raw value.
    private var canonicalToken: String {
        switch self {
        case .mpga:
            return Self.mp3.rawValue
        case .m4a:
            return Self.mp4.rawValue
        case .flac, .mp3, .mp4, .mpeg, .ogg, .wav, .webm:
            return rawValue
        }
    }

    /// File name sent with the upload, always of the form `speech.<token>`.
    var fileName: String {
        "speech." + canonicalToken
    }

    /// Content type sent with the upload, always of the form `audio/<token>`.
    var contentType: String {
        "audio/" + canonicalToken
    }
}
}

extension AudioTranscriptionQuery: MultipartFormDataBodyEncodable {

func encode(boundary: String) -> Data {
let bodyBuilder = MultipartFormDataBodyBuilder(boundary: boundary, entries: [
.file(paramName: "file", fileName: fileName, fileData: file, contentType: "audio/mpeg"),
.file(paramName: "file", fileName: fileType.fileName, fileData: file, contentType: fileType.contentType),
.string(paramName: "model", value: model),
.string(paramName: "prompt", value: prompt),
.string(paramName: "temperature", value: temperature),
Expand Down
3 changes: 2 additions & 1 deletion Sources/OpenAI/Public/Models/AudioTranscriptionResult.swift
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import Foundation

/// Result model holding the text produced by a transcription.
public struct AudioTranscriptionResult: Codable, Equatable {
    /// The transcribed text.
    public let text: String
}
25 changes: 17 additions & 8 deletions Sources/OpenAI/Public/Models/AudioTranslationQuery.swift
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,29 @@

import Foundation

public struct AudioTranslationQuery: Codable, Equatable {
public typealias ResponseFormat = AudioResponseFormat

/// Translates audio into English.
public struct AudioTranslationQuery: Codable {
public typealias FileType = AudioTranscriptionQuery.FileType
public typealias ResponseFormat = AudioTranscriptionQuery.ResponseFormat

/// The audio file object (not file name) to translate, in one of these formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
public let file: Data
public let fileName: String
public let fileType: Self.FileType
/// ID of the model to use. Only whisper-1 is currently available.
public let model: Model

/// The format of the transcript output, in one of these options: json, text, srt, verbose_json, or vtt.
/// Defaults to json
public let responseFormat: Self.ResponseFormat?
/// An optional text to guide the model's style or continue a previous audio segment. The prompt should be in English.
/// https://platform.openai.com/docs/guides/speech-to-text/prompting
public let prompt: String?
/// The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.
/// Defaults to 0
public let temperature: Double?

public init(file: Data, fileName: String, model: Model, prompt: String? = nil, temperature: Double? = nil, responseFormat: Self.ResponseFormat? = nil) {
public init(file: Data, fileType: Self.FileType, model: Model, prompt: String? = nil, temperature: Double? = nil, responseFormat: Self.ResponseFormat? = nil) {
self.file = file
self.fileName = fileName
self.fileType = fileType
self.model = model
self.prompt = prompt
self.temperature = temperature
Expand All @@ -32,7 +41,7 @@ extension AudioTranslationQuery: MultipartFormDataBodyEncodable {

func encode(boundary: String) -> Data {
let bodyBuilder = MultipartFormDataBodyBuilder(boundary: boundary, entries: [
.file(paramName: "file", fileName: fileName, fileData: file, contentType: "audio/mpeg"),
.file(paramName: "file", fileName: fileType.fileName, fileData: file, contentType: fileType.contentType),
.string(paramName: "model", value: model),
.string(paramName: "prompt", value: prompt),
.string(paramName: "response_format", value: responseFormat),
Expand Down
Loading
Loading