From c7ce7e67018839fafa7ed9845e6bb1f4fc5828aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Do=C4=9Fu=20Emre=20Demir=C3=A7ivi?= Date: Wed, 10 Apr 2024 12:41:50 +0300 Subject: [PATCH 1/6] Fixes the `m4a` content type sent as `mp4` instead --- Sources/OpenAI/Public/Models/AudioTranscriptionQuery.swift | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Sources/OpenAI/Public/Models/AudioTranscriptionQuery.swift b/Sources/OpenAI/Public/Models/AudioTranscriptionQuery.swift index 1acdd724..d1449d4a 100644 --- a/Sources/OpenAI/Public/Models/AudioTranscriptionQuery.swift +++ b/Sources/OpenAI/Public/Models/AudioTranscriptionQuery.swift @@ -58,8 +58,6 @@ public enum ResponseFormat: String, Codable, Equatable, CaseIterable { switch self { case .mpga: fileName += Self.mp3.rawValue - case .m4a: - fileName += Self.mp4.rawValue default: fileName += self.rawValue } @@ -72,8 +70,6 @@ public enum ResponseFormat: String, Codable, Equatable, CaseIterable { switch self { case .mpga: contentType += Self.mp3.rawValue - case .m4a: - contentType += Self.mp4.rawValue default: contentType += self.rawValue } From e52dea9f9dad59378464be9a72cc005f3e708f14 Mon Sep 17 00:00:00 2001 From: James J Kalafus Date: Fri, 16 Feb 2024 15:35:10 -0500 Subject: [PATCH 2/6] gpt-4-vision-preview support fix and test https://github.com/MacPaw/OpenAI/pull/169 https://github.com/MacPaw/OpenAI/issues/174 --- Sources/OpenAI/Public/Models/ChatQuery.swift | 67 ++++++++++++++----- Sources/OpenAI/Public/Models/ChatResult.swift | 11 +-- Tests/OpenAITests/OpenAITestsDecoder.swift | 46 ++++++++++++- 3 files changed, 100 insertions(+), 24 deletions(-) diff --git a/Sources/OpenAI/Public/Models/ChatQuery.swift b/Sources/OpenAI/Public/Models/ChatQuery.swift index 7c6078bc..fac3e486 100644 --- a/Sources/OpenAI/Public/Models/ChatQuery.swift +++ b/Sources/OpenAI/Public/Models/ChatQuery.swift @@ -115,12 +115,12 @@ public struct ChatQuery: Equatable, Codable, Streamable { case assistant(Self.ChatCompletionAssistantMessageParam) case 
tool(Self.ChatCompletionToolMessageParam) - public var content: Self.ChatCompletionUserMessageParam.Content? { get { // TODO: String type except for .user + public var content: Self.ChatCompletionUserMessageParam.Content? { get { switch self { case .system(let systemMessage): return Self.ChatCompletionUserMessageParam.Content.string(systemMessage.content) case .user(let userMessage): - return userMessage.content // TODO: Content type + return userMessage.content case .assistant(let assistantMessage): if let content = assistantMessage.content { return Self.ChatCompletionUserMessageParam.Content.string(content) @@ -178,7 +178,6 @@ public struct ChatQuery: Equatable, Codable, Streamable { public init?( role: Role, content: String? = nil, - imageUrl: URL? = nil, name: String? = nil, toolCalls: [Self.ChatCompletionAssistantMessageParam.ChatCompletionMessageToolCallParam]? = nil, toolCallId: String? = nil @@ -193,8 +192,6 @@ public struct ChatQuery: Equatable, Codable, Streamable { case .user: if let content { self = .user(.init(content: .init(string: content), name: name)) - } else if let imageUrl { - self = .user(.init(content: .init(chatCompletionContentPartImageParam: .init(imageUrl: .init(url: imageUrl.absoluteString, detail: .auto))), name: name)) } else { return nil } @@ -209,6 +206,20 @@ public struct ChatQuery: Equatable, Codable, Streamable { } } + public init?( + role: Role, + content: [ChatCompletionUserMessageParam.Content.VisionContent], + name: String? 
= nil + ) { + switch role { + case .user: + self = .user(.init(content: .vision(content), name: name)) + default: + return nil + } + + } + private init?( content: String, role: Role, @@ -330,8 +341,7 @@ public struct ChatQuery: Equatable, Codable, Streamable { public enum Content: Codable, Equatable { case string(String) - case chatCompletionContentPartTextParam(ChatCompletionContentPartTextParam) - case chatCompletionContentPartImageParam(ChatCompletionContentPartImageParam) + case vision([VisionContent]) public var string: String? { get { switch self { @@ -342,6 +352,33 @@ public struct ChatQuery: Equatable, Codable, Streamable { } }} + public init(string: String) { + self = .string(string) + } + + public init(vision: [VisionContent]) { + self = .vision(vision) + } + + public enum CodingKeys: CodingKey { + case string + case vision + } + + public func encode(to encoder: Encoder) throws { + var container = encoder.singleValueContainer() + switch self { + case .string(let a0): + try container.encode(a0) + case .vision(let a0): + try container.encode(a0) + } + } + + public enum VisionContent: Codable, Equatable { + case chatCompletionContentPartTextParam(ChatCompletionContentPartTextParam) + case chatCompletionContentPartImageParam(ChatCompletionContentPartImageParam) + public var text: String? 
{ get { switch self { case .chatCompletionContentPartTextParam(let text): @@ -360,10 +397,6 @@ public struct ChatQuery: Equatable, Codable, Streamable { } }} - public init(string: String) { - self = .string(string) - } - public init(chatCompletionContentPartTextParam: ChatCompletionContentPartTextParam) { self = .chatCompletionContentPartTextParam(chatCompletionContentPartTextParam) } @@ -375,8 +408,6 @@ public struct ChatQuery: Equatable, Codable, Streamable { public func encode(to encoder: Encoder) throws { var container = encoder.singleValueContainer() switch self { - case .string(let a0): - try container.encode(a0) case .chatCompletionContentPartTextParam(let a0): try container.encode(a0) case .chatCompletionContentPartImageParam(let a0): @@ -385,7 +416,6 @@ public struct ChatQuery: Equatable, Codable, Streamable { } enum CodingKeys: CodingKey { - case string case chatCompletionContentPartTextParam case chatCompletionContentPartImageParam } @@ -409,7 +439,7 @@ public struct ChatQuery: Equatable, Codable, Streamable { public init(imageUrl: ImageURL) { self.imageUrl = imageUrl - self.type = "imageUrl" + self.type = "image_url" } public struct ImageURL: Codable, Equatable { @@ -424,6 +454,12 @@ public struct ChatQuery: Equatable, Codable, Streamable { self.detail = detail } + public init(url: Data, detail: Detail) { + self.init( + url: "data:image/jpeg;base64,\(url.base64EncodedString())", + detail: detail) + } + public enum Detail: String, Codable, Equatable, CaseIterable { case auto case low @@ -438,6 +474,7 @@ public struct ChatQuery: Equatable, Codable, Streamable { } } } + } internal struct ChatCompletionMessageParam: Codable, Equatable { typealias Role = ChatQuery.ChatCompletionMessageParam.Role diff --git a/Sources/OpenAI/Public/Models/ChatResult.swift b/Sources/OpenAI/Public/Models/ChatResult.swift index 5e42c37c..c2f7c12d 100644 --- a/Sources/OpenAI/Public/Models/ChatResult.swift +++ b/Sources/OpenAI/Public/Models/ChatResult.swift @@ -145,15 +145,10 @@ 
extension ChatQuery.ChatCompletionMessageParam.ChatCompletionUserMessageParam.Co return } catch {} do { - let text = try container.decode(ChatCompletionContentPartTextParam.self) - self = .chatCompletionContentPartTextParam(text) + let vision = try container.decode([VisionContent].self) + self = .vision(vision) return } catch {} - do { - let image = try container.decode(ChatCompletionContentPartImageParam.self) - self = .chatCompletionContentPartImageParam(image) - return - } catch {} - throw DecodingError.typeMismatch(Self.self, .init(codingPath: [Self.CodingKeys.string, CodingKeys.chatCompletionContentPartTextParam, CodingKeys.chatCompletionContentPartImageParam], debugDescription: "Content: expected String, ChatCompletionContentPartTextParam, ChatCompletionContentPartImageParam")) + throw DecodingError.typeMismatch(Self.self, .init(codingPath: [Self.CodingKeys.string, Self.CodingKeys.vision], debugDescription: "Content: expected String || Vision")) } } diff --git a/Tests/OpenAITests/OpenAITestsDecoder.swift b/Tests/OpenAITests/OpenAITestsDecoder.swift index 25e73c50..9d7c8354 100644 --- a/Tests/OpenAITests/OpenAITestsDecoder.swift +++ b/Tests/OpenAITests/OpenAITestsDecoder.swift @@ -142,7 +142,51 @@ class OpenAITestsDecoder: XCTestCase { XCTAssertEqual(imageQueryAsDict, expectedValueAsDict) } - + + func testChatQueryWithVision() async throws { + let chatQuery = ChatQuery(messages: [ +// .init(role: .user, content: [ +// .chatCompletionContentPartTextParam(.init(text: "What's in this image?")), +// .chatCompletionContentPartImageParam(.init(imageUrl: .init(url: "https://some.url/image.jpeg", detail: .auto))) +// ])! 
+ .user(.init(content: .vision([ + .chatCompletionContentPartTextParam(.init(text: "What's in this image?")), + .chatCompletionContentPartImageParam(.init(imageUrl: .init(url: "https://some.url/image.jpeg", detail: .auto))) + ]))) + ], model: Model.gpt4_vision_preview, maxTokens: 300) + let expectedValue = """ + { + "model": "gpt-4-vision-preview", + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What's in this image?" + }, + { + "type": "image_url", + "image_url": { + "url": "https://some.url/image.jpeg", + "detail": "auto" + } + } + ] + } + ], + "max_tokens": 300, + "stream": false + } + """ + + // To compare serialized JSONs we first convert them both into NSDictionary which are comparable (unlike native Swift dictionaries) + let chatQueryAsDict = try jsonDataAsNSDictionary(JSONEncoder().encode(chatQuery)) + let expectedValueAsDict = try jsonDataAsNSDictionary(expectedValue.data(using: .utf8)!) + + XCTAssertEqual(chatQueryAsDict, expectedValueAsDict) + } + func testChatQueryWithFunctionCall() async throws { let chatQuery = ChatQuery( messages: [ From 55f54cb30509403c5f5f1597fb9ecc5f9626e6b2 Mon Sep 17 00:00:00 2001 From: Quill Zhou Date: Wed, 10 Apr 2024 15:19:19 +0800 Subject: [PATCH 3/6] Update Models.swift --- Sources/OpenAI/Public/Models/Models/Models.swift | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Sources/OpenAI/Public/Models/Models/Models.swift b/Sources/OpenAI/Public/Models/Models/Models.swift index 514dc99b..6d18cc67 100644 --- a/Sources/OpenAI/Public/Models/Models/Models.swift +++ b/Sources/OpenAI/Public/Models/Models/Models.swift @@ -12,7 +12,11 @@ public extension Model { // Chat Completion // GPT-4 - /// `gpt-4-turbo`, the latest gpt-4 model with improved instruction following, JSON mode, reproducible outputs, parallel function calling and more. Maximum of 4096 output tokens + /// `gpt-4-turbo`, The latest GPT-4 Turbo model with vision capabilities.
Vision requests can now use JSON mode and function calling and more. Context window: 128,000 tokens + static let gpt4_turbo = "gpt-4-turbo" + + /// `gpt-4-turbo`, gpt-4 model with improved instruction following, JSON mode, reproducible outputs, parallel function calling and more. Maximum of 4096 output tokens + @available(*, deprecated, message: "Please upgrade to the newer model") static let gpt4_turbo_preview = "gpt-4-turbo-preview" /// `gpt-4-vision-preview`, able to understand images, in addition to all other GPT-4 Turbo capabilities. From b17b8203bea4133f8f43a1908f2647427d59d930 Mon Sep 17 00:00:00 2001 From: Kelvin Lau Date: Mon, 13 May 2024 22:50:17 -0700 Subject: [PATCH 4/6] Adds gpt-4o --- Sources/OpenAI/Public/Models/Models/Models.swift | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Sources/OpenAI/Public/Models/Models/Models.swift b/Sources/OpenAI/Public/Models/Models/Models.swift index 6d18cc67..f773f3e8 100644 --- a/Sources/OpenAI/Public/Models/Models/Models.swift +++ b/Sources/OpenAI/Public/Models/Models/Models.swift @@ -12,6 +12,9 @@ public extension Model { // Chat Completion // GPT-4 + /// `gpt-4o`, currently the most advanced, multimodal flagship model that's cheaper and faster than GPT-4 Turbo. + static let gpt4_o = "gpt-4o" + /// `gpt-4-turbo`, The latest GPT-4 Turbo model with vision capabilities. Vision requests can now use JSON mode and function calling and more. 
Context window: 128,000 tokens static let gpt4_turbo = "gpt-4-turbo" From de3cb7975101c6cdc57d05a04e0d9cd2a884a575 Mon Sep 17 00:00:00 2001 From: Vishnu Ravi Date: Wed, 15 May 2024 08:27:58 -0400 Subject: [PATCH 5/6] Fixes failing unit test --- Tests/OpenAITests/OpenAITests.swift | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Tests/OpenAITests/OpenAITests.swift b/Tests/OpenAITests/OpenAITests.swift index 3416f24d..56e9a4c1 100644 --- a/Tests/OpenAITests/OpenAITests.swift +++ b/Tests/OpenAITests/OpenAITests.swift @@ -373,7 +373,9 @@ class OpenAITests: XCTestCase { let jsonRequest = JSONRequest(body: completionQuery, url: URL(string: "http://google.com")!) let urlRequest = try jsonRequest.build(token: configuration.token, organizationIdentifier: configuration.organizationIdentifier, timeoutInterval: configuration.timeoutInterval) - XCTAssertEqual(urlRequest.value(forHTTPHeaderField: "Authorization"), "Bearer \(configuration.token)") + let unwrappedToken = try XCTUnwrap(configuration.token) + + XCTAssertEqual(urlRequest.value(forHTTPHeaderField: "Authorization"), "Bearer \(unwrappedToken)") XCTAssertEqual(urlRequest.value(forHTTPHeaderField: "Content-Type"), "application/json") XCTAssertEqual(urlRequest.value(forHTTPHeaderField: "OpenAI-Organization"), configuration.organizationIdentifier) XCTAssertEqual(urlRequest.timeoutInterval, configuration.timeoutInterval) @@ -385,7 +387,9 @@ class OpenAITests: XCTestCase { let jsonRequest = MultipartFormDataRequest(body: completionQuery, url: URL(string: "http://google.com")!) 
let urlRequest = try jsonRequest.build(token: configuration.token, organizationIdentifier: configuration.organizationIdentifier, timeoutInterval: configuration.timeoutInterval) - XCTAssertEqual(urlRequest.value(forHTTPHeaderField: "Authorization"), "Bearer \(configuration.token)") + let unwrappedToken = try XCTUnwrap(configuration.token) + + XCTAssertEqual(urlRequest.value(forHTTPHeaderField: "Authorization"), "Bearer \(unwrappedToken)") XCTAssertEqual(urlRequest.value(forHTTPHeaderField: "OpenAI-Organization"), configuration.organizationIdentifier) XCTAssertEqual(urlRequest.timeoutInterval, configuration.timeoutInterval) } From 9fc0277256c7bcdb0f64b0055ac88082a8160790 Mon Sep 17 00:00:00 2001 From: Vishnu Ravi Date: Wed, 15 May 2024 13:20:08 -0400 Subject: [PATCH 6/6] Update demo app --- Demo/Demo.xcodeproj/project.pbxproj | 4 ++-- Demo/DemoChat/Package.swift | 4 ++-- Demo/DemoChat/Sources/UI/DetailView.swift | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Demo/Demo.xcodeproj/project.pbxproj b/Demo/Demo.xcodeproj/project.pbxproj index edde7d8d..d6416d86 100644 --- a/Demo/Demo.xcodeproj/project.pbxproj +++ b/Demo/Demo.xcodeproj/project.pbxproj @@ -315,7 +315,7 @@ "INFOPLIST_KEY_UIStatusBarStyle[sdk=iphonesimulator*]" = UIStatusBarStyleDefault; INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight"; INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight"; - IPHONEOS_DEPLOYMENT_TARGET = 16.4; + IPHONEOS_DEPLOYMENT_TARGET = 17.0; LD_RUNPATH_SEARCH_PATHS = "@executable_path/Frameworks"; "LD_RUNPATH_SEARCH_PATHS[sdk=macosx*]" = "@executable_path/../Frameworks"; MACOSX_DEPLOYMENT_TARGET = 13.3; @@ -354,7 +354,7 @@ "INFOPLIST_KEY_UIStatusBarStyle[sdk=iphonesimulator*]" = UIStatusBarStyleDefault; 
INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight"; INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight"; - IPHONEOS_DEPLOYMENT_TARGET = 16.4; + IPHONEOS_DEPLOYMENT_TARGET = 17.0; LD_RUNPATH_SEARCH_PATHS = "@executable_path/Frameworks"; "LD_RUNPATH_SEARCH_PATHS[sdk=macosx*]" = "@executable_path/../Frameworks"; MACOSX_DEPLOYMENT_TARGET = 13.3; diff --git a/Demo/DemoChat/Package.swift b/Demo/DemoChat/Package.swift index b111a7a3..b63512d3 100644 --- a/Demo/DemoChat/Package.swift +++ b/Demo/DemoChat/Package.swift @@ -1,11 +1,11 @@ -// swift-tools-version: 5.8 +// swift-tools-version: 5.9 // The swift-tools-version declares the minimum version of Swift required to build this package. import PackageDescription let package = Package( name: "DemoChat", - platforms: [.macOS(.v13), .iOS(.v16)], + platforms: [.macOS(.v13), .iOS(.v17)], products: [ .library( name: "DemoChat", diff --git a/Demo/DemoChat/Sources/UI/DetailView.swift b/Demo/DemoChat/Sources/UI/DetailView.swift index 7aa44479..2e138d72 100644 --- a/Demo/DemoChat/Sources/UI/DetailView.swift +++ b/Demo/DemoChat/Sources/UI/DetailView.swift @@ -19,7 +19,7 @@ struct DetailView: View { @State private var showsModelSelectionSheet = false @State private var selectedChatModel: Model = .gpt4_0613 - private static let availableChatModels: [Model] = [.gpt3_5Turbo, .gpt4] + private static let availableChatModels: [Model] = [.gpt3_5Turbo, .gpt4, .gpt4_o] let conversation: Conversation let error: Error?