diff --git a/Demo/DemoChat/Sources/UI/DetailView.swift b/Demo/DemoChat/Sources/UI/DetailView.swift index 55ff60af..9e2a07e9 100644 --- a/Demo/DemoChat/Sources/UI/DetailView.swift +++ b/Demo/DemoChat/Sources/UI/DetailView.swift @@ -176,6 +176,7 @@ struct DetailView: View { .frame(width: 24, height: 24) .padding(.trailing) } + .disabled(inputText.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty) } .padding(.bottom) } @@ -183,7 +184,12 @@ struct DetailView: View { private func tapSendMessage( scrollViewProxy: ScrollViewProxy ) { - sendMessage(inputText, selectedChatModel) + let message = inputText.trimmingCharacters(in: .whitespacesAndNewlines) + if message.isEmpty { + return + } + + sendMessage(message, selectedChatModel) inputText = "" // if let lastMessage = conversation.messages.last { diff --git a/README.md b/README.md index 1d27a250..8617e5a9 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,9 @@ This repository contains Swift community-maintained implementation over [OpenAI] - [Chats](#chats) - [Chats Streaming](#chats-streaming) - [Images](#images) + - [Create Image](#create-image) + - [Create Image Edit](#create-image-edit) + - [Create Image Variation](#create-image-variation) - [Audio](#audio) - [Audio Transcriptions](#audio-transcriptions) - [Audio Translations](#audio-translations) @@ -129,7 +132,7 @@ struct CompletionsResult: Codable, Equatable { **Example** ```swift -let query = CompletionsQuery(model: .textDavinci_003, prompt: "What is 42?", temperature: 0, max_tokens: 100, top_p: 1, frequency_penalty: 0, presence_penalty: 0, stop: ["\\n"]) +let query = CompletionsQuery(model: .textDavinci_003, prompt: "What is 42?", temperature: 0, maxTokens: 100, topP: 1, frequencyPenalty: 0, presencePenalty: 0, stop: ["\\n"]) openAI.completions(query: query) { result in //Handle result here } @@ -385,6 +388,8 @@ Given a prompt and/or an input image, the model will generate a new image. 
As Artificial Intelligence continues to develop, so too does the intriguing concept of Dall-E. Developed by OpenAI, a research lab for artificial intelligence purposes, Dall-E has been classified as an AI system that can generate images based on descriptions provided by humans. With its potential applications spanning from animation and illustration to design and engineering - not to mention the endless possibilities in between - it's easy to see why there is such excitement over this new technology. +### Create Image + **Request** ```swift @@ -409,6 +414,7 @@ struct ImagesResult: Codable, Equatable { public let data: [URLResult] } ``` + **Example** ```swift @@ -433,6 +439,79 @@ let result = try await openAI.images(query: query) ![Generated Image](https://user-images.githubusercontent.com/1411778/213134082-ba988a72-fca0-4213-8805-63e5f8324cab.png) +### Create Image Edit + +Creates an edited or extended image given an original image and a prompt. + +**Request** + +```swift +public struct ImageEditsQuery: Codable { + /// The image to edit. Must be a valid PNG file, less than 4MB, and square. If mask is not provided, image must have transparency, which will be used as the mask. + public let image: Data + public let fileName: String + /// An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where image should be edited. Must be a valid PNG file, less than 4MB, and have the same dimensions as image. + public let mask: Data? + public let maskFileName: String? + /// A text description of the desired image(s). The maximum length is 1000 characters. + public let prompt: String + /// The number of images to generate. Must be between 1 and 10. + public let n: Int? + /// The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024. + public let size: String? +} +``` + +**Response** + +Uses the ImagesResult response similarly to ImagesQuery. 
+
+**Example**
+
+```swift
+let data = image.pngData()
+let query = ImageEditsQuery(image: data, fileName: "whitecat.png", prompt: "White cat with heterochromia sitting on the kitchen table with a bowl of food", n: 1, size: "1024x1024")
+openAI.imageEdits(query: query) { result in
+    //Handle result here
+}
+//or
+let result = try await openAI.imageEdits(query: query)
+```
+
+### Create Image Variation
+
+Creates a variation of a given image.
+
+**Request**
+
+```swift
+public struct ImageVariationsQuery: Codable {
+    /// The image to use as the basis for the variation(s). Must be a valid PNG file, less than 4MB, and square.
+    public let image: Data
+    public let fileName: String
+    /// The number of images to generate. Must be between 1 and 10.
+    public let n: Int?
+    /// The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024.
+    public let size: String?
+}
+```
+
+**Response**
+
+Uses the ImagesResult response similarly to ImagesQuery.
+
+**Example**
+
+```swift
+let data = image.pngData()
+let query = ImageVariationsQuery(image: data, fileName: "whitecat.png", n: 1, size: "1024x1024")
+openAI.imageVariations(query: query) { result in
+    //Handle result here
+}
+//or
+let result = try await openAI.imageVariations(query: query)
+```
+
 Review [Images Documentation](https://platform.openai.com/docs/api-reference/images) for more info.
### Audio diff --git a/Sources/OpenAI/OpenAI.swift b/Sources/OpenAI/OpenAI.swift index 5e88ffcf..720f0f23 100644 --- a/Sources/OpenAI/OpenAI.swift +++ b/Sources/OpenAI/OpenAI.swift @@ -68,6 +68,14 @@ final public class OpenAI: OpenAIProtocol { performRequest(request: JSONRequest(body: query, url: buildURL(path: .images)), completion: completion) } + public func imageEdits(query: ImageEditsQuery, completion: @escaping (Result) -> Void) { + performRequest(request: MultipartFormDataRequest(body: query, url: buildURL(path: .imageEdits)), completion: completion) + } + + public func imageVariations(query: ImageVariationsQuery, completion: @escaping (Result) -> Void) { + performRequest(request: MultipartFormDataRequest(body: query, url: buildURL(path: .imageVariations)), completion: completion) + } + public func embeddings(query: EmbeddingsQuery, completion: @escaping (Result) -> Void) { performRequest(request: JSONRequest(body: query, url: buildURL(path: .embeddings)), completion: completion) } @@ -180,7 +188,6 @@ typealias APIPath = String extension APIPath { static let completions = "/v1/completions" - static let images = "/v1/images/generations" static let embeddings = "/v1/embeddings" static let chats = "/v1/chat/completions" static let edits = "/v1/edits" @@ -190,6 +197,10 @@ extension APIPath { static let audioTranscriptions = "/v1/audio/transcriptions" static let audioTranslations = "/v1/audio/translations" + static let images = "/v1/images/generations" + static let imageEdits = "/v1/images/edits" + static let imageVariations = "/v1/images/variations" + func withPath(_ path: String) -> String { self + "/" + path } diff --git a/Sources/OpenAI/Private/MultipartFormDataBodyBuilder.swift b/Sources/OpenAI/Private/MultipartFormDataBodyBuilder.swift index 741968e7..73832b70 100644 --- a/Sources/OpenAI/Private/MultipartFormDataBodyBuilder.swift +++ b/Sources/OpenAI/Private/MultipartFormDataBodyBuilder.swift @@ -32,11 +32,13 @@ private extension MultipartFormDataEntry { 
var body = Data() switch self { case .file(let paramName, let fileName, let fileData, let contentType): - body.append("--\(boundary)\r\n") - body.append("Content-Disposition: form-data; name=\"\(paramName)\"; filename=\"\(fileName)\"\r\n") - body.append("Content-Type: \(contentType)\r\n\r\n") - body.append(fileData) - body.append("\r\n") + if let fileName, let fileData { + body.append("--\(boundary)\r\n") + body.append("Content-Disposition: form-data; name=\"\(paramName)\"; filename=\"\(fileName)\"\r\n") + body.append("Content-Type: \(contentType)\r\n\r\n") + body.append(fileData) + body.append("\r\n") + } case .string(let paramName, let value): if let value { body.append("--\(boundary)\r\n") diff --git a/Sources/OpenAI/Private/MultipartFormDataEntry.swift b/Sources/OpenAI/Private/MultipartFormDataEntry.swift index 3e1f7de1..93e9120c 100644 --- a/Sources/OpenAI/Private/MultipartFormDataEntry.swift +++ b/Sources/OpenAI/Private/MultipartFormDataEntry.swift @@ -9,6 +9,6 @@ import Foundation enum MultipartFormDataEntry { - case file(paramName: String, fileName: String, fileData: Data, contentType: String), + case file(paramName: String, fileName: String?, fileData: Data?, contentType: String), string(paramName: String, value: Any?) 
} diff --git a/Sources/OpenAI/Private/StreamingSession.swift b/Sources/OpenAI/Private/StreamingSession.swift index 55902f09..a69e46cf 100644 --- a/Sources/OpenAI/Private/StreamingSession.swift +++ b/Sources/OpenAI/Private/StreamingSession.swift @@ -28,6 +28,8 @@ final class StreamingSession: NSObject, Identifiable, URLSe return session }() + private var previousChunkBuffer = "" + init(urlRequest: URLRequest) { self.urlRequest = urlRequest } @@ -47,14 +49,25 @@ final class StreamingSession: NSObject, Identifiable, URLSe onProcessingError?(self, StreamingError.unknownContent) return } - let jsonObjects = stringContent + processJSON(from: stringContent) + } + +} + +extension StreamingSession { + + private func processJSON(from stringContent: String) { + let jsonObjects = "\(previousChunkBuffer)\(stringContent)" .components(separatedBy: "data:") .filter { $0.isEmpty == false } .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) } + + previousChunkBuffer = "" + guard jsonObjects.isEmpty == false, jsonObjects.first != streamingCompletionMarker else { return } - jsonObjects.forEach { jsonContent in + jsonObjects.enumerated().forEach { (index, jsonContent) in guard jsonContent != streamingCompletionMarker else { return } @@ -77,9 +90,14 @@ final class StreamingSession: NSObject, Identifiable, URLSe let decoded = try JSONDecoder().decode(APIErrorResponse.self, from: jsonData) onProcessingError?(self, decoded) } catch { - onProcessingError?(self, apiError) + if index == jsonObjects.count - 1 { + previousChunkBuffer = "data: \(jsonContent)" // Chunk ends in a partial JSON + } else { + onProcessingError?(self, apiError) + } } } } } + } diff --git a/Sources/OpenAI/Public/Models/ImageEditsQuery.swift b/Sources/OpenAI/Public/Models/ImageEditsQuery.swift new file mode 100644 index 00000000..a208c8d7 --- /dev/null +++ b/Sources/OpenAI/Public/Models/ImageEditsQuery.swift @@ -0,0 +1,46 @@ +// +// ImageEditsQuery.swift +// +// +// Created by Aled Samuel on 24/04/2023. 
+// + +import Foundation + +public struct ImageEditsQuery: Codable { + /// The image to edit. Must be a valid PNG file, less than 4MB, and square. If mask is not provided, image must have transparency, which will be used as the mask. + public let image: Data + public let fileName: String + /// An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where image should be edited. Must be a valid PNG file, less than 4MB, and have the same dimensions as image. + public let mask: Data? + public let maskFileName: String? + /// A text description of the desired image(s). The maximum length is 1000 characters. + public let prompt: String + /// The number of images to generate. Must be between 1 and 10. + public let n: Int? + /// The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024. + public let size: String? + + public init(image: Data, fileName: String, mask: Data? = nil, maskFileName: String? = nil, prompt: String, n: Int? = nil, size: String? 
= nil) { + self.image = image + self.fileName = fileName + self.mask = mask + self.maskFileName = maskFileName + self.prompt = prompt + self.n = n + self.size = size + } +} + +extension ImageEditsQuery: MultipartFormDataBodyEncodable { + func encode(boundary: String) -> Data { + let bodyBuilder = MultipartFormDataBodyBuilder(boundary: boundary, entries: [ + .file(paramName: "image", fileName: fileName, fileData: image, contentType: "image/png"), + .file(paramName: "mask", fileName: maskFileName, fileData: mask, contentType: "image/png"), + .string(paramName: "prompt", value: prompt), + .string(paramName: "n", value: n), + .string(paramName: "size", value: size) + ]) + return bodyBuilder.build() + } +} diff --git a/Sources/OpenAI/Public/Models/ImageVariationsQuery.swift b/Sources/OpenAI/Public/Models/ImageVariationsQuery.swift new file mode 100644 index 00000000..35276a06 --- /dev/null +++ b/Sources/OpenAI/Public/Models/ImageVariationsQuery.swift @@ -0,0 +1,36 @@ +// +// ImageVariationsQuery.swift +// +// +// Created by Aled Samuel on 24/04/2023. +// + +import Foundation + +public struct ImageVariationsQuery: Codable { + /// The image to edit. Must be a valid PNG file, less than 4MB, and square. + public let image: Data + public let fileName: String + /// The number of images to generate. Must be between 1 and 10. + public let n: Int? + /// The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024. + public let size: String? + + public init(image: Data, fileName: String, n: Int? = nil, size: String? 
= nil) { + self.image = image + self.fileName = fileName + self.n = n + self.size = size + } +} + +extension ImageVariationsQuery: MultipartFormDataBodyEncodable { + func encode(boundary: String) -> Data { + let bodyBuilder = MultipartFormDataBodyBuilder(boundary: boundary, entries: [ + .file(paramName: "image", fileName: fileName, fileData: image, contentType: "image/png"), + .string(paramName: "n", value: n), + .string(paramName: "size", value: size) + ]) + return bodyBuilder.build() + } +} diff --git a/Sources/OpenAI/Public/Protocols/OpenAIProtocol+Async.swift b/Sources/OpenAI/Public/Protocols/OpenAIProtocol+Async.swift index 75cb8a52..909704ca 100644 --- a/Sources/OpenAI/Public/Protocols/OpenAIProtocol+Async.swift +++ b/Sources/OpenAI/Public/Protocols/OpenAIProtocol+Async.swift @@ -53,6 +53,36 @@ public extension OpenAIProtocol { } } } + + func imageEdits( + query: ImageEditsQuery + ) async throws -> ImagesResult { + try await withCheckedThrowingContinuation { continuation in + imageEdits(query: query) { result in + switch result { + case let .success(success): + return continuation.resume(returning: success) + case let .failure(failure): + return continuation.resume(throwing: failure) + } + } + } + } + + func imageVariations( + query: ImageVariationsQuery + ) async throws -> ImagesResult { + try await withCheckedThrowingContinuation { continuation in + imageVariations(query: query) { result in + switch result { + case let .success(success): + return continuation.resume(returning: success) + case let .failure(failure): + return continuation.resume(throwing: failure) + } + } + } + } func embeddings( query: EmbeddingsQuery diff --git a/Sources/OpenAI/Public/Protocols/OpenAIProtocol+Combine.swift b/Sources/OpenAI/Public/Protocols/OpenAIProtocol+Combine.swift index 4a258032..da8b7dfb 100644 --- a/Sources/OpenAI/Public/Protocols/OpenAIProtocol+Combine.swift +++ b/Sources/OpenAI/Public/Protocols/OpenAIProtocol+Combine.swift @@ -42,6 +42,20 @@ public extension 
OpenAIProtocol { } .eraseToAnyPublisher() } + + func imageEdits(query: ImageEditsQuery) -> AnyPublisher { + Future { + imageEdits(query: query, completion: $0) + } + .eraseToAnyPublisher() + } + + func imageVariations(query: ImageVariationsQuery) -> AnyPublisher { + Future { + imageVariations(query: query, completion: $0) + } + .eraseToAnyPublisher() + } func embeddings(query: EmbeddingsQuery) -> AnyPublisher { Future { diff --git a/Sources/OpenAI/Public/Protocols/OpenAIProtocol.swift b/Sources/OpenAI/Public/Protocols/OpenAIProtocol.swift index 785d5641..6519e8fe 100644 --- a/Sources/OpenAI/Public/Protocols/OpenAIProtocol.swift +++ b/Sources/OpenAI/Public/Protocols/OpenAIProtocol.swift @@ -56,11 +56,45 @@ public protocol OpenAIProtocol { ``` - Parameters: - - query: An `ImagesQuery` object containing the input parameters for the API request. This includes the query parameters such as the model, text prompt, image size, and other settings. + - query: An `ImagesQuery` object containing the input parameters for the API request. This includes the query parameters such as the text prompt, image size, and other settings. - completion: A closure which receives the result when the API request finishes. The closure's parameter, `Result`, will contain either the `ImagesResult` object with the generated images, or an error if the request failed. **/ func images(query: ImagesQuery, completion: @escaping (Result) -> Void) + /** + This function sends an image edit query to the OpenAI API and retrieves generated images in response. The Images Edit API enables you to edit images or graphics using OpenAI's powerful deep learning models. 
+    
+    Example:
+    ```
+    let query = ImageEditsQuery(image: "@whitecat.png", prompt: "White cat with heterochromia sitting on the kitchen table with a bowl of food", n: 1, size: "1024x1024")
+    openAI.imageEdits(query: query) { result in
+        //Handle result here
+    }
+    ```
+    
+    - Parameters:
+      - query: An `ImageEditsQuery` object containing the input parameters for the API request. This includes the query parameters such as the image to be edited, an image to be used as a mask if applicable, text prompt, image size, and other settings.
+      - completion: A closure which receives the result when the API request finishes. The closure's parameter, `Result<ImagesResult, Error>`, will contain either the `ImagesResult` object with the generated images, or an error if the request failed.
+    **/
+    func imageEdits(query: ImageEditsQuery, completion: @escaping (Result<ImagesResult, Error>) -> Void)
+    
+    /**
+    This function sends an image variation query to the OpenAI API and retrieves generated images in response. The Images Variations API enables you to create a variation of a given image using OpenAI's powerful deep learning models.
+    
+    Example:
+    ```
+    let query = ImageVariationsQuery(image: "@whitecat.png", n: 1, size: "1024x1024")
+    openAI.imageVariations(query: query) { result in
+        //Handle result here
+    }
+    ```
+    
+    - Parameters:
+      - query: An `ImageVariationsQuery` object containing the input parameters for the API request. This includes the query parameters such as the image to use as a basis for the variation(s), image size, and other settings.
+      - completion: A closure which receives the result when the API request finishes. The closure's parameter, `Result<ImagesResult, Error>`, will contain either the `ImagesResult` object with the generated images, or an error if the request failed.
+    **/
+    func imageVariations(query: ImageVariationsQuery, completion: @escaping (Result<ImagesResult, Error>) -> Void)
+    
     /**
     This function sends an embeddings query to the OpenAI API and retrieves embeddings in response. 
The Embeddings API enables you to generate high-dimensional vector representations of texts, which can be used for various natural language processing tasks such as semantic similarity, clustering, and classification. diff --git a/Tests/OpenAITests/OpenAITests.swift b/Tests/OpenAITests/OpenAITests.swift index 3aa976c2..a063269d 100644 --- a/Tests/OpenAITests/OpenAITests.swift +++ b/Tests/OpenAITests/OpenAITests.swift @@ -62,6 +62,44 @@ class OpenAITests: XCTestCase { XCTAssertEqual(inError, apiError) } + func testImageEdit() async throws { + let query = ImageEditsQuery(image: Data(), fileName: "whitecat.png", prompt: "White cat with heterochromia sitting on the kitchen table with a bowl of food", n: 1, size: "1024x1024") + let imagesResult = ImagesResult(created: 100, data: [ + .init(url: "http://foo.bar") + ]) + try self.stub(result: imagesResult) + let result = try await openAI.imageEdits(query: query) + XCTAssertEqual(result, imagesResult) + } + + func testImageEditError() async throws { + let query = ImageEditsQuery(image: Data(), fileName: "whitecat.png", prompt: "White cat with heterochromia sitting on the kitchen table with a bowl of food", n: 1, size: "1024x1024") + let inError = APIError(message: "foo", type: "bar", param: "baz", code: "100") + self.stub(error: inError) + + let apiError: APIError = try await XCTExpectError { try await openAI.imageEdits(query: query) } + XCTAssertEqual(inError, apiError) + } + + func testImageVariation() async throws { + let query = ImageVariationsQuery(image: Data(), fileName: "whitecat.png", n: 1, size: "1024x1024") + let imagesResult = ImagesResult(created: 100, data: [ + .init(url: "http://foo.bar") + ]) + try self.stub(result: imagesResult) + let result = try await openAI.imageVariations(query: query) + XCTAssertEqual(result, imagesResult) + } + + func testImageVariationError() async throws { + let query = ImageVariationsQuery(image: Data(), fileName: "whitecat.png", n: 1, size: "1024x1024") + let inError = 
APIError(message: "foo", type: "bar", param: "baz", code: "100") + self.stub(error: inError) + + let apiError: APIError = try await XCTExpectError { try await openAI.imageVariations(query: query) } + XCTAssertEqual(inError, apiError) + } + func testChats() async throws { let query = ChatQuery(model: .gpt4, messages: [ .init(role: .system, content: "You are Librarian-GPT. You know everything about the books."),