Merge pull request #236 from Frank-Buss/frankbuss/verbose-json-bugfix
fixed verboseJson encoding
nezhyborets authored Jan 17, 2025
2 parents 843e087 + be88f6a commit f6d0744
Showing 3 changed files with 69 additions and 4 deletions.
2 changes: 1 addition & 1 deletion Sources/OpenAI/Public/Models/AudioTranscriptionQuery.swift
@@ -88,7 +88,7 @@ extension AudioTranscriptionQuery: MultipartFormDataBodyEncodable {
.string(paramName: "prompt", value: prompt),
.string(paramName: "temperature", value: temperature),
.string(paramName: "language", value: language),
.string(paramName: "response_format", value: responseFormat)
.string(paramName: "response_format", value: responseFormat?.rawValue)
])
return bodyBuilder.build()
}
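
The bug came from handing the ResponseFormat enum value itself to the form builder, so the request never carried the string the API expects. A minimal sketch of the distinction, assuming a String-backed enum along the lines of the library's (the declaration here is illustrative, not the repo's exact type):

// Illustrative stand-in for the library's response format enum.
enum ResponseFormat: String {
    case json
    case verboseJson = "verbose_json"
}

let format: ResponseFormat? = .verboseJson
// Encoding the enum value directly does not produce the wire format;
// the raw value is what the multipart field must contain.
print(format?.rawValue ?? "")   // "verbose_json"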
38 changes: 36 additions & 2 deletions Sources/OpenAI/Public/Models/AudioTranscriptionResult.swift
@@ -8,7 +8,41 @@
 import Foundation
 
 public struct AudioTranscriptionResult: Codable, Equatable {
-    
-    /// The transcribed text.
+    /// The task type (always "transcribe" for transcriptions)
+    public let task: String?
+    /// The detected language
+    public let language: String?
+    /// The duration of the audio in seconds
+    public let duration: Double?
+    /// The transcribed text
     public let text: String
+    /// The segments containing detailed information (only present in verbose_json format)
+    public let segments: [Segment]?
+
+    public init(
+        task: String? = nil,
+        language: String? = nil,
+        duration: Double? = nil,
+        text: String,
+        segments: [Segment]? = nil
+    ) {
+        self.task = task
+        self.language = language
+        self.duration = duration
+        self.text = text
+        self.segments = segments
+    }
+
+    public struct Segment: Codable, Equatable {
+        public let id: Int
+        public let seek: Int
+        public let start: Double
+        public let end: Double
+        public let text: String
+        public let tokens: [Int]
+        public let temperature: Double
+        public let avg_logprob: Double
+        public let compression_ratio: Double
+        public let no_speech_prob: Double
+    }
 }
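
Because the new properties mirror the verbose_json field names one-to-one (including the snake_case segment keys), a plain JSONDecoder handles the payload without a key-decoding strategy. A minimal decoding sketch; the JSON literal is illustrative, not a captured API response:

import Foundation

let json = """
{
  "task": "transcribe",
  "language": "english",
  "duration": 3.76,
  "text": "This is a test.",
  "segments": [{
    "id": 0, "seek": 0, "start": 0.0, "end": 3.76,
    "text": " This is a test.",
    "tokens": [50364, 639, 307, 257, 1500, 13, 50552],
    "temperature": 0.0,
    "avg_logprob": -0.51,
    "compression_ratio": 0.71,
    "no_speech_prob": 0.08
  }]
}
""".data(using: .utf8)!

// Decodes because the property names match the JSON keys exactly.
let result = try JSONDecoder().decode(AudioTranscriptionResult.self, from: json)
print(result.duration ?? 0)                  // 3.76
print(result.segments?.first?.text ?? "")    // " This is a test."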
33 changes: 32 additions & 1 deletion Tests/OpenAITests/OpenAITests.swift
@@ -320,7 +320,38 @@ class OpenAITests: XCTestCase {
         let result = try await openAI.audioTranscriptions(query: query)
         XCTAssertEqual(result, transcriptionResult)
     }
-
+
+    func testVerboseJsonAudioTranscriptions() async throws {
+        let data = Data()
+        let query = AudioTranscriptionQuery(file: data, fileType: .m4a, model: .whisper_1, responseFormat: .verboseJson)
+
+        let transcriptionResult = AudioTranscriptionResult(
+            task: "transcribe",
+            language: "english",
+            duration: 3.759999990463257,
+            text: "This is a test.",
+            segments: [
+                AudioTranscriptionResult.Segment(
+                    id: 0,
+                    seek: 0,
+                    start: 0,
+                    end: 3.759999990463257,
+                    text: " This is a test.",
+                    tokens: [50364, 639, 307, 257, 1500, 13, 50552],
+                    temperature: 0,
+                    avg_logprob: -0.5153926610946655,
+                    compression_ratio: 0.7142857313156128,
+                    no_speech_prob: 0.08552933484315872
+                )
+            ]
+        )
+
+        try self.stub(result: transcriptionResult)
+
+        let result = try await openAI.audioTranscriptions(query: query)
+        XCTAssertEqual(result, transcriptionResult)
+    }
+
     func testAudioTranscriptionsError() async throws {
         let data = Data()
         let query = AudioTranscriptionQuery(file: data, fileType: .m4a, model: .whisper_1)
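
With the encoding fix and the extended result type together, verbose transcriptions work end to end. A minimal usage sketch, assuming a client built with OpenAI(apiToken:) as in the rest of the library; the token is a placeholder and `data` is the caller's audio payload:

import Foundation
import OpenAI

func transcribeVerbose(_ data: Data) async throws {
    let openAI = OpenAI(apiToken: "YOUR_API_TOKEN")   // placeholder token
    let query = AudioTranscriptionQuery(file: data, fileType: .m4a, model: .whisper_1, responseFormat: .verboseJson)
    let result = try await openAI.audioTranscriptions(query: query)

    // verbose_json carries per-segment timing alongside the plain text.
    print(result.text)
    for segment in result.segments ?? [] {
        print("\(segment.start)s to \(segment.end)s:\(segment.text)")
    }
}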
