Skip to content

Commit

Permalink
Merge pull request #113 from solidSpoon/whisper-issue
Browse files Browse the repository at this point in the history
优化生成字幕的逻辑
  • Loading branch information
solidSpoon authored Feb 22, 2025
2 parents afe656a + d5e502e commit 2f3b07e
Show file tree
Hide file tree
Showing 17 changed files with 466 additions and 147 deletions.
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "dash-player",
"productName": "DashPlayer",
"version": "5.1.3",
"version": "5.1.4",
"description": "My Electron application description",
"main": ".vite/build/main.js",
"scripts": {
Expand Down Expand Up @@ -139,6 +139,7 @@
"tailwind-merge": "^2.2.1",
"tailwindcss-animate": "^1.0.7",
"tencentcloud-sdk-nodejs": "^4.0.764",
"ts-error": "^1.0.6",
"vaul": "^0.9.0",
"zod": "^3.23.8",
"zod-to-json-schema": "^3.22.5",
Expand Down
9 changes: 0 additions & 9 deletions src/backend/errors/CancelByUserError.ts

This file was deleted.

13 changes: 13 additions & 0 deletions src/backend/errors/errors.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { ExtendableError } from 'ts-error';

/**
 * Whisper response format error: raised when a transcription response
 * fails schema validation.
 */
export class WhisperResponseFormatError extends ExtendableError {
}

/**
 * The task was cancelled by the user.
 */
export class CancelByUserError extends ExtendableError {
}
61 changes: 22 additions & 39 deletions src/backend/objs/OpenAiWhisperRequest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,34 +4,9 @@ import RateLimiter from '@/common/utils/RateLimiter';
import StrUtil from '@/common/utils/str-util';
import { Cancelable } from '@/common/interfaces';
import OpenAI from 'openai';

import { z } from 'zod';
import dpLog from '@/backend/ioc/logger';

// Shape of one timed segment inside a transcription response.
const WhisperSegmentVerifySchema = z.object({
    seek: z.number(),
    start: z.number(),
    end: z.number(),
    text: z.string()
});

/**
 * Runtime validation schema for a transcription response.
 * `duration` is accepted either as a number or as a string.
 */
const WhisperResponseVerifySchema = z.object({
    language: z.string(),
    duration: z.union([z.number(), z.string()]),
    text: z.string(),
    segments: z.array(WhisperSegmentVerifySchema)
});

/**
 * Transcription result returned by `OpenAiWhisperRequest.invoke()`.
 */
export interface WhisperResponse {
// Language reported in the transcription response.
language: string;
// Duration of the transcribed audio (presumably seconds — TODO confirm).
duration: number;
// Full transcribed text.
text: string;
// invoke() always sets this to 0; NOTE(review): presumably adjusted by callers
// that transcribe a file in several pieces — confirm against callers.
offset: number;
// Timed pieces of the transcription, copied field by field from the response.
segments: {
seek: number;
start: number;
end: number;
text: string;
}[];
}
import { WhisperResponseFormatError } from '@/backend/errors/errors';
import { WhisperResponse, WhisperResponseVerifySchema } from '@/common/types/video-info';

class OpenAiWhisperRequest implements Cancelable {
private readonly file: string;
Expand All @@ -55,33 +30,41 @@ class OpenAiWhisperRequest implements Cancelable {
/**
 * Transcribe the configured file and return the result as a WhisperResponse.
 *
 * Cancels any request still registered on this instance, waits on the
 * 'whisper' rate limiter, performs the transcription, validates the response
 * with WhisperResponseVerifySchema and copies the validated fields into the
 * returned object (offset is always 0, missing segments become an empty array).
 *
 * @returns The transcription mapped to a WhisperResponse.
 * @throws When the response does not pass schema validation.
 */
public async invoke(): Promise<WhisperResponse> {
this.cancel();
await RateLimiter.wait('whisper');
this.abortController = new AbortController();
const transcription = await this.openAi.audio.transcriptions.create({
file: fs.createReadStream(this.file),
model: "whisper-1",
response_format: "verbose_json",
timestamp_granularities: ["segment"]
}, {signal: this.abortController.signal});
const transcription = await this.doTranscription();
// Use zod to check that transcription has the TranscriptionVerbose shape
const parseRes = WhisperResponseVerifySchema.safeParse(transcription);
if (!parseRes.success) {
// Log why the response did not match the schema
dpLog.error('Invalid response from OpenAI', parseRes.error.errors);
throw new Error('Invalid response from OpenAI');
throw new WhisperResponseFormatError();
}
return {
language: transcription.language,
duration: Number(transcription.duration),
duration: transcription.duration,
text: transcription.text,
offset: 0,
segments: transcription.segments?.map((seg) => ({
seek: seg.seek,
start: seg.start,
end: seg.end,
text: seg.text
}))??[]
}
})) ?? []
};

}

/**
 * Send the audio file to the OpenAI transcription endpoint.
 *
 * A fresh AbortController is stored on the instance before the call and its
 * signal is passed to the request. Any failure is logged and then rethrown
 * unchanged.
 *
 * @returns The `verbose_json` transcription with segment-level timestamps.
 */
private async doTranscription() {
    const controller = new AbortController();
    this.abortController = controller;
    try {
        const audioStream = fs.createReadStream(this.file);
        const transcription = await this.openAi.audio.transcriptions.create(
            {
                file: audioStream,
                model: 'whisper-1',
                response_format: 'verbose_json',
                timestamp_granularities: ['segment']
            },
            { signal: controller.signal }
        );
        return transcription;
    } catch (error) {
        // Log here so the failure is recorded even if a caller swallows it.
        dpLog.error(error);
        throw error;
    }
}

public cancel(): void {
Expand Down
54 changes: 54 additions & 0 deletions src/backend/objs/config-tender.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import { z } from 'zod';
import fs from 'fs';
import path from 'path';

/**
 * Manages a JSON configuration file whose content is validated by a Zod schema.
 *
 * Every read and every write goes through `schema.parse`, so the file on disk
 * and the values handed to callers always satisfy the schema.
 *
 * @template T Configuration type
 * @template S Zod schema type
 */
export class ConfigTender<T, S extends z.ZodType<T>> {
    private readonly configPath: string;
    private readonly schema: S;

    /**
     * @param configPath Path of the JSON file holding the configuration.
     * @param schema Schema used to validate the configuration on read and write.
     * @param defaultValue Written to `configPath` when the file does not exist yet.
     *                     Any value other than `undefined` counts as a default,
     *                     including falsy ones such as `false`, `0` or `''`.
     * @throws Error if the default value fails validation or cannot be written.
     */
    constructor(configPath: string, schema: S, defaultValue?: T) {
        this.configPath = configPath;
        this.schema = schema;

        // Make sure the parent directory exists
        const dir = path.dirname(configPath);
        if (!fs.existsSync(dir)) {
            fs.mkdirSync(dir, { recursive: true });
        }

        // Create the file when it is missing and a default was supplied.
        // Compare against undefined: a truthiness check would skip valid
        // falsy defaults.
        if (!fs.existsSync(configPath) && defaultValue !== undefined) {
            this.save(defaultValue);
        }
    }

    /**
     * Read the whole configuration.
     *
     * @returns The parsed and schema-validated configuration.
     * @throws Error if the file cannot be read, is not valid JSON, or fails validation.
     */
    get(): T {
        try {
            const content = fs.readFileSync(this.configPath, 'utf-8');
            const parsed: unknown = JSON.parse(content);
            return this.schema.parse(parsed);
        } catch (error) {
            throw new Error(`Failed to read config: ${error}`);
        }
    }

    /**
     * Save the whole configuration.
     *
     * @param config Configuration to validate and write as pretty-printed JSON.
     * @throws Error if the configuration fails validation or the file cannot be written.
     */
    save(config: T): void {
        try {
            const validated = this.schema.parse(config);
            fs.writeFileSync(this.configPath, JSON.stringify(validated, null, 2));
        } catch (error) {
            throw new Error(`Failed to save config: ${error}`);
        }
    }
}
6 changes: 6 additions & 0 deletions src/backend/services/FfmpegService.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { VideoInfo } from '@/common/types/video-info';


export default interface FfmpegService {
Expand Down Expand Up @@ -82,5 +83,10 @@ export default interface FfmpegService {
}): Promise<string>;

trimVideo(inputPath: string, startTime: number, endTime: number, outputPath: string): Promise<void>;

/**
 * Get video information.
 *
 * @param filePath Path of the video file to inspect.
 * @returns A promise resolving to the VideoInfo for that file.
 */
getVideoInfo(filePath: string): Promise<VideoInfo>;
}

7 changes: 5 additions & 2 deletions src/backend/services/impl/DpTaskServiceImpl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ import { DpTask, dpTask, DpTaskState, InsertDpTask } from '@/backend/db/tables/d

import LRUCache from 'lru-cache';
import TimeUtil from '@/common/utils/TimeUtil';
import ErrorConstants from '@/common/constants/error-constants';
import { injectable, postConstruct } from 'inversify';
import DpTaskService from '@/backend/services/DpTaskService';
import dpLog from '@/backend/ioc/logger';
import { Cancelable } from '@/common/interfaces';
import CancelByUserError from '@/backend/errors/CancelByUserError';

import { CancelByUserError } from '@/backend/errors/errors';

@injectable()
export default class DpTaskServiceImpl implements DpTaskService {
Expand Down Expand Up @@ -188,6 +188,9 @@ export default class DpTaskServiceImpl implements DpTaskService {
/**
 * Attach a cancelable process to a task.
 *
 * The process is appended to the list kept for `taskId`. If that task id is
 * already present in the cancel queue, the process is cancelled right away.
 *
 * @param taskId Id of the task the process belongs to.
 * @param process Cancelable unit of work to track for the task.
 */
public registerTask(taskId: number, process: Cancelable) {
    const alreadyRegistered = this.taskMapping.get(taskId) || [];
    const updated = [...alreadyRegistered, process];
    this.taskMapping.set(taskId, updated);

    const cancelRequested = this.cancelQueue.has(taskId);
    if (!cancelRequested) {
        return;
    }
    process.cancel();
}

}
31 changes: 30 additions & 1 deletion src/backend/services/impl/FfmpegServiceImpl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@ import TYPES from '@/backend/ioc/types';
import FfmpegService from '@/backend/services/FfmpegService';
import FfmpegTask from '@/backend/objs/FfmpegTask';
import DpTaskService from '@/backend/services/DpTaskService';
import CancelByUserError from '@/backend/errors/CancelByUserError';
import dpLog from '@/backend/ioc/logger';
import ffmpeg from 'fluent-ffmpeg';
import LocationService, { ProgramType } from '@/backend/services/LocationService';
import { VideoInfo } from '@/common/types/video-info';
import { CancelByUserError } from '@/backend/errors/errors';

@injectable()
export default class FfmpegServiceImpl implements FfmpegService {
Expand Down Expand Up @@ -100,6 +101,34 @@ export default class FfmpegServiceImpl implements FfmpegService {
});
}

/**
 * Get detailed information about a video file.
 *
 * Combines file-system metadata (size, timestamps) with the container and
 * stream metadata reported by ffprobe.
 *
 * @param filePath Path of the video file to inspect.
 * @returns The collected VideoInfo; `bitrate`, `videoCodec` and `audioCodec`
 *          are `undefined` when ffprobe does not report them.
 * @throws Rejects when the file cannot be stat'ed or ffprobe fails.
 */
@WaitLock('ffprobe')
@logParams()
public async getVideoInfo(filePath: string): Promise<VideoInfo> {
    // Basic file-system information
    const stats = await fs.promises.stat(filePath);

    // ffprobe information, adapted from its callback API
    const probeData = await new Promise<any>((resolve, reject) => {
        ffmpeg.ffprobe(filePath, (err, metadata) => {
            if (err) reject(err);
            else resolve(metadata);
        });
    });

    // Codec name of the first stream of the given type, if any
    const codecOf = (codecType: string): string | undefined =>
        probeData.streams?.find((s: any) => s.codec_type === codecType)?.codec_name;

    const rawBitrate = probeData.format.bit_rate;

    return {
        filename: path.basename(filePath),
        duration: probeData.format.duration || 0,
        size: stats.size,
        modifiedTime: stats.mtimeMs,
        // ctimeMs is the last status-change time, not the creation time.
        // Use birthtimeMs, falling back to ctimeMs where the file system
        // does not report a birth time (birthtimeMs is then 0).
        createdTime: stats.birthtimeMs || stats.ctimeMs,
        bitrate: rawBitrate ? parseInt(String(rawBitrate), 10) : undefined,
        videoCodec: codecOf('video'),
        audioCodec: codecOf('audio')
    };
}

/**
* 截取视频的缩略图
Expand Down
Loading

0 comments on commit 2f3b07e

Please sign in to comment.