Skip to content

Commit

Permalink
增加说明 增加图片 增加输出字幕
Browse files Browse the repository at this point in the history
  • Loading branch information
m986883511 committed Aug 1, 2021
1 parent b66c592 commit 81ce5f8
Show file tree
Hide file tree
Showing 11 changed files with 138 additions and 29 deletions.
65 changes: 59 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,25 +1,78 @@
# extract-video-subtittle
使用深度学习框架提取视频硬字幕
使用深度学习框架提取视频硬字幕

本地识别无需联网;

CPU识别速度可观;

容器提供API接口;



# 运行环境

本项目运行环境非常好搭建,我做好了docker容器免安装各种深度学习包
本项目运行环境非常好搭建,我做好了docker容器免安装各种深度学习包;

提供windows界面操作;

提供界面操作
容器为CPU版本;



## 视频演示

xxxx
bilibili



## 程序说明

1、先启动后端容器实例

```shell
docker run -d -p 6666:6666 m986883511/extract_subtitles
```

![image-20210801214757813](image/docker-run.png)

2、启动程序

简单介绍页面

1:点击左边按钮连接第一步启动的容器;

2:视频提取字幕的总进度

3:当前视频帧显示的位置,就是视频进度条

4:识别出来的文字会在这里显示一下

![image-20210801215010179](image/2-run-exe.png)

![image-20210801215258761](image/3-view.png)

3、点击选择视频确认字幕位置

点击选择视频按钮,这时你可以拖动进度条到有字幕的位置;然后点击选择字幕区域;在视频中画一个矩形;

![image-20210801215258761](image/4-rect.png)

4、点击测试连接API

![image-20210801220206554](image/5-connect.png)

后端没问题的话,会显示已连通;此时所有步骤准备就绪

5、开始识别

点击请先完成前几步按钮,内部分为这几个步骤

1. 本地通过ffmpeg提取视频声音保存到temp目录(0%-10%)
2. api通信将声音文件发送到容器内,容器内spleeter库提取声音中人声,结果保存在容器内temp目录,很耗时间,吃CPU和内存(10%-30%)
3. api通信,将人声根据停顿分片,返回分片结果,耗较短的时间(30%-40%)
4. 根据说话分片时间开始识别字幕(40%-100%)

当100%的时候查看temp目录就生成了和视频同名的srt字幕文件



Expand Down Expand Up @@ -47,7 +100,7 @@ C:/Python/Python38-32/Scripts/pyinstaller.exe main.spec

```

# 深度学习后端
本项目中深度学习源代码为/docker/backend.tar
# 参考资料
本项目中深度学习源代码为/docker/backend

原作者为:https://github.com/YaoFANGUK/video-subtitle-extractor
Binary file added image/2-run-exe.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added image/3-view.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added image/4-rect.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added image/5-connect.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added image/docker-run.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added image/view.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added image/weixin.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
81 changes: 72 additions & 9 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import datetime
import json
import os
import time
import sys
Expand All @@ -7,16 +9,60 @@
sys.path.append(os.path.join(current_dir, 'ffmpeg'))

import cv2
import srt
from PyQt5 import QtGui, QtWidgets
from PyQt5.QtCore import *
from PyQt5.QtGui import QPixmap
from PyQt5.QtWidgets import QTextEdit, QLabel, QFileDialog, QMessageBox
from PyQt5.QtWidgets import QSlider, QVBoxLayout, QLineEdit, QWidget
from PyQt5.QtWidgets import QSlider, QVBoxLayout, QLineEdit, QWidget, QHBoxLayout
from PyQt5.QtWidgets import QPushButton, QGridLayout, QDialog, QTabWidget

from interface import ExtractSubtitleApi
from custom_component import PaintRectLabel
from scripts import get_extract_voice_progress
from scripts import get_extract_voice_progress, get_data_time


class SaveResult:
    """Accumulate recognized subtitle lines for one video and persist them.

    Results are written next to the program in ``temp/`` as a JSON dump
    (``save_json``) and an SRT subtitle file (``save_srt``), both named
    after the source video registered via ``set_video_msg``.
    """

    def __init__(self):
        # 'video' holds video metadata; 'text'/'sub_tittle' holds the
        # recognized lines (key spelling kept for backward compatibility).
        self.result = {
            'video': {},
            'text': {
                'sub_tittle': []
            }
        }

    def get_data_time(self, str_time):
        """Parse an 'HH:MM:SS.mmm' timestamp into a datetime.timedelta.

        Bug fix: the previous slices read minutes as [4:5], seconds as
        [7:8] and milliseconds as [9:11], which dropped digits and parsed
        the wrong columns; the correct offsets for 'HH:MM:SS.mmm' are used
        here (matching the module-level helper in scripts.py).
        """
        return datetime.timedelta(hours=int(str_time[0:2]), minutes=int(str_time[3:5]),
                                  seconds=int(str_time[6:8]), milliseconds=int(str_time[9:]))

    def add_sub_tittle(self, start_time, end_time, text):
        """Append one recognized subtitle entry; empty text is ignored."""
        if text:
            self.result['text']['sub_tittle'].append(
                {'start_time': start_time, 'end_time': end_time, 'text': text})

    def set_video_msg(self, video_path):
        # Remember the source video path; output file names derive from it.
        self.result['video_path'] = video_path

    def _output_path(self, extension):
        """Return the temp-dir output path for the video, or '' if no
        video has been registered (shared by save_json/save_srt)."""
        video_path = self.result.get('video_path', '')
        video_name = ''.join(os.path.basename(video_path).split('.')[:-1])
        if not video_name:
            return ''
        return os.path.join(current_dir, 'temp', video_name + extension)

    def save_json(self):
        """Write the accumulated result as pretty-printed UTF-8 JSON."""
        save_path = self._output_path('.json')
        if save_path:
            with open(save_path, 'w', encoding='utf-8') as f:
                f.write(json.dumps(self.result, indent=4, ensure_ascii=False))
        else:
            print('not set video name')

    def save_srt(self):
        """Write the accumulated subtitles as an SRT file.

        Bug fixes: the SRT index previously received the subtitle *dict*
        itself instead of a 1-based counter, and the file was opened
        without an explicit encoding (mojibake risk on Windows).
        """
        save_path = self._output_path('.srt')
        if not save_path:
            print('not set video name')
            return
        sub_tittles = [
            srt.Subtitle(index,
                         start=self.get_data_time(item['start_time']),
                         end=self.get_data_time(item['end_time']),
                         content=item['text'])
            for index, item in enumerate(self.result['text']['sub_tittle'], start=1)
        ]
        with open(save_path, 'w', encoding='utf-8') as f:
            f.write(srt.compose(sub_tittles))


class SoftData:
Expand Down Expand Up @@ -105,11 +151,13 @@ def __init__(self, load_picture_thread: LoadVideoPicture):
def get_current_sub_tittle_base64_img(self):
rect_frame = SoftData.Video.current_frame[SoftData.Video.rect[1]:SoftData.Video.rect[3],
SoftData.Video.rect[0]:SoftData.Video.rect[2]]
image = cv2.imencode('.jpg',rect_frame)[1]
image = cv2.imencode('.jpg', rect_frame)[1]
image_code = str(base64.b64encode(image))[2:-1]
return image_code

def run(self):
save_result = SaveResult()
save_result.set_video_msg(SoftData.Path.video)
# 1、视频人声和背景声音分离
self.log_signal.emit('开始提取人声结果,可能等待较长时间......')
result = self.api_interface.extract_human_voice_from_sound(SoftData.Path.sound)
Expand All @@ -131,21 +179,31 @@ def run(self):
zheng = int(time_zhong / (1000 / SoftData.Video.fps))
self.load_picture_thread.set_value(zheng)
result = self.api_interface.text_recognition(self.get_current_sub_tittle_base64_img())
self.log_signal.emit('{}'.format(value))
self.log_signal.emit('{}'.format(result))
text = ';'.join([value[1] for value in result ])
time_point='{}-{}'.format(time.strftime("%H:%M:%S", time.gmtime(value[0]/100)),
time.strftime("%H:%M:%S", time.gmtime(value[1]/100)))
text = ';'.join([value[1] for value in result])
start_time = time.strftime("%H:%M:%S", time.gmtime(value[0] // 1000))
start_time = start_time + '.{}'.format(value[0] % 1000)
end_time = time.strftime("%H:%M:%S", time.gmtime(value[1] // 1000))
end_time = end_time + '.{}'.format(value[1] % 1000)
print(value, start_time, end_time)
time_point = '{}-{}'.format(start_time, end_time)
self.load_picture_thread.subtittle_result_label.setText('{}: {}'.format(time_point, text))
text = '{}'.format(text)
save_result.add_sub_tittle(start_time, end_time, text)
else:
self.progress_signal.emit(100)
self.log_signal.emit('视频字幕提取成功')
save_result.save_json()
save_result.save_srt()


class MainUi(QDialog):
process = QProcess()

def __init__(self):
super().__init__()
self.setWindowTitle('视频硬字幕提取 https://github.com/m986883511/extract-video-subtittle')
self.api_interface = ExtractSubtitleApi()
self.init_ui()
self.video_capture = None
Expand Down Expand Up @@ -195,15 +253,20 @@ def get_background_print_tab(self):

def get_thank_author_tab(self):
thank_author_tab = QWidget()
layout = QVBoxLayout()
self.thank_author_label = QLabel('一张感谢作者的二维码转账图,哈哈哈')
layout = QHBoxLayout()
self.thank_author_label = QLabel('软件觉得不错,可以打赏')
self.thank_author_picture = QLabel()
picture = os.path.join(current_dir, 'image', 'weixin.jpg')
self.thank_author_picture.setScaledContents(True)
self.thank_author_picture.setPixmap(QPixmap(picture))
layout.addWidget(self.thank_author_label)
layout.addWidget(self.thank_author_picture)
thank_author_tab.setLayout(layout)
return thank_author_tab, '感谢作者'

def init_ui(self):
self.setMinimumWidth(1000)
self.setMinimumHeight(618)
self.setMinimumHeight(800)
layout = QVBoxLayout()
self.video_path_button = QPushButton(SoftData.Button.select_video_path)
self.video_path_button.clicked.connect(self.open_dialog_select_video_file)
Expand Down
2 changes: 1 addition & 1 deletion requeirments.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ pyqt5
requests
opencv-python
requests_toolbelt

srt
19 changes: 6 additions & 13 deletions scripts.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import datetime

import cv2


Expand Down Expand Up @@ -26,16 +28,7 @@ def get_extract_voice_progress(input_str: str)->int:
progress = get_seconds(Value.now_time) / get_seconds(Value.all_time) * 100
return int(progress)

# def CropImage4File(image):
# sp = image.shape # 获取图像形状:返回【行数值,列数值】列表
# sz1 = sp[0] # 图像的高度(行 范围)
# sz2 = sp[1] # 图像的宽度(列 范围)
# # sz3 = sp[2] #像素值由【RGB】三原色组成
#
# # 你想对文件的操作
# a = int(sz1 / 2 - 64) # x start
# b = int(sz1 / 2 + 64) # x end
# c = int(sz2 / 2 - 64) # y start
# d = int(sz2 / 2 + 64) # y end
# cropImg = image[a:b, c:d] # 裁剪图像
# cv2.imwrite(dest, cropImg) # 写入图像路径

def get_data_time(str_time):
    """Parse an 'HH:MM:SS.mmm' timestamp string into a datetime.timedelta.

    Fixed column offsets are assumed: two-digit hours, minutes and
    seconds, with everything after the dot taken as milliseconds.
    """
    hours = int(str_time[0:2])
    minutes = int(str_time[3:5])
    seconds = int(str_time[6:8])
    millis = int(str_time[9:])
    return datetime.timedelta(hours=hours, minutes=minutes,
                              seconds=seconds, milliseconds=millis)

0 comments on commit 81ce5f8

Please sign in to comment.