text.py
from pydub import AudioSegment
import subprocess
import math
import cv2
import os

# Milliseconds trimmed from the first word's display time (see add_narration_to_video)
offset = 50


def get_audio_duration(audio_file):
    # pydub reports segment length in milliseconds
    return len(AudioSegment.from_file(audio_file))


def write_text(text, frame, video_writer):
    font = cv2.FONT_HERSHEY_SIMPLEX
    white_color = (255, 255, 255)
    black_color = (0, 0, 0)
    thickness = 10
    font_scale = 3
    border = 5

    # Calculate the position for centered text
    text_size = cv2.getTextSize(text, font, font_scale, thickness)[0]
    text_x = (frame.shape[1] - text_size[0]) // 2  # Center horizontally
    text_y = (frame.shape[0] + text_size[1]) // 2  # Center vertically
    org = (text_x, text_y)  # Position of the text

    # Draw a black outline first, then the white text on top of it
    frame = cv2.putText(frame, text, org, font, font_scale, black_color, thickness + border * 2, cv2.LINE_AA)
    frame = cv2.putText(frame, text, org, font, font_scale, white_color, thickness, cv2.LINE_AA)

    video_writer.write(frame)


def add_narration_to_video(narrations, input_video, output_dir, output_file):
    # Open the video file
    cap = cv2.VideoCapture(input_video)

    # Define the codec and create a VideoWriter object to save the output video
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    temp_video = os.path.join(output_dir, "with_transcript.avi")
    out = cv2.VideoWriter(temp_video, fourcc, 30, (int(cap.get(3)), int(cap.get(4))))

    full_narration = AudioSegment.empty()

    for i, narration in enumerate(narrations):
        audio = os.path.join(output_dir, "narrations", f"narration_{i+1}.mp3")
        duration = get_audio_duration(audio)
        narration_frames = math.floor(duration / 1000 * 30)

        full_narration += AudioSegment.from_file(audio)

        # Estimate how long each word should stay on screen from its character count
        char_count = len(narration.replace(" ", ""))
        ms_per_char = duration / char_count

        frames_written = 0
        words = narration.split(" ")
        for w, word in enumerate(words):
            word_ms = len(word) * ms_per_char

            # Shorten the very first word slightly to keep the captions in sync
            if i == 0 and w == 0:
                word_ms -= offset
                if word_ms < 0:
                    word_ms = 0

            for _ in range(math.floor(word_ms / 1000 * 30)):
                ret, frame = cap.read()
                if not ret:
                    break

                write_text(word, frame, out)
                frames_written += 1

        # Pad with caption-free frames until this narration's audio duration is covered
        for _ in range(narration_frames - frames_written):
            ret, frame = cap.read()
            if not ret:
                break
            out.write(frame)

    # Copy any remaining frames (without captions) to the output
    while out.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        out.write(frame)

    # Concatenate the individual narration clips into one audio track
    temp_narration = os.path.join(output_dir, "narration.mp3")
    full_narration.export(temp_narration, format="mp3")

    # Release the VideoCapture and VideoWriter objects
    cap.release()
    out.release()

    # Close all OpenCV windows (if any)
    cv2.destroyAllWindows()

    ffmpeg_command = [
        'ffmpeg',
        '-y',
        '-i', temp_video,
        '-i', temp_narration,
        '-map', '0:v',              # Map video from the first input
        '-map', '1:a',              # Map audio from the second input
        '-c:v', 'copy',             # Copy video codec
        '-c:a', 'aac',              # AAC audio codec
        '-strict', 'experimental',
        os.path.join(output_dir, output_file)
    ]
    subprocess.run(ffmpeg_command, capture_output=True)

    os.remove(temp_video)
    os.remove(temp_narration)
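

# A minimal usage sketch (illustrative only): the file names, directory layout,
# and narration strings below are assumptions, not part of this module. It
# presumes output_dir already contains a "narrations" folder with
# narration_1.mp3, narration_2.mp3, ... matching the narration list.
if __name__ == "__main__":
    example_narrations = [
        "Hello and welcome",
        "This clip was generated automatically",
    ]
    add_narration_to_video(
        narrations=example_narrations,
        input_video="background.avi",  # hypothetical source clip
        output_dir="output",           # hypothetical working directory
        output_file="final.avi",       # written inside output_dir
    )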