-
Notifications
You must be signed in to change notification settings - Fork 0
/
youtube_timestamps.py
50 lines (44 loc) · 1.85 KB
/
youtube_timestamps.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import pandas as pd
import datetime
import re
from pydub import AudioSegment
def convert_to_hms(seconds: float) -> str:
    """Format an offset in seconds as an ``HH:MM:SS,mmm`` timestamp.

    Args:
        seconds: Offset in seconds; may carry a fractional part.

    Returns:
        Zero-padded hours, minutes and seconds separated by colons, then a
        comma and a three-digit millisecond field.
    """
    # Round to whole milliseconds *before* splitting into fields. Rounding only
    # the fractional part can produce 1000 (e.g. for 1.9996 s), which would be
    # rendered as a four-digit field instead of carrying into the seconds.
    total_ms = int(round(seconds * 1000))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    hours, remainder = divmod(total_seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    return f"{hours:02}:{minutes:02}:{secs:02},{milliseconds:03}"
def convert_chunk(chunk: dict) -> str:
    """Render one transcription chunk as a timed subtitle entry.

    Args:
        chunk: Mapping with a ``"timestamp"`` entry whose items 0 and 1 are
            the start and end offsets handed to ``convert_to_hms``, and a
            ``"text"`` string.

    Returns:
        A ``"<start> --> <end>"`` line, the whitespace-stripped text on the
        next line, and a trailing blank line.
    """
    begin, finish = (convert_to_hms(chunk["timestamp"][i]) for i in (0, 1))
    caption = chunk["text"].strip()
    timing_line = f"{begin} --> {finish}"
    return f"{timing_line}\n{caption}\n\n"
# Matches a single ``H:M:S,fraction`` timestamp such as ``00:01:02,345``.
_TIMESTAMP_RE = re.compile(r"\d+:\d+:\d+,\d+")


def find_subtitles_with_word(file_path, my_list):
    """Return the subtitle entries in a file that mention any search word.

    The file is split into blocks on blank lines. Within a block, the second
    line is treated as the timing line and everything from the third line on
    as the subtitle text (joined with spaces). Matching is case-insensitive
    substring search against both the timing line and the text.

    Args:
        file_path: Path of the subtitle file to read.
        my_list: Iterable of words (strings) to search for.

    Returns:
        A list of dicts with keys ``"start_time"``, ``"end_time"`` and
        ``"subtitle"``, one per matching block, in file order. A block is
        reported once even when several words match it.
    """
    with open(file_path, "r") as file:
        content = file.read()

    # Lower-case the search words once instead of once per block.
    needles = [word.lower() for word in my_list]

    found_subtitles = []
    for subtitle in content.split("\n\n"):
        lines = subtitle.split("\n")
        if len(lines) < 3:
            continue

        time_line = lines[1]
        subtitle_text = " ".join(lines[2:])
        time_lower = time_line.lower()
        text_lower = subtitle_text.lower()

        # any() stops at the first hit, so a block matched by several words
        # is appended only once rather than once per matching word.
        if not any(n in time_lower or n in text_lower for n in needles):
            continue

        # A block without exactly two timestamps on its timing line is not a
        # well-formed cue; skip it instead of failing on tuple unpacking.
        timestamps = _TIMESTAMP_RE.findall(time_line)
        if len(timestamps) != 2:
            continue

        start_time, end_time = timestamps
        found_subtitles.append({
            "start_time": start_time,
            "end_time": end_time,
            "subtitle": subtitle_text,
        })
    return found_subtitles
def clip_audio(file_path, start_time, end_time):
    """Load an audio file and return the part between two timestamps.

    Args:
        file_path: Location of the audio file, passed to
            ``AudioSegment.from_file``.
        start_time: Clip start, as a string accepted by
            ``timestamp_to_milliseconds``.
        end_time: Clip end, in the same format as ``start_time``.

    Returns:
        The result of slicing the loaded audio from the converted start
        value to the converted end value.
    """
    # Load first, then convert the bounds, so failures surface in the same
    # order as before.
    full_track = AudioSegment.from_file(file_path)
    bounds = slice(
        timestamp_to_milliseconds(start_time),
        timestamp_to_milliseconds(end_time),
    )
    return full_track[bounds]
def timestamp_to_milliseconds(timestamp):
    """Convert an ``HH:MM:SS,fff`` timestamp string to whole milliseconds.

    Args:
        timestamp: Text in the ``%H:%M:%S,%f`` layout, e.g. ``"01:02:03,500"``.
            Parsing goes through ``strptime``, so the hour field is limited
            to what ``%H`` accepts.

    Returns:
        The offset from ``00:00:00,000`` as an integer number of milliseconds.

    Raises:
        ValueError: If ``timestamp`` does not match the expected layout.
    """
    time_obj = datetime.datetime.strptime(timestamp, "%H:%M:%S,%f")
    # The hour field has to be counted as well; leaving it out makes every
    # timestamp at or beyond one hour come out short by whole hours.
    whole_seconds = (time_obj.hour * 60 + time_obj.minute) * 60 + time_obj.second
    return whole_seconds * 1000 + time_obj.microsecond // 1000