-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcleanup-srts.sh
84 lines (72 loc) · 2.27 KB
/
cleanup-srts.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/usr/bin/env bash
# Directories used when the matching command-line option is not supplied:
# where .srt files are searched for, and where .txt transcripts are written.
source_directory='.'
destination_directory='./transcripts'
# Function to display usage help
#######################################
# Print the command-line help text and terminate the script.
# Arguments:
#   $1 - exit status to terminate with (optional). Defaults to 1, so a call
#        without arguments behaves as a usage error; pass 0 when the help
#        text was explicitly requested.
# Outputs:
#   The help text on stdout.
# Returns:
#   Never returns; exits the shell with the chosen status.
#######################################
usage() {
  local status=${1:-1}

  # printf prints each argument on its own line; the empty argument yields
  # the blank line between the synopsis and the option list.
  printf '%s\n' \
    "Usage: $0 [-s <source_directory>] [-d <destination_directory>] [-h]" \
    "" \
    "Options:" \
    " -s Specify the source directory to search for .srt files (default is current directory)" \
    " -d Specify the destination directory to store the .txt transcripts (default is ./transcripts)" \
    " -h Show this help message and exit"

  exit "$status"
}
# Parse options
#######################################
# Parse the command-line flags into the global directory settings.
# Globals:
#   source_directory       - overwritten by the argument of -s
#   destination_directory  - overwritten by the argument of -d
# Arguments:
#   The script's command-line arguments.
# Outputs:
#   A diagnostic on stderr for an unknown flag or a missing flag argument.
# Returns:
#   0 once all flags are consumed. For -h, an unknown flag, or a missing
#   argument it hands control to usage, which terminates the script.
#######################################
parse_options() {
  # getopts tracks its progress in OPTIND; keeping it local (reset to 1)
  # makes every call start from the first argument.
  local opt OPTARG
  local OPTIND=1

  # The leading ':' selects silent error reporting, so the '?' and ':' arms
  # below produce the diagnostics instead of getopts itself.
  while getopts ':s:d:h' opt; do
    case "$opt" in
      s)
        source_directory=$OPTARG
        ;;
      d)
        destination_directory=$OPTARG
        ;;
      h)
        # Help was asked for explicitly, so request a success exit status.
        # A usage implementation that ignores its argument simply keeps its
        # own exit status.
        usage 0
        ;;
      \?)
        echo "Invalid option: -$OPTARG" >&2
        usage
        ;;
      :)
        echo "Option -$OPTARG requires an argument." >&2
        usage
        ;;
    esac
  done
}

parse_options "$@"
# Create destination directory if it doesn't exist, then find and copy .srt
# files to destination directory as .txt
#######################################
# Copy every *.srt entry found under a directory tree into a flat
# destination directory, replacing the .srt suffix with .txt.
# Arguments:
#   $1 - directory to search
#   $2 - directory that receives the .txt copies (created if missing)
# Outputs:
#   Error messages from mkdir, find and cp on stderr.
# Returns:
#   Non-zero if the destination cannot be created or if find reports a
#   failure (including a failed cp); 0 otherwise.
# Notes:
#   Only the file name is kept, so two .srt files with the same name in
#   different sub-directories end up as the same .txt file.
#######################################
copy_srts_as_txt() {
  local src=$1
  local dest=$2

  # Without a destination there is nowhere to copy to; skip the search.
  mkdir -p -- "$dest" || return

  # The destination is handed to the inner shell as a positional argument
  # instead of being pasted into its program text, so quotes, '$' or
  # backticks in the path are treated as data and never executed.
  # '! -type d' skips directories that merely happen to be named *.srt.
  find "$src" ! -type d -name '*.srt' -exec sh -c '
    dest=$1
    shift
    status=0
    for srt in "$@"; do
      name=${srt##*/}
      cp -- "$srt" "$dest/${name%.srt}.txt" || status=1
    done
    exit "$status"
  ' _ "$dest" {} +
}

# Problems are reported on stderr by the failing command; the script then
# carries on to the next step, as it did before.
copy_srts_as_txt "$source_directory" "$destination_directory"
# Reformat the .txt files in destination directory
#######################################
# Rewrite one SRT-formatted text file in place so that every subtitle cue
# occupies a single line.
#
# Processing, line by line:
#   * a trailing carriage return is dropped, so CRLF files are handled;
#   * empty lines are discarded;
#   * a line made only of digits starts a new cue and is itself dropped;
#   * everything before the first such line is ignored;
#   * on timing lines (those containing '-->') the ',mmm' millisecond part
#     after ':SS' is removed; other lines keep text such as "1,000";
#   * the remaining lines of a cue have tabs, carriage returns and newlines
#     removed and are concatenated with no separator.
#
# Arguments:
#   $1 - path of the file to rewrite
# Outputs:
#   A notice on stderr when the file is skipped.
# Returns:
#   1 if the file is not a readable regular file or cannot be written;
#   0 otherwise. A file without any digits-only line is left untouched
#   instead of being emptied.
#######################################
reformat_transcript() {
  local file=$1
  local line
  local cue=""
  local seen_index=0
  local -a cues=()
  # Greedy prefix + ':SS' + ',mmm' + rest; applied repeatedly, it removes
  # every millisecond field on a line, last one first.
  local -r ms_re='^(.*:[0-9]{2}),[0-9]{3}(.*)$'

  if [[ ! -f $file || ! -r $file ]]; then
    printf 'Skipping %s: not a readable regular file\n' "$file" >&2
    return 1
  fi

  # '|| [[ -n $line ]]' keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n $line ]]; do
    line=${line%$'\r'}
    [[ -n $line ]] || continue

    if [[ $line =~ ^[0-9]+$ ]]; then
      # Index line: flush the cue collected so far and start a new one.
      if [[ -n $cue ]]; then
        cues+=("$cue")
      fi
      cue=""
      seen_index=1
      continue
    fi

    # Content ahead of the first index line is not part of any cue.
    (( seen_index )) || continue

    if [[ $line == *'-->'* ]]; then
      while [[ $line =~ $ms_re ]]; do
        line=${BASH_REMATCH[1]}${BASH_REMATCH[2]}
      done
    fi

    cue+=${line//[$'\t\r\n']/}
  done < "$file"

  if [[ -n $cue ]]; then
    cues+=("$cue")
  fi

  if (( ! seen_index )); then
    printf 'Skipping %s: no subtitle index lines found\n' "$file" >&2
    return 0
  fi

  # The whole input is already in memory, so the file can be overwritten.
  # printf with an empty argument list would still emit one newline, hence
  # the separate branch for "no cues".
  if (( ${#cues[@]} > 0 )); then
    printf '%s\n' "${cues[@]}" > "$file" || return 1
  else
    : > "$file" || return 1
  fi
}

# An empty destination would turn the glob into /*.txt, so it is skipped.
if [[ -n $destination_directory ]]; then
  for file in "$destination_directory"/*.txt; do
    # When nothing matches, the pattern itself is the loop value; ignore it
    # (and anything else that is not a regular file).
    [[ -f $file ]] || continue
    reformat_transcript "$file"
  done
fi