-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse_subtitiles.py
36 lines (29 loc) · 1.05 KB
/
parse_subtitiles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import srt
import re
# Read the whole subtitle file and hand its text to the srt parser.
# The encoding is spelled out so the input is decoded the same way on every
# platform instead of depending on the locale default (the output file is
# written as UTF-8 as well); "utf-8-sig" additionally drops a leading
# byte-order mark when the file has one.
with open('Her.srt', encoding='utf-8-sig') as file:
    subtitles = srt.parse(file.read())

# State shared with the extraction loop further down.
ln = 0  # index of the most recent cue a quote piece was taken from
le = 0  # end time, in seconds, of that same cue
quotes = []  # collected quotes, in the order they appear in the file
def clean_quote(raw_quote):
    """Flatten a raw subtitle fragment into one tidy line.

    Every newline becomes a space, surrounding whitespace is trimmed, and
    any leading run of hyphens and spaces (e.g. a dialogue dash) is removed.
    """
    single_line = " ".join(raw_quote.split("\n"))
    trimmed = single_line.strip()
    return trimmed.lstrip("- ")
# Collect the italicised text of every cue from FIRST_INDEX onward, merge
# neighbouring cues into single quotes, and write the result as a fortune file.

# Cues with a smaller index are skipped.
# NOTE(review): presumably where the wanted dialogue starts in Her.srt - confirm.
FIRST_INDEX = 137
# Largest gap, in whole seconds, between the end of one italic cue and the
# start of the next for both to count as the same quote.
MAX_GAP_SECONDS = 2
# Compiled once instead of on every iteration. DOTALL lets an italic span
# run across the line breaks inside a cue.
ITALIC_SPAN = re.compile(r'<i>(.*?)</i>', re.DOTALL)

for subtitle in subtitles:
    if subtitle.index < FIRST_INDEX:
        continue

    # A cue can hold several <i>...</i> spans (for example one per line);
    # keep all of them rather than only the first.
    spans = ITALIC_SPAN.findall(subtitle.content)
    if not spans:
        continue
    cleaned = [clean_quote(span) for span in spans]
    quote_piece = " ".join(piece for piece in cleaned if piece)

    # ln / le describe the previous italic cue: a cue that directly follows it
    # by index and starts within MAX_GAP_SECONDS of its end continues the quote.
    # Only the whole-second parts of the timestamps are compared.
    continues_previous = (
        subtitle.index - ln == 1
        and subtitle.start.seconds - le <= MAX_GAP_SECONDS
    )
    if quotes and continues_previous:
        quotes[-1] += f" {quote_piece}"
    else:
        quotes.append(quote_piece)
    ln = subtitle.index
    le = subtitle.end.seconds

# Quotes are separated by a line holding a single "%".
with open("fortunes/samantha", "w", encoding='utf-8') as f:
    f.write("\n%\n".join(quotes))
print("Quotes have been compiled in ./fortunes/samantha")