-
Notifications
You must be signed in to change notification settings - Fork 5
/
split-sentences-by-pipe.py
31 lines (24 loc) · 1011 Bytes
/
split-sentences-by-pipe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
"""
Split sentences by pipe delimiter.
Written by Ye Kyaw Thu, CADT, Cambodia.
Last updated: 10 Mar 2024
Usage:
python ./split-sentences-by-pipe.py --input ./Harry_Potter_all_char_separated.txt \
--output harry-potter.txt
"""
import argparse
def clean_sentences(input_file, output_file=None):
    """Split a pipe-delimited text file into one trimmed sentence per line.

    The whole input file is read as UTF-8 and split on ``|``. Each piece is
    stripped of leading/trailing whitespace, and pieces that are empty after
    stripping are discarded.

    Args:
        input_file: Path of the UTF-8 text file to read.
        output_file: Path of the file to write (UTF-8, overwritten if it
            exists). When omitted or empty, the sentences are printed to
            standard output instead.

    Raises:
        OSError: If the input file cannot be opened for reading or the
            output file cannot be opened for writing.
    """
    with open(input_file, "r", encoding="utf-8") as file:
        data = file.read().split('|')

    # Strip every piece exactly once, then keep only the non-empty results.
    cleaned_sentences = [piece for piece in map(str.strip, data) if piece]

    if output_file:
        with open(output_file, "w", encoding="utf-8") as out_file:
            # Terminate every line, including the last one, so the file
            # matches what the stdout path emits and is a well-formed text
            # file (correct `wc -l` count, safe to concatenate).
            out_file.writelines(sentence + "\n" for sentence in cleaned_sentences)
    else:
        print('\n'.join(cleaned_sentences))
if __name__ == "__main__":
    # Command-line entry point: collect the input path (mandatory) and the
    # output path (optional), then hand both to clean_sentences().
    cli = argparse.ArgumentParser(description="Clean sentences from a text file.")
    cli.add_argument("-i", "--input", required=True, help="Input filename")
    cli.add_argument("-o", "--output", help="Output filename")

    options = cli.parse_args()

    # options.output is None when -o/--output is not given; clean_sentences
    # then prints the result instead of writing a file.
    clean_sentences(options.input, options.output)