Skip to content

Commit

Permalink
[output] Added .jsonl writer and removed --json_lines argument
Browse files Browse the repository at this point in the history
  • Loading branch information
xenova committed Jul 13, 2021
1 parent e0e8b03 commit 76691b3
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 20 deletions.
4 changes: 2 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ Command line
[--ignore IGNORE]
[--message_receive_timeout MESSAGE_RECEIVE_TIMEOUT]
[--buffer_size BUFFER_SIZE] [--output OUTPUT]
[--overwrite] [--sort_keys] [--json_lines]
[--indent INDENT] [--pause_on_debug | --exit_on_debug]
[--overwrite] [--sort_keys] [--indent INDENT]
[--pause_on_debug | --exit_on_debug]
[--logging {none,debug,info,warning,error,critical} | --testing | --verbose | --quiet]
[--cookies COOKIES] [--proxy PROXY]
url
Expand Down
23 changes: 6 additions & 17 deletions chat_downloader/chat_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ def get_chat(self, url=None,
overwrite=True,
sort_keys=True,
indent=4,
json_lines=False,

# Formatting
format=SiteDefault('format'),
Expand Down Expand Up @@ -179,9 +178,6 @@ def get_chat(self, url=None,
nonnumerical input is provided, this will be used to indent
the objects. Defaults to 4
:type indent: Union[int, str], optional
:param json_lines: Output each chat item on a separate line, in JSON
format. This has no effect for .csv or .json files. Defaults to False
:type json_lines: bool, optional
:param format: Specify how messages should be formatted for printing,
defaults to the site's default value
:type format: SiteDefault, optional
Expand Down Expand Up @@ -300,19 +296,12 @@ def log_on_inactivity_timeout():
output_file = ContinuousWriter(
params['output'], indent=params['indent'], sort_keys=params['sort_keys'], overwrite=params['overwrite'])

def _write_function(item):

if output_file.is_default(): # not JSON or CSV
if params['json_lines']: # print json lines of item
item = json.dumps(
item, sort_keys=params['sort_keys'])
else:
# print formatted item
item = chat.format(item)

output_file.write(item, flush=True)

chat.callback = _write_function
if output_file.is_default():
chat.callback = lambda item: output_file.write(
chat.format(item), flush=True)
else:
chat.callback = lambda item: output_file.write(
item, flush=True)

chat.site = site_object

Expand Down
1 change: 0 additions & 1 deletion chat_downloader/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,6 @@ def add_init_param(group, *keys, **kwargs):
add_chat_param(output_group, '--output', '-o')
add_chat_param(output_group, '--overwrite', is_boolean_flag=True)
add_chat_param(output_group, '--sort_keys', is_boolean_flag=True)
add_chat_param(output_group, '--json_lines', is_boolean_flag=True)
add_chat_param(output_group, '--indent', type=lambda x: int_or_none(x, x))

debug_group = parser.add_argument_group('Debugging/Testing Arguments')
Expand Down
16 changes: 16 additions & 0 deletions chat_downloader/output/continuous_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,21 @@ def write(self, item, flush=False, flatten=True):
self.flush()


class JSONLCW(CW):
    """
    Class used to control the continuous writing of JSON Lines output,
    i.e. one JSON document per line of the file.

    :param file_name: Path of the file to append to
    :param overwrite: Forwarded unchanged to the base writer,
        defaults to True
    :type overwrite: bool, optional
    :param sort_keys: Passed to ``json.dumps`` when serialising each
        item, defaults to True
    :type sort_keys: bool, optional
    """

    def __init__(self, file_name, overwrite=True, sort_keys=True):
        super().__init__(file_name, overwrite)
        # Opened in append mode and kept open for the lifetime of the
        # writer; released again in close().
        self.file = open(self.file_name, 'a', encoding='utf-8')
        self.sort_keys = sort_keys

    def write(self, item, flush=False):
        """
        Serialise ``item`` to JSON and write it as a single line.

        :param item: JSON-serialisable object to write
        :param flush: Whether to flush the file after writing,
            defaults to False
        :type flush: bool, optional
        """
        print(json.dumps(item, sort_keys=self.sort_keys),
              file=self.file, flush=flush)

    def close(self):
        """
        Close the file handle opened in ``__init__``. Closing an
        already-closed file is a no-op.
        """
        self.file.close()
        # NOTE(review): CW is defined outside this block. If it provides
        # its own close(), run it too so base-class cleanup is not
        # skipped by this override.
        base_close = getattr(super(), 'close', None)
        if base_close is not None:
            base_close()


class TXTCW(CW):
"""
Class used to control the continuous writing of a text to a TXT file.
Expand All @@ -195,6 +210,7 @@ class ContinuousWriter:
_SUPPORTED_WRITERS = {
'json': JSONCW,
'csv': CSVCW,
'jsonl': JSONLCW,
'txt': TXTCW
}

Expand Down

0 comments on commit 76691b3

Please sign in to comment.