Skip to content

Commit

Permalink
update clone of file iterator to store state
Browse files Browse the repository at this point in the history
  • Loading branch information
GreatYYX committed Apr 19, 2017
1 parent 727dbdc commit 8fef832
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 9 deletions.
12 changes: 8 additions & 4 deletions examples/file_iter.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,13 @@
for id, value in iter:
print id, value

iter2 = iter.copy()
iter3 = iter.copy()
print '----'
iter1 = tk.get_file_iterator('file_iter_test.txt', type='text')
for id, value in iter1:
print id, value
break
iter2 = iter1.copy()
for id, value in iter1:
print id, value
for id, value in iter2:
print id, value
for id, value in iter3:
print id, value
24 changes: 19 additions & 5 deletions rltk/file_iterator.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import json
import csv
import hashlib
import itertools

from jsonpath_rw import parse


Expand Down Expand Up @@ -35,20 +37,31 @@ def __init__(self, file_path, type='text', **kwargs):
self._id_prefix = hashlib.md5(file_path).hexdigest()[:6]

def __copy__(self):
    """
    Clone the iterator, including its current position.

    A new FileIterator is opened on the same file path with the same type
    and keyword arguments, then advanced until its ``_count`` has caught up
    with this iterator's ``_count``.

    itertools.tee is deliberately not used here; the original author notes
    that it is not helpful for this case and would occupy a lot of memory:
    https://docs.python.org/2/library/itertools.html#itertools.tee

    Returns:
        FileIterator: an independent iterator over the same file.
    """
    new_iter = FileIterator(self._file_path, self._type, **self._kwargs)

    # NOTE(review): assumes next() increments ``_count`` once per record;
    # that code is outside this view -- confirm.
    # The comparison happens *before* each read. Testing for equality only
    # after a read never matches when this iterator has not been advanced
    # yet (count 0), which drained the whole clone.
    try:
        while new_iter._count < self._count:
            next(new_iter)
    except StopIteration:
        # The file yielded fewer records than this iterator has counted;
        # hand back the exhausted clone instead of raising from a copy.
        pass
    return new_iter

def copy(self):
    """
    Public alias for ``__copy__``.

    Returns:
        The object produced by ``self.__copy__()``.
    """
    clone = self.__copy__()
    return clone

def next(self):
"""
Returns:
misc, dict: object id, value
str, dict: object id, value
"""
try:
oid, value = None, None
if self._type == 'json_line':
line = self._file_handler.next()
line = next(self._file_handler)
line = json.loads(line)

matches = self._id_path_parser.find(line)
Expand Down Expand Up @@ -79,14 +92,15 @@ def next(self):
return oid, value

except StopIteration as e:
self._file_handler.close()
# self._file_handler.close()
raise e

def __iter__(self):
return self

def __del__(self):
try:
self._file_handler.close()
pass
# self._file_handler.close()
except:
pass

0 comments on commit 8fef832

Please sign in to comment.