Skip to content

Commit

Permalink
fix buffer size calculation
Browse files Browse the repository at this point in the history
  • Loading branch information
GreatYYX committed Apr 16, 2018
1 parent efe5c9f commit fd20231
Showing 1 changed file with 13 additions and 9 deletions.
22 changes: 13 additions & 9 deletions rltk/io/writer/block_file_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,44 +5,47 @@


class BlockFileWriter(BlockWriter):
def __init__(self, filename, buffer_size=10000, set_size=1000, index_blacklist:set=None):
def __init__(self, filename, buffer_size=10000, set_size=float('inf'), index_blacklist:set=None):
self._filename = filename
self._temp_filename = filename + '.temp'
self._buffer_size = buffer_size
self._dict = dict()
self._data_size_in_buffer = 0
self._dict = dict() # buffer
self._set_size = set_size
self._blacklist = index_blacklist or set()

# clean up output file
open(self._filename, 'w').close()

def write(self, id1, id2):
# skip if id1 is in blacklist
if id1 in self._blacklist:
return

# add pairs
self._dict[id1] = self._dict.get(id1, set())
self._dict[id1].add(id2)
if id2 not in self._dict[id1]:
self._dict[id1].add(id2)
self._data_size_in_buffer += 1

# update id1 to blacklist when reaching threshold and remove id1 in memory
if len(self._dict[id1]) > self._set_size:
self._blacklist.add(id1)
self._data_size_in_buffer -= len(self._dict[id1])
del self._dict[id1]

# flush when buffer is full
if len(self._dict) >= self._buffer_size:
if self._data_size_in_buffer >= self._buffer_size:
self.flush()

def get_handler(self):
self.flush()
return self._filename

def flush(self):
if len(self._dict) == 0:
if self._data_size_in_buffer == 0:
return

# create empty file if it's not there
if not os.path.exists(self._filename):
open(self._filename, 'w').close()

fp = open(self._filename, 'r')
temp_fp = open(self._temp_filename, 'w')

Expand Down Expand Up @@ -79,6 +82,7 @@ def flush(self):
fp.close()
temp_fp.close()
self._dict = dict()
self._data_size_in_buffer = 0

# replace file
os.remove(self._filename)
Expand Down

0 comments on commit fd20231

Please sign in to comment.