Skip to content

Commit

Permalink
add argument size to dataset for debugging
Browse files Browse the repository at this point in the history
  • Loading branch information
GreatYYX committed Apr 3, 2018
1 parent 7caf658 commit 9ca88f5
Showing 1 changed file with 9 additions and 2 deletions.
11 changes: 9 additions & 2 deletions rltk/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,17 @@ class Dataset(object):
reader (Reader, optional): Input reader.
record_class (type(Record), optional): Sub class of Record.
adapter (KeyValueAdapter, optional): Specify where to store indexed data. Defaults to MemoryAdapter.
size (int, optional): Maximum number of records to load into the Dataset. Defaults to None (no limit). Useful when debugging.
Note:
Set reader, record_class and adapter if a new Dataset needs to be generated.
If Dataset is already generated and stored in a permanent adapter, only adapter needs to be provided.
"""
def __init__(self, reader: Reader = None, record_class: type(Record) = None, adapter: KeyValueAdapter = None):
def __init__(self, reader: Reader = None, record_class: type(Record) = None, adapter: KeyValueAdapter = None,
size: int = None):
if not adapter:
adapter = MemoryAdapter()
self._adapter = adapter
self._size = size

# build index
if reader and record_class:
Expand All @@ -31,11 +33,16 @@ def __init__(self, reader: Reader = None, record_class: type(Record) = None, ada
def _build_index(self):
    """
    Populate the adapter with records produced from the reader.

    Each raw object yielded by the reader is wrapped in the record class, has
    its property cache generated, and is then handed to the adapter's ``set``
    together with the record's id. When a truthy size limit is configured,
    reading stops as soon as that many raw objects have been processed.

    Raises:
        ValueError: If the reader or the record class is missing.
    """
    if not (self._reader and self._record_class):
        raise ValueError('Reader or Record class is not provided.')

    for seen, raw in enumerate(self._reader, start=1):
        record = self._record_class(raw)
        generate_record_property_cache(record)
        self._adapter.set(record.id, record)

        # A falsy limit (None or 0) means "read everything".
        if self._size and seen >= self._size:
            break

def get_record(self, record_id):
"""
Getter of a record.
Expand Down

0 comments on commit 9ca88f5

Please sign in to comment.