Skip to content

Commit

Permalink
rename file adapter to dbm adapter, update basic example
Browse files (browse the repository at this point in the history)
  • Loading branch information
GreatYYX committed Mar 22, 2018
1 parent 400cce7 commit 2ee9e19
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 16 deletions.
30 changes: 23 additions & 7 deletions examples/basic/basic.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import os
import rltk


class Record1(rltk.Record):
@property
def id(self):
Expand All @@ -23,9 +22,13 @@ def value(self):
return v[0] if len(v) > 0 else ''


ds1 = rltk.Dataset(reader=rltk.CSVReader(filename='ds1.csv'), record_class=Record1, adapter=rltk.MemoryAdapter())
ds1 = rltk.Dataset(reader=rltk.CSVReader(filename='ds1.csv'),
record_class=Record1, adapter=rltk.MemoryAdapter())
ds1.build_index()
ds2 = rltk.Dataset(reader=rltk.JsonLinesReader(filename='ds2.jl'), record_class=Record2, adapter=rltk.MemoryAdapter())
# ds2 = rltk.Dataset(reader=rltk.JsonLinesReader(filename='ds2.jl'),
# record_class=Record2, adapter=rltk.MemoryAdapter())
ds2 = rltk.Dataset(reader=rltk.JsonLinesReader(filename='ds2.jl'),
record_class=Record2, adapter=rltk.DBMAdapter('file_index'))
ds2.build_index()

# for r in ds1:
Expand Down Expand Up @@ -55,11 +58,24 @@ def value(self):
#
# output_filename='/path/to/blocks')
#
feature_vector = []
pairs = rltk.get_record_pairs(ds1, ds2) # same to without blocks
# pairs = rltk.iterate_on_datasets(ds1, ds2, '/path/to/blocks', batch_size=1000000)



print('without block...')
pairs = rltk.get_record_pairs(ds1, ds2)
for r1, r2 in pairs:
print(r1.id, r1.value, '\t', r2.id, r2.value)

print('with block...')
# blocks_raw = [
# {'1': 'b'},
# {'2': ['c', 'd']}
# ]
# blocks = rltk.BlockReader(blocks_raw)
blocks = rltk.BlockReader(rltk.JsonLinesReader('block.jl'))
pairs = rltk.get_record_pairs(ds1, ds2, blocks)
for r1, r2 in pairs:
print(r1.id, r2.id)
print(r1.id, r1.value, '\t', r2.id, r2.value)
# v1 = rltk.levenshtein_similarity(r1.value.lower(), r2.value.lower())
# print(v1)

2 changes: 2 additions & 0 deletions examples/basic/block.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{"1":["b"]}
{"2":["c", "d"]}
10 changes: 3 additions & 7 deletions rltk/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
from evaluation import *
from indexer import *
from similarity import *
from tokenizer import *
from core import *
from record import Record
from record_iterator import RecordIterator
from rltk.record import Record
from rltk.dataset import Dataset, get_record_pairs
from rltk.io import *
2 changes: 1 addition & 1 deletion rltk/io/adapter/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from rltk.io.adapter.adapter import Adapter
from rltk.io.adapter.memory_adapter import MemoryAdapter
from rltk.io.adapter.file_adapter import FileAdapter
from rltk.io.adapter.dbm_adapter import DBMAdapter
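2 changes: 1 addition & 1 deletion rltk/io/adapter/file_adapter.py → rltk/io/adapter/dbm_adapter.py
Original file line number Diff line number Diff line change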
Expand Up @@ -5,7 +5,7 @@
from rltk.record import Record


class FileAdapter(Adapter):
class DBMAdapter(Adapter):
def __init__(self, filename, dbm_class=dbm.ndbm):
"""
:dbm_class dbm, dbm.gnu, dbm.ndbm, dbm.dumb (same as dbm)
Expand Down

0 comments on commit 2ee9e19

Please sign in to comment.