Skip to content

Commit

Permalink
refactor structure of io package
Browse files Browse the repository at this point in the history
  • Loading branch information
GreatYYX committed Mar 21, 2018
1 parent 02fd47d commit da2991f
Show file tree
Hide file tree
Showing 49 changed files with 82 additions and 201,612 deletions.
65 changes: 65 additions & 0 deletions examples/basic/basic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import os
import rltk


class Record1(rltk.Record):
    """Record exposing the ``doc_id`` / ``doc_value`` entries of its raw object."""

    @property
    def id(self):
        """Return the ``doc_id`` entry of the raw object."""
        raw = self.raw_object
        return raw['doc_id']

    @property
    def value(self):
        """Return the ``doc_value`` entry of the raw object."""
        raw = self.raw_object
        return raw['doc_value']


class Record2(rltk.Record):
    """Record exposing the ``ident`` / ``values`` entries of its raw object."""

    @property
    def id(self):
        """Return the ``ident`` entry of the raw object."""
        return self.raw_object['ident']

    @property
    def value(self):
        """Return the first item of ``values``, or ``''`` when there is none.

        A missing ``values`` key, an explicit ``None`` and an empty
        sequence all fall back to the empty string.
        """
        values = self.raw_object.get('values')
        # Truthiness covers absent, None and empty alike; the previous
        # ``len(v) > 0`` check raised TypeError when the key held None.
        return values[0] if values else ''


# First dataset: rows read from 'ds1.csv' by a CSVReader, wrapped as Record1
# objects and held by a MemoryAdapter.
ds1 = rltk.Dataset(reader=rltk.CSVReader(filename='ds1.csv'), record_class=Record1, adapter=rltk.MemoryAdapter())
# NOTE(review): presumably this loads the reader's records into the adapter --
# confirm against the rltk Dataset implementation.
ds1.build_index()
# Second dataset: lines read from 'ds2.jl' by a JsonLinesReader, wrapped as
# Record2 objects and held by its own MemoryAdapter.
ds2 = rltk.Dataset(reader=rltk.JsonLinesReader(filename='ds2.jl'), record_class=Record2, adapter=rltk.MemoryAdapter())
ds2.build_index()

# for r in ds1:
# print(r.id)
# for r in ds2:
# print(r.id)
# print(ds1.get_record('1').id)


# blocking_file = '/path/to/blocks'
# if not os.path.exists(blocking_file):
# # rltk.n_gram_blocking(
# # iterator1=venture_it,
# # tokens1=VentureRecord.assignee_token,
# #
# # iterator2=patent_it,
# # field2=PatentRecord.patent_name,
# # n_size2=3,
# #
# # output_filename='/path/to/blocks')
# rltk.inverted_index_blocking(
# iterator1=venture_it,
# # tokens1=VentureRecord.assignee_token,
# attribute1 = VentureRecord.assignee,
# iterator2=patent_it,
# tokens2=PatentRecord.patent_token,
#
# output_filename='/path/to/blocks')
#
# NOTE(review): feature_vector is not used in the lines visible here --
# presumably a placeholder for similarity features; confirm before removing.
feature_vector = []
# Candidate record pairs drawn from the two datasets, with no block file.
pairs = rltk.get_record_pairs(ds1, ds2) # same as iterating without blocks
# pairs = rltk.iterate_on_datasets(ds1, ds2, '/path/to/blocks', batch_size=1000000)
# Print the ids of each candidate pair (r1 from ds1, r2 from ds2).
for r1, r2 in pairs:
print(r1.id, r2.id)
# v1 = rltk.levenshtein_similarity(r1.value.lower(), r2.value.lower())
# print(v1)

2 changes: 1 addition & 1 deletion examples/ex4/ex_data1.csv → examples/basic/ds1.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
id,value
doc_id,doc_value
1,hello
2,world
3,foo
Expand Down
5 changes: 5 additions & 0 deletions examples/basic/ds2.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{"ident": "a", "values":["a1"]}
{"ident": "b", "values":["b1", "b2"]}
{"ident": "c", "values":["c1"]}

{"ident": "d"}
3 changes: 0 additions & 3 deletions examples/ex1/df_corpus_1.txt

This file was deleted.

1 change: 0 additions & 1 deletion examples/ex1/ex1.py

This file was deleted.

2 changes: 0 additions & 2 deletions examples/ex1/jl_file_1.jsonl

This file was deleted.

4 changes: 0 additions & 4 deletions examples/ex4/blocking.jsonl

This file was deleted.

25 changes: 0 additions & 25 deletions examples/ex4/ex4.py

This file was deleted.

5 changes: 0 additions & 5 deletions examples/ex4/ex_data2.csv

This file was deleted.

5 changes: 0 additions & 5 deletions examples/ex4/feature.jsonl

This file was deleted.

15 changes: 0 additions & 15 deletions examples/ex4/feature_config.json

This file was deleted.

10 changes: 0 additions & 10 deletions examples/ex4/label.jsonl

This file was deleted.

10 changes: 0 additions & 10 deletions examples/ex4/labeled_feature.jsonl

This file was deleted.

5 changes: 0 additions & 5 deletions examples/ex4/predicted.jsonl

This file was deleted.

27 changes: 0 additions & 27 deletions examples/file_iterator/file_iter.py

This file was deleted.

8 changes: 0 additions & 8 deletions examples/file_iterator/file_iter_test.csv

This file was deleted.

4 changes: 0 additions & 4 deletions examples/file_iterator/file_iter_test.jsonl

This file was deleted.

4 changes: 0 additions & 4 deletions examples/file_iterator/file_iter_test.txt

This file was deleted.

10 changes: 0 additions & 10 deletions examples/indexer/minhash_lsh_indexing.py

This file was deleted.

18 changes: 0 additions & 18 deletions examples/indexer/qgram_indexing.py

This file was deleted.

17 changes: 0 additions & 17 deletions examples/museum_gm/feature_config.json

This file was deleted.

9 changes: 0 additions & 9 deletions examples/museum_gm/feature_vector.jsonl

This file was deleted.

9 changes: 0 additions & 9 deletions examples/museum_gm/featurized.jsonl

This file was deleted.

84 changes: 0 additions & 84 deletions examples/museum_gm/gm.py

This file was deleted.

9 changes: 0 additions & 9 deletions examples/museum_gm/ground_truth.jsonl

This file was deleted.

0 comments on commit da2991f

Please sign in to comment.