Skip to content

Commit

Permalink
issue #607 refactor matcher
Browse files Browse the repository at this point in the history
  • Loading branch information
vikasgupta78 committed Jun 9, 2023
1 parent 8dadbc4 commit 5ad7ce0
Showing 1 changed file with 8 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,15 @@ public Matcher() {
}

/**
 * Reads the data described by {@code args.getData()} through the pipe utility,
 * using {@code args.getNumPartitions()} as one of the read arguments, and returns
 * the resulting frame.
 *
 * @return the frame produced by {@code getPipeUtil().read(...)}
 * @throws ZinggClientException declared by this method; presumably raised when the read fails — confirm
 */
// NOTE(review): the two read(...) lines below look like the removed and the added side of
// this diff hunk; the later one passes one additional leading boolean argument. Only one of
// them can exist in the real file — confirm against the commit.
protected ZFrame<D,R,C> getTestData() throws ZinggClientException{
ZFrame<D,R,C> data = getPipeUtil().read(true, args.getNumPartitions(), true, args.getData());
ZFrame<D,R,C> data = getPipeUtil().read(true, true, args.getNumPartitions(), true, args.getData());
return data;
}

/**
 * Hands the given frame to the DS utility's {@code getFieldDefColumnsDS}, together with
 * this instance's {@code args} and a {@code true} flag, and returns whatever it produces.
 * Declared {@code protected}, so subclasses can override this step.
 *
 * @param testDataOriginal the frame to pass to the DS utility
 * @return the frame returned by the DS utility
 */
protected ZFrame<D, R, C> getFieldDefColumnsDS(ZFrame<D, R, C> testDataOriginal) {
    // NOTE(review): the meaning of the trailing 'true' is defined by the DS utility — confirm there.
    ZFrame<D, R, C> fieldDefColumns =
            getDSUtil().getFieldDefColumnsDS(testDataOriginal, args, true);
    return fieldDefColumns;
}


protected ZFrame<D,R,C> getBlocked( ZFrame<D,R,C> testData) throws Exception, ZinggClientException{
LOG.debug("Blocking model file location is " + args.getBlockFile());
Tree<Canopy<R>> tree = getBlockingTreeUtil().readBlockingTree(args);
Expand Down Expand Up @@ -81,11 +86,12 @@ protected ZFrame<D,R,C> getBlocks(ZFrame<D,R,C>blocked, ZFrame<D,R,C>bAll) throw
return blocked.select(ColName.ID_COL, ColName.HASH_COL);
}

@Override
public void execute() throws ZinggClientException {
try {
// read input, filter, remove self joins
ZFrame<D,R,C> testDataOriginal = getTestData();
testDataOriginal = getDSUtil().getFieldDefColumnsDS(testDataOriginal, args, true);
testDataOriginal = getFieldDefColumnsDS(testDataOriginal);
ZFrame<D,R,C> testData = getStopWords().preprocessForStopWords(testDataOriginal);
testData = testData.repartition(args.getNumPartitions(), testData.col(ColName.ID_COL));
//testData = dropDuplicates(testData);
Expand Down

0 comments on commit 5ad7ce0

Please sign in to comment.