-
Notifications
You must be signed in to change notification settings - Fork 109
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #803 from zinggAI/selectCols
use IPairBuilder for building pairs
- Loading branch information
Showing
5 changed files
with
125 additions
and
29 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
9 changes: 9 additions & 0 deletions
9
common/core/src/main/java/zingg/common/core/pairs/IPairBuilder.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
package zingg.common.core.pairs; | ||
|
||
import zingg.common.client.ZFrame; | ||
|
||
public interface IPairBuilder<S, D, R, C> { | ||
|
||
public ZFrame<D, R, C> getPairs(ZFrame<D,R,C>blocked, ZFrame<D,R,C>bAll) throws Exception; | ||
|
||
} |
55 changes: 55 additions & 0 deletions
55
common/core/src/main/java/zingg/common/core/pairs/SelfPairBuilder.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
package zingg.common.core.pairs; | ||
|
||
import org.apache.commons.logging.Log; | ||
import org.apache.commons.logging.LogFactory; | ||
|
||
import zingg.common.client.IArguments; | ||
import zingg.common.client.ZFrame; | ||
import zingg.common.client.util.ColName; | ||
import zingg.common.client.util.DSUtil; | ||
|
||
public class SelfPairBuilder<S, D, R, C> implements IPairBuilder<S, D, R, C> { | ||
|
||
protected DSUtil<S, D, R, C> dsUtil; | ||
public static final Log LOG = LogFactory.getLog(SelfPairBuilder.class); | ||
protected IArguments args; | ||
|
||
public SelfPairBuilder(DSUtil<S, D, R, C> dsUtil, IArguments args) { | ||
this.dsUtil = dsUtil; | ||
this.args = args; | ||
} | ||
|
||
@Override | ||
public ZFrame<D, R, C> getPairs(ZFrame<D,R,C>blocked, ZFrame<D,R,C>bAll) throws Exception { | ||
ZFrame<D,R,C>joinH = getDSUtil().joinWithItself(blocked, ColName.HASH_COL, true).cache(); | ||
/*ZFrame<D,R,C>joinH = blocked.as("first").joinOnCol(blocked.as("second"), ColName.HASH_COL) | ||
.selectExpr("first.z_zid as z_zid", "second.z_zid as z_z_zid"); | ||
*/ | ||
//joinH.show(); | ||
joinH = joinH.filter(joinH.gt(ColName.ID_COL)); | ||
LOG.warn("Num comparisons " + joinH.count()); | ||
joinH = joinH.repartition(args.getNumPartitions(), joinH.col(ColName.ID_COL)); | ||
bAll = bAll.repartition(args.getNumPartitions(), bAll.col(ColName.ID_COL)); | ||
joinH = joinH.joinOnCol(bAll, ColName.ID_COL); | ||
LOG.warn("Joining with actual values"); | ||
//joinH.show(); | ||
bAll = getDSUtil().getPrefixedColumnsDS(bAll); | ||
//bAll.show(); | ||
joinH = joinH.repartition(args.getNumPartitions(), joinH.col(ColName.COL_PREFIX + ColName.ID_COL)); | ||
joinH = joinH.joinOnCol(bAll, ColName.COL_PREFIX + ColName.ID_COL); | ||
LOG.warn("Joining again with actual values"); | ||
//joinH.show(); | ||
return joinH; | ||
} | ||
|
||
public DSUtil<S, D, R, C> getDSUtil() { | ||
return dsUtil; | ||
} | ||
|
||
public void setDSUtil(DSUtil<S, D, R, C> dsUtil) { | ||
this.dsUtil = dsUtil; | ||
} | ||
|
||
|
||
|
||
} |
26 changes: 26 additions & 0 deletions
26
common/core/src/main/java/zingg/common/core/pairs/SelfPairBuilderSourceSensitive.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
package zingg.common.core.pairs; | ||
|
||
import org.apache.commons.logging.Log; | ||
import org.apache.commons.logging.LogFactory; | ||
|
||
import zingg.common.client.IArguments; | ||
import zingg.common.client.ZFrame; | ||
import zingg.common.client.util.ColName; | ||
import zingg.common.client.util.DSUtil; | ||
|
||
public class SelfPairBuilderSourceSensitive<S, D, R, C> extends SelfPairBuilder<S, D, R, C> { | ||
|
||
public static final Log LOG = LogFactory.getLog(SelfPairBuilderSourceSensitive.class); | ||
|
||
public SelfPairBuilderSourceSensitive(DSUtil<S, D, R, C> dsUtil, IArguments args) { | ||
super(dsUtil, args); | ||
} | ||
|
||
@Override | ||
public ZFrame<D,R,C> getPairs(ZFrame<D,R,C> blocked, ZFrame<D,R,C> bAll) throws Exception{ | ||
// THIS LOG IS NEEDED FOR PLAN CALCULATION USING COUNT, DO NOT REMOVE | ||
LOG.info("in getBlocks, blocked count is " + blocked.count()); | ||
return getDSUtil().joinWithItselfSourceSensitive(blocked, ColName.HASH_COL, args).cache(); | ||
} | ||
|
||
} |