Skip to content

Commit

Permalink
5k in 2 minutes
Browse files Browse the repository at this point in the history
  • Loading branch information
sonalgoyal committed Jun 9, 2022
1 parent 1767c9f commit f3cdc67
Show file tree
Hide file tree
Showing 33 changed files with 23 additions and 17 deletions.
2 changes: 1 addition & 1 deletion client/src/main/resources/log4j.properties
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ log4j.logger.akka=WARN, FILE
log4j.logger.breeze=WARN, FILE
log4j.logger.zingg=INFO
log4j.logger.zingg.hash=INFO
log4j.logger.zingg.distBlock=INFO
log4j.logger.zingg.distBlock=WARN
log4j.logger.zingg.block=INFO
log4j.logger.zingg.Matcher=INFO
log4j.logger.zingg.Model=INFO
Expand Down
4 changes: 2 additions & 2 deletions core/src/main/java/zingg/distBlock/BFn.java
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@ else if (hash1 != null && hash2 != null && hash1.equals(hash2)) {
}

public void estimateChildren(Context ctx) {
//result.approxChildren = ctx.getDataSample().select(ColName.HASH_COL + index).distinct().count();
result.approxChildren = 100;
result.approxChildren = ctx.getDataSample().select(ColName.HASH_COL + index).distinct().count();
//result.approxChildren = 100;
}


Expand Down
28 changes: 14 additions & 14 deletions core/src/main/java/zingg/distBlock/BTreeBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -126,24 +126,24 @@ public BFn getBestNode(Tree<BFn> tree, BFn parent, BFn node,
for (Integer j : fnsToTry.keySet()) {
Fn fn = fnsToTry.get(j);
if (!isFunctionUsed(tree, node, fn)) {
BFn bFnToTry = new BFn(fn, new FnResult());
bFnToTry.estimateElimCount(context);
BFn bFnToTry = new BFn(fn, new FnResult());
bFnToTry.estimateElimCount(context);
if (bFnToTry.getResult().compareTo(least) > 0){
//LOG.debug(" new is better");
bFnToTry.estimateChildren(context);
//LOG.debug("Comparing " + bFnToTry.result + " with " + least);
if (bFnToTry.getResult().approxChildren >= 1) {
if (bFnToTry.getResult().compareTo(least) > 0){
//LOG.debug(" new is better");
best = bFnToTry;
least = bFnToTry.getResult();
//greedy, how much better can it get
if (bFnToTry.getResult().getElimCount() == 0 ) return bFnToTry;
}
/*else {
LOG.debug(" old is better ");
}*/
if (bFnToTry.getResult().approxChildren > 1) {
best = bFnToTry;
least = bFnToTry.getResult();
//greedy, how much better can it get
if (bFnToTry.getResult().getElimCount() == 0 ) return bFnToTry;
}//childless is of no use

}
/*else {
LOG.debug(" old is better ");
}*/
}

}
return best;
}
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"class":"org.apache.spark.ml.feature.VectorAssembler","timestamp":1654787316522,"sparkVersion":"3.1.2","uid":"vecAssembler_86a5aa12df5d","paramMap":{"inputCols":["z_sim0","z_sim1","z_sim2","z_sim3","z_sim4","z_sim5","z_sim6","z_sim7","z_sim8","z_sim9","z_sim10","z_sim11","z_sim12","z_sim13","z_sim14","z_sim15","z_sim16","z_sim17","z_sim18","z_sim19"],"outputCol":"z_featurevector"},"defaultParamMap":{"handleInvalid":"error","outputCol":"vecAssembler_86a5aa12df5d__output"}}
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"class":"org.apache.spark.ml.feature.PolynomialExpansion","timestamp":1654787316588,"sparkVersion":"3.1.2","uid":"poly_958661eebf7b","paramMap":{"outputCol":"z_feature","inputCol":"z_featurevector","degree":3},"defaultParamMap":{"outputCol":"poly_958661eebf7b__output","degree":2}}
Binary file not shown.
Binary file not shown.
Empty file.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"class":"org.apache.spark.ml.classification.LogisticRegressionModel","timestamp":1654787316644,"sparkVersion":"3.1.2","uid":"logreg_70037b80b4c6","paramMap":{"threshold":0.4,"regParam":1.0E-4,"predictionCol":"z_prediction","maxIter":100,"featuresCol":"z_feature","fitIntercept":true,"probabilityCol":"z_probability","labelCol":"z_isMatch"},"defaultParamMap":{"threshold":0.5,"regParam":0.0,"predictionCol":"prediction","elasticNetParam":0.0,"maxIter":100,"featuresCol":"features","rawPredictionCol":"rawPrediction","maxBlockSizeInMB":0.0,"family":"auto","fitIntercept":true,"aggregationDepth":2,"probabilityCol":"probability","tol":1.0E-6,"labelCol":"label","standardization":true}}
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"class":"org.apache.spark.ml.feature.VectorAssembler","timestamp":1654787316259,"sparkVersion":"3.1.2","uid":"vecAssembler_86a5aa12df5d","paramMap":{"inputCols":["z_sim0","z_sim1","z_sim2","z_sim3","z_sim4","z_sim5","z_sim6","z_sim7","z_sim8","z_sim9","z_sim10","z_sim11","z_sim12","z_sim13","z_sim14","z_sim15","z_sim16","z_sim17","z_sim18","z_sim19"],"outputCol":"z_featurevector"},"defaultParamMap":{"handleInvalid":"error","outputCol":"vecAssembler_86a5aa12df5d__output"}}
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"class":"org.apache.spark.ml.feature.PolynomialExpansion","timestamp":1654787316323,"sparkVersion":"3.1.2","uid":"poly_958661eebf7b","paramMap":{"outputCol":"z_feature","inputCol":"z_featurevector","degree":3},"defaultParamMap":{"outputCol":"poly_958661eebf7b__output","degree":2}}
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"class":"org.apache.spark.ml.classification.LogisticRegression","timestamp":1654787316382,"sparkVersion":"3.1.2","uid":"logreg_70037b80b4c6","paramMap":{"predictionCol":"z_prediction","maxIter":100,"featuresCol":"z_feature","fitIntercept":true,"probabilityCol":"z_probability","labelCol":"z_isMatch"},"defaultParamMap":{"threshold":0.5,"regParam":0.0,"predictionCol":"prediction","elasticNetParam":0.0,"maxIter":100,"featuresCol":"features","rawPredictionCol":"rawPrediction","maxBlockSizeInMB":0.0,"family":"auto","fitIntercept":true,"aggregationDepth":2,"probabilityCol":"probability","tol":1.0E-6,"labelCol":"label","standardization":true}}

0 comments on commit f3cdc67

Please sign in to comment.