Skip to content

Commit

Permalink
Merge pull request #319 from RavirajBaraiya/peekModel
Browse files Browse the repository at this point in the history
new phase PeekModel
  • Loading branch information
sonalgoyal committed Jun 9, 2022
2 parents bfadc5c + a29f5eb commit 2a8f81b
Show file tree
Hide file tree
Showing 5 changed files with 202 additions and 1 deletion.
2 changes: 2 additions & 0 deletions client/src/main/java/zingg/client/ZinggOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ public enum ZinggOptions {
LINK("link"),
GENERATE_DOCS("generateDocs"),
UPDATE_LABEL("updateLabel"),
ASSESS_MODEL("assessModel"),
PEEK_MODEL("peekModel"),
FIND_AND_LABEL("findAndLabel");

private String value;
Expand Down
32 changes: 32 additions & 0 deletions core/src/main/java/zingg/PeekModel.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package zingg;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import zingg.client.ZinggClientException;
import zingg.client.ZinggOptions;

/**
 * Zingg phase associated with {@link ZinggOptions#PEEK_MODEL}.
 *
 * <p>The phase body is currently a placeholder: {@link #execute()} only logs
 * its start and finish messages.
 */
public class PeekModel extends ZinggBase{

    /** Fully qualified class name of this phase. */
    protected static String name = "zingg.PeekModel";
    public static final Log LOG = LogFactory.getLog(PeekModel.class);

    /** Creates the phase and tags it with the {@code PEEK_MODEL} option. */
    public PeekModel() {
        setZinggOptions(ZinggOptions.PEEK_MODEL);
    }

    /**
     * Runs the phase.
     *
     * @throws ZinggClientException if any exception is raised while the phase
     *         runs; the new exception carries the original exception's message
     */
    @Override
    public void execute() throws ZinggClientException {
        try {
            LOG.info("PeekModel starts");

            //do something

            LOG.info("PeekModel finishes");
        } catch (Exception e) {
            // Route the stack trace through the class logger instead of
            // dumping it on stderr, so it lands wherever logging is configured.
            LOG.error("PeekModel failed", e);
            throw new ZinggClientException(e.getMessage());
        }
    }

}
3 changes: 2 additions & 1 deletion core/src/main/java/zingg/ZFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ public ZFactory() {}
zinggers.put(ZinggOptions.GENERATE_DOCS, Documenter.name);
zinggers.put(ZinggOptions.UPDATE_LABEL, LabelUpdater.name);
zinggers.put(ZinggOptions.FIND_AND_LABEL, FindAndLabeller.name);
zinggers.put(ZinggOptions.PEEK_MODEL, PeekModel.name);
}

public IZingg get(ZinggOptions z) throws InstantiationException, IllegalAccessException, ClassNotFoundException {
Expand All @@ -30,4 +31,4 @@ public IZingg get(ZinggOptions z) throws InstantiationException, IllegalAccessEx



}
}
57 changes: 57 additions & 0 deletions core/src/test/java/zingg/TestPeekModel.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package zingg;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import zingg.client.Arguments;
import zingg.client.ZinggClientException;
import zingg.client.pipe.FilePipe;
import zingg.client.pipe.InMemoryPipe;
import zingg.client.pipe.Pipe;
import zingg.client.util.ColName;
/**
 * End-to-end integration test for the {@link PeekModel} phase.
 */
public class TestPeekModel extends ZinggSparkTester{
    public static final Log LOG = LogFactory.getLog(TestPeekModel.class);

    /** In-memory pipe installed as the sole output of the test arguments. */
    InMemoryPipe outputPipe;

    /**
     * Loads the test configuration and rewires its model directory, input
     * location and output pipe to classpath resources / an in-memory pipe.
     */
    @BeforeEach
    public void setUp() throws Exception, ZinggClientException{
        String configFile = getClass().getResource("/testPeekModel/config.json").getFile();
        args = Arguments.createArgumentsFromJSON(configFile);

        String modelDir = getClass().getResource("/testFebrl/models").getPath();
        args.setZinggDir(modelDir);

        // Point the first configured input pipe at the bundled CSV file.
        Pipe inputPipe = args.getData()[0];
        String csvLocation = getClass().getResource("/testPeekModel/test.csv").getPath();
        inputPipe.setProp(FilePipe.LOCATION, csvLocation);
        args.setData(new Pipe[]{inputPipe});

        outputPipe = new InMemoryPipe(inputPipe);
        args.setOutput(new Pipe[]{outputPipe});
    }

    /** Runs the phase and checks the number of marked records. */
    @Test
    public void testOutPut(){
        PeekModel peekModel = new PeekModel();
        try {
            peekModel.init(args, "");
            peekModel.setArgs(args);
            peekModel.execute();

            Dataset<Row> markedRecords = peekModel.getMarkedRecords();
            long markedCount = markedRecords.count();
            assertEquals(80, markedCount);
        } catch (ZinggClientException e) {
            // Any client exception here is a test failure.
            fail("did not expect " + e);
        }
    }

}
109 changes: 109 additions & 0 deletions core/src/test/resources/testPeekModel/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
{
"fieldDefinition":[
{
"fieldName" : "id",
"matchType" : "dont use",
"fields" : "fname",
"dataType": "\"string\""
},
{
"fieldName" : "fname",
"matchType" : "fuzzy",
"fields" : "fname",
"dataType": "\"string\""
},
{
"fieldName" : "lname",
"matchType" : "fuzzy",
"fields" : "lname",
"dataType": "\"string\""
},
{
"fieldName" : "stNo",
"matchType": "exact",
"fields" : "stNo",
"dataType": "\"string\""
},
{
"fieldName" : "add1",
"matchType": "fuzzy",
"fields" : "add1",
"dataType": "\"string\""
},
{
"fieldName" : "add2",
"matchType": "fuzzy",
"fields" : "add2",
"dataType": "\"string\""
},
{
"fieldName" : "city",
"matchType": "fuzzy",
"fields" : "city",
"dataType": "\"string\""
},
{
"fieldName" : "areacode",
"matchType": "exact",
"fields" : "areacode",
"dataType": "\"string\""
},
{
"fieldName" : "state",
"matchType": "exact",
"fields" : "state",
"dataType": "\"string\""
},
{
"fieldName" : "dob",
"matchType": "exact",
"fields" : "dob",
"dataType": "\"string\""
},
{
"fieldName" : "ssn",
"matchType": "exact",
"fields" : "ssn",
"dataType": "\"string\""
}
],
"output" : [{
"name":"output",
"format":"csv",
"props": {
"location": "/tmp/testPeekModel/zinggOutput",
"delimiter": ",",
"header":true
}
}],
"data" : [{
"name":"test",
"format":"csv",
"props": {
"location": "./testPeekModel/test.csv",
"delimiter": ",",
"header":false
},
"schema":
"{\"type\" : \"struct\",
\"fields\" : [
{\"name\":\"id\", \"type\":\"string\", \"nullable\":false},
{\"name\":\"fname\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"lname\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"stNo\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"add1\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"add2\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"city\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"areacode\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"state\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"dob\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"ssn\",\"type\":\"string\",\"nullable\":true}
]
}"
}],
"labelDataSampleSize" : 0.5,
"numPartitions":4,
"modelId": 100,
"zinggDir": "./testFebrl/models"

}

0 comments on commit 2a8f81b

Please sign in to comment.