From 3f47d52bf73d0ae02e6096852c3bcdf7d4bce4d3 Mon Sep 17 00:00:00 2001 From: gnanaprakash-ravi Date: Mon, 11 Mar 2024 12:01:13 +0530 Subject: [PATCH] LinkerTester --- .../common/core/executor/LinkerTester.java | 39 +++++++++++++++++++ .../core/executor/TestExecutorsGeneric.java | 20 ++++++++++ .../src/test/resources/testLinker/test1.csv | 27 +++++++++++++ .../src/test/resources/testLinker/test2.csv | 38 ++++++++++++++++++ .../core/executor/TestSparkExecutors.java | 6 +++ .../src/test/resources/testLinker/test1.csv | 27 +++++++++++++ .../src/test/resources/testLinker/test2.csv | 38 ++++++++++++++++++ 7 files changed, 195 insertions(+) create mode 100644 common/core/src/test/java/zingg/common/core/executor/LinkerTester.java create mode 100644 common/core/src/test/resources/testLinker/test1.csv create mode 100644 common/core/src/test/resources/testLinker/test2.csv create mode 100644 spark/core/src/test/resources/testLinker/test1.csv create mode 100644 spark/core/src/test/resources/testLinker/test2.csv diff --git a/common/core/src/test/java/zingg/common/core/executor/LinkerTester.java b/common/core/src/test/java/zingg/common/core/executor/LinkerTester.java new file mode 100644 index 000000000..ffaff829f --- /dev/null +++ b/common/core/src/test/java/zingg/common/core/executor/LinkerTester.java @@ -0,0 +1,39 @@ +package zingg.common.core.executor; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import zingg.common.client.ZFrame; +import zingg.common.client.ZinggClientException; + +public class LinkerTester extends ExecutorTester { + + public static final Log LOG = LogFactory.getLog(LinkerTester.class); + + public LinkerTester(Linker linker) { + super(linker); + } + + @Override + public void validateResults() throws ZinggClientException { + LOG.info("link successful"); + ZFrame outputData = getOutputData(); + System.out.println("Inside validateResults in LinkerTester" + outputData); + outputData.show(); + System.out.println(outputData.count()); + assertTrue(outputData != null, "Output data is not null"); + } + + @SuppressWarnings("unchecked") + public ZFrame getOutputData() throws ZinggClientException { + // This is giving matcher output, need to get the linker output. + ZFrame output = executor.getContext().getPipeUtil(). + read(false, false, executor.getArgs().getOutput()); + + System.out.println("Inside getOutputData in LinkerTester" + output); + return output; + } + +} \ No newline at end of file diff --git a/common/core/src/test/java/zingg/common/core/executor/TestExecutorsGeneric.java b/common/core/src/test/java/zingg/common/core/executor/TestExecutorsGeneric.java index c9c3d53dd..fae9cff3d 100644 --- a/common/core/src/test/java/zingg/common/core/executor/TestExecutorsGeneric.java +++ b/common/core/src/test/java/zingg/common/core/executor/TestExecutorsGeneric.java @@ -10,6 +10,8 @@ import zingg.common.client.ArgumentsUtil; import zingg.common.client.IArguments; import zingg.common.client.ZinggClientException; +import zingg.common.client.pipe.FilePipe; +import zingg.common.client.pipe.Pipe; public abstract class TestExecutorsGeneric { @@ -79,6 +81,22 @@ public void testExecutors() throws ZinggClientException { matcher.init(args,session); MatcherTester mt = new MatcherTester(matcher); executorTesterList.add(mt); + + Linker linker = getLinker(); + + Pipe dataPipe1 = args.getData()[0]; + String csvPath_1 = getClass().getResource("../../../../testLinker/test1.csv").getPath(); + dataPipe1.setProp(FilePipe.LOCATION, csvPath_1); + + Pipe dataPipe2 = args.getData()[0]; + String csvPath_2 = getClass().getResource("../../../../testLinker/test2.csv").getPath(); + dataPipe2.setProp(FilePipe.LOCATION, csvPath_2); + + args.setData(new Pipe[]{dataPipe1, dataPipe2}); + + linker.init(args,session); + LinkerTester li = new LinkerTester(linker); + executorTesterList.add(li); testExecutors(executorTesterList); } @@ -100,5 +118,7 @@ public void testExecutors(List> executorTesterList protected abstract Trainer getTrainer() throws ZinggClientException; protected abstract Matcher getMatcher() throws ZinggClientException; + + protected abstract Linker getLinker() throws ZinggClientException; } diff --git a/common/core/src/test/resources/testLinker/test1.csv b/common/core/src/test/resources/testLinker/test1.csv new file mode 100644 index 000000000..5b7ab3466 --- /dev/null +++ b/common/core/src/test/resources/testLinker/test1.csv @@ -0,0 +1,27 @@ +rec-1020-org, blake, ryan,4, starling place, berkeley vlge, marsden,5412, nsw,19271027,2402765 +rec-1021-org, thomas, george,1, mcmanus place, stoney creek, north turramurra,3130, sa,19630225,5460534 +rec-1022-org, jackson, eglinton,840, fowles street, mountview, burleigh heads,2830, sa,19830807,2932837 +rec-1023-org, gianni, matson,701, willis street, boonooloo, clifton,3101, vic,19410111,2540080 +rec-1024-org, takeisha, freeborn,6, suttor street, the groves street, wentworth falls,4615, vic,19620206,8111362 +rec-1025-org, emiily, britten,8, kitchener street, hilltop hostel rowethorpe, lake heights,2463, qld,19491021,9588775 +rec-1026-org, xani, green,2, phillip avenue, abbey green, armidale,5108, nsw,19390410,9201057 +rec-1027-org, nathan, smallacombe,20, guthridge crescent, red cross units, sandy bay,6056, sa,19241223,7522263 +rec-1028-org, , eglinton,24, currie crescent, woorinyan, riverwood,3749, qld,19180205,9341716 +rec-1029-org, kylee, stephenson,81, rose scott circuit, cordoba manor, ashfield,4226, vic,19461101,4783085 +rec-103-org, briony, koerbin,146, violet grover place, wybelanah, mill park,2446, nsw,19210210,3808808 +rec-1030-org, emma, crossman,53, mcdowall place, kellhaven, tara,5608, vic,19391027,3561186 +rec-1031-org, samantha, sabieray,68, quandong street, wattle brae, gorokan,4019, wa,19590807,2863290 +rec-1032-org, brooklyn, naar-cafentas,210, duffy street, tourist park, berwick,2481, nsw,19840802,3624304 +rec-1033-org, keziah, painter,18, ainslie avenue, sec 1, torquay,3205, vic,19191031,7801066 +rec-1034-org, erin, maynard,14, wilshire street, warialda, little river,2777, vic,19970430,7429462 +rec-1035-org, jaiden, rollins,48, rossarden street, tulgeywood, balwyn north,2224, nt,19280722,7626396 +rec-1036-org, amber, held,24, lampard circuit, emerald garden, golden bay,2447, vic,19510806,3710651 +rec-1037-org, connor, beckwith,10, heard street, , mill park,5031, nsw,19081103,2209091 +rec-1038-org, danny, campbell,95, totterdell street, moama, shellharbour,2209, vic,19951105,9554924 +rec-1039-org, angus, rosa,62, gormanston crescent, mlc centre, kirwan,3350, sa,19250817,2655081 +rec-104-org, benjamin, carbone,18, wattle street, arthella, orange,3550, vic,19050820,3677127 +rec-1040-org, matilda, mestrov,5, house circuit, retirement village, taringa,3820, qld,19801119,2563135 +rec-1041-org, tyler, froud,8, burramurra avenue, kmart p plaza, san remo,3670, sa,19800916,7812219 +rec-1042-org, kiandra, cowle,2, gatliff place, rustenburg sth, girgarre,3995, qld,19801125,3328205 +rec-1043-org, giorgia, frahn,62, handasyde street, ramano estate locn 1, tallebudgera,4506, vic,19670206,9724789 +rec-1044-org, nicole, carbone,46, schlich street, simpson army barracks, toowoomba,3000, wa,19030926,8190756 diff --git a/common/core/src/test/resources/testLinker/test2.csv b/common/core/src/test/resources/testLinker/test2.csv new file mode 100644 index 000000000..5f1c10875 --- /dev/null +++ b/common/core/src/test/resources/testLinker/test2.csv @@ -0,0 +1,38 @@ +rec-1021-dup-0, thomas, georgze,1, mcmanus place, , north turarmurra,3130, sa,19630225,5460534 +rec-1022-dup-1, jackson, eglinron,840, mountview, fowles treet, burlei gh heads,2803, sa,19830807,2932837 +rec-1022-dup-2, jackson, eglinton,840, fowles street, moun tvjiew, burleigh heads,2830, ss, ,2932837 +rec-1022-dup-3, jackson, christo,840, fowles street, mou ntveiw, burleig heads,2830, sa,19830807,2932837 +rec-1022-dup-4, jackson, eglinton,840, fowles street, mountv iew, burleigh heads,2830, sa,19830807,2932837 +rec-1026-dup-0, xani, green, , phill ip avenue, , armidale,5108, nsw,19390410,9201057 +rec-1026-dup-1, xani, green,2, phillip avenue, abbey green, armidale,5108, nsw,19390410,9201857 +rec-1028-dup-0, , ,24, , woorinyan, riverwood,3749, qld,19180205,9341716 +rec-1028-dup-1, , eglinton,24, curriecrescent, woorinyan, riverwood,3749, qld,19180205,1909717 +rec-1029-dup-0, kylee, stepehndon,81, rose scott circuit, cordobak anor, ashfield,4226, vic,19461101,4783085 +rec-1029-dup-1, sachin, stephenson,81, rose scott circuit, cordoba manor, ashfi eld,4226, vic,19461101,4783085 +rec-1029-dup-2, annalise, stephenson,81, rose scott circuit, cordoba manor, ashfoeld,4226, vic,19461101,4783085 +rec-1029-dup-3, kykee, turale,81, rose scott circuit, , ashfield,4226, vic,19461101,4783085 +rec-1029-dup-4, kylee, stephenson,81, cordoba manor, rose scott circuit, ashfield,4226, vic,19461101,4783085 +rec-103-dup-0, benjamin, koerbin,15, wybel anah, violet grover place, mill park,2446, nsw,19210210,3808808 +rec-1032-dup-0, brooklyn, naar-cafentas,210, duffy street, tourist psrk, berwick,2481, nsw, ,3624304 +rec-1033-dup-0, keziah, painter,18, ainsli e avenue, sec 1, torquay,3205, vic,19191031,7801066 +rec-1034-dup-0, erin, maynard,24, , wariala, little river,2777, vic,19970430,7429462 +rec-1034-dup-1, erin, maynard,51, wilshire street, warialda, little irver,2777, vic,19970430,1815999 +rec-1034-dup-2, hayley, maynard,14, wilshire street, , little river,2777, vic,19970430,7429462 +rec-1035-dup-0, jaiden, rollins,48, tulgeywood, rossarden street, balwyn north,2224, nt,19280722,7626396 +rec-1035-dup-1, jaiden, rollins,95, rossarden street, tulgewyood, balwyn north,2224, nt,19280722,7626396 +rec-1035-dup-2, jaiden, rolilns,48, swinden street, tulgeywood, balwyn north,2224, nt,19280722,7626396 +rec-1035-dup-3, jaiden, rolli ns,48, tulgeywomod, rossarden street, balwyn north,2224, nf,19280722,7626396 +rec-1036-dup-0, , held,24, lampard circuit, emerald garden, golden bay,2447, vic,19510806,3710651 +rec-1036-dup-1, sarsha, held,42, lampard circuit, , golden bay,2447, vic,19510806,3710651 +rec-1039-dup-0, angus, roas,62, gormansto crescent, mlc centre, kiruwah,3350, sa,19250817,2655081 +rec-104-dup-0, benjaminl, carbone,18, arthella, wattle s treet, orange,3550, vic,19050820,3677127 +rec-1040-dup-0, matilda, mestrov, , housecicuit, retirement village, taringa,3820, qld,19801119,2536135 +rec-1040-dup-1, matilda, mestrv,5, house circuit, retirement village, taringa,3802, qld,19801119,2563135 +rec-1040-dup-2, matilda, mestrov,5, house circuit, retiremen tvillage, taringa,3820, ,19801119,2563135 +rec-1041-dup-0, tyler, frojd, , burramurra avenue, kmart p plaza, san rmeo,3670, sa,19800916,7812219 +rec-1042-dup-0, kiandra, ,2, gatliff place, rustenburg sth, girgarre,3995, qld,19801125,3328205 +rec-1042-dup-1, kiandra, cowle,2, gatliff place, rustenubr g sth, girgarre,3995, qld,19801125,3328205 +rec-1044-dup-0, nicole, shadbolt,46, schlich s treet, simpson army barracks, toowoomba,3000, wa,19030926,8190756 +rec-1044-dup-1, nicole, carbone,46, schlich nstreet, simpson army barracks, toowoomba,3000, wa,19030926,8190756 +rec-1044-dup-2, nicole, carbone,46, schlich street, simpson arm ybarracks, toowong,3000, wa,19030926,8190756 +rec-1044-dup-3, nicole, carbone,46, schlich street, simpsonary barracks, toowoomba,3000, wa,19030926,8190756 diff --git a/spark/core/src/test/java/zingg/spark/core/executor/TestSparkExecutors.java b/spark/core/src/test/java/zingg/spark/core/executor/TestSparkExecutors.java index 8128fdc1a..e5657c052 100644 --- a/spark/core/src/test/java/zingg/spark/core/executor/TestSparkExecutors.java +++ b/spark/core/src/test/java/zingg/spark/core/executor/TestSparkExecutors.java @@ -64,6 +64,12 @@ protected SparkMatcher getMatcher() throws ZinggClientException { SparkMatcher sm = new SparkMatcher(ctx); return sm; } + + @Override + protected SparkLinker getLinker() throws ZinggClientException { + SparkLinker sl = new SparkLinker(ctx); + return sl; + } @Override diff --git a/spark/core/src/test/resources/testLinker/test1.csv b/spark/core/src/test/resources/testLinker/test1.csv new file mode 100644 index 000000000..5b7ab3466 --- /dev/null +++ b/spark/core/src/test/resources/testLinker/test1.csv @@ -0,0 +1,27 @@ +rec-1020-org, blake, ryan,4, starling place, berkeley vlge, marsden,5412, nsw,19271027,2402765 +rec-1021-org, thomas, george,1, mcmanus place, stoney creek, north turramurra,3130, sa,19630225,5460534 +rec-1022-org, jackson, eglinton,840, fowles street, mountview, burleigh heads,2830, sa,19830807,2932837 +rec-1023-org, gianni, matson,701, willis street, boonooloo, clifton,3101, vic,19410111,2540080 +rec-1024-org, takeisha, freeborn,6, suttor street, the groves street, wentworth falls,4615, vic,19620206,8111362 +rec-1025-org, emiily, britten,8, kitchener street, hilltop hostel rowethorpe, lake heights,2463, qld,19491021,9588775 +rec-1026-org, xani, green,2, phillip avenue, abbey green, armidale,5108, nsw,19390410,9201057 +rec-1027-org, nathan, smallacombe,20, guthridge crescent, red cross units, sandy bay,6056, sa,19241223,7522263 +rec-1028-org, , eglinton,24, currie crescent, woorinyan, riverwood,3749, qld,19180205,9341716 +rec-1029-org, kylee, stephenson,81, rose scott circuit, cordoba manor, ashfield,4226, vic,19461101,4783085 +rec-103-org, briony, koerbin,146, violet grover place, wybelanah, mill park,2446, nsw,19210210,3808808 +rec-1030-org, emma, crossman,53, mcdowall place, kellhaven, tara,5608, vic,19391027,3561186 +rec-1031-org, samantha, sabieray,68, quandong street, wattle brae, gorokan,4019, wa,19590807,2863290 +rec-1032-org, brooklyn, naar-cafentas,210, duffy street, tourist park, berwick,2481, nsw,19840802,3624304 +rec-1033-org, keziah, painter,18, ainslie avenue, sec 1, torquay,3205, vic,19191031,7801066 +rec-1034-org, erin, maynard,14, wilshire street, warialda, little river,2777, vic,19970430,7429462 +rec-1035-org, jaiden, rollins,48, rossarden street, tulgeywood, balwyn north,2224, nt,19280722,7626396 +rec-1036-org, amber, held,24, lampard circuit, emerald garden, golden bay,2447, vic,19510806,3710651 +rec-1037-org, connor, beckwith,10, heard street, , mill park,5031, nsw,19081103,2209091 +rec-1038-org, danny, campbell,95, totterdell street, moama, shellharbour,2209, vic,19951105,9554924 +rec-1039-org, angus, rosa,62, gormanston crescent, mlc centre, kirwan,3350, sa,19250817,2655081 +rec-104-org, benjamin, carbone,18, wattle street, arthella, orange,3550, vic,19050820,3677127 +rec-1040-org, matilda, mestrov,5, house circuit, retirement village, taringa,3820, qld,19801119,2563135 +rec-1041-org, tyler, froud,8, burramurra avenue, kmart p plaza, san remo,3670, sa,19800916,7812219 +rec-1042-org, kiandra, cowle,2, gatliff place, rustenburg sth, girgarre,3995, qld,19801125,3328205 +rec-1043-org, giorgia, frahn,62, handasyde street, ramano estate locn 1, tallebudgera,4506, vic,19670206,9724789 +rec-1044-org, nicole, carbone,46, schlich street, simpson army barracks, toowoomba,3000, wa,19030926,8190756 diff --git a/spark/core/src/test/resources/testLinker/test2.csv b/spark/core/src/test/resources/testLinker/test2.csv new file mode 100644 index 000000000..5f1c10875 --- /dev/null +++ b/spark/core/src/test/resources/testLinker/test2.csv @@ -0,0 +1,38 @@ +rec-1021-dup-0, thomas, georgze,1, mcmanus place, , north turarmurra,3130, sa,19630225,5460534 +rec-1022-dup-1, jackson, eglinron,840, mountview, fowles treet, burlei gh heads,2803, sa,19830807,2932837 +rec-1022-dup-2, jackson, eglinton,840, fowles street, moun tvjiew, burleigh heads,2830, ss, ,2932837 +rec-1022-dup-3, jackson, christo,840, fowles street, mou ntveiw, burleig heads,2830, sa,19830807,2932837 +rec-1022-dup-4, jackson, eglinton,840, fowles street, mountv iew, burleigh heads,2830, sa,19830807,2932837 +rec-1026-dup-0, xani, green, , phill ip avenue, , armidale,5108, nsw,19390410,9201057 +rec-1026-dup-1, xani, green,2, phillip avenue, abbey green, armidale,5108, nsw,19390410,9201857 +rec-1028-dup-0, , ,24, , woorinyan, riverwood,3749, qld,19180205,9341716 +rec-1028-dup-1, , eglinton,24, curriecrescent, woorinyan, riverwood,3749, qld,19180205,1909717 +rec-1029-dup-0, kylee, stepehndon,81, rose scott circuit, cordobak anor, ashfield,4226, vic,19461101,4783085 +rec-1029-dup-1, sachin, stephenson,81, rose scott circuit, cordoba manor, ashfi eld,4226, vic,19461101,4783085 +rec-1029-dup-2, annalise, stephenson,81, rose scott circuit, cordoba manor, ashfoeld,4226, vic,19461101,4783085 +rec-1029-dup-3, kykee, turale,81, rose scott circuit, , ashfield,4226, vic,19461101,4783085 +rec-1029-dup-4, kylee, stephenson,81, cordoba manor, rose scott circuit, ashfield,4226, vic,19461101,4783085 +rec-103-dup-0, benjamin, koerbin,15, wybel anah, violet grover place, mill park,2446, nsw,19210210,3808808 +rec-1032-dup-0, brooklyn, naar-cafentas,210, duffy street, tourist psrk, berwick,2481, nsw, ,3624304 +rec-1033-dup-0, keziah, painter,18, ainsli e avenue, sec 1, torquay,3205, vic,19191031,7801066 +rec-1034-dup-0, erin, maynard,24, , wariala, little river,2777, vic,19970430,7429462 +rec-1034-dup-1, erin, maynard,51, wilshire street, warialda, little irver,2777, vic,19970430,1815999 +rec-1034-dup-2, hayley, maynard,14, wilshire street, , little river,2777, vic,19970430,7429462 +rec-1035-dup-0, jaiden, rollins,48, tulgeywood, rossarden street, balwyn north,2224, nt,19280722,7626396 +rec-1035-dup-1, jaiden, rollins,95, rossarden street, tulgewyood, balwyn north,2224, nt,19280722,7626396 +rec-1035-dup-2, jaiden, rolilns,48, swinden street, tulgeywood, balwyn north,2224, nt,19280722,7626396 +rec-1035-dup-3, jaiden, rolli ns,48, tulgeywomod, rossarden street, balwyn north,2224, nf,19280722,7626396 +rec-1036-dup-0, , held,24, lampard circuit, emerald garden, golden bay,2447, vic,19510806,3710651 +rec-1036-dup-1, sarsha, held,42, lampard circuit, , golden bay,2447, vic,19510806,3710651 +rec-1039-dup-0, angus, roas,62, gormansto crescent, mlc centre, kiruwah,3350, sa,19250817,2655081 +rec-104-dup-0, benjaminl, carbone,18, arthella, wattle s treet, orange,3550, vic,19050820,3677127 +rec-1040-dup-0, matilda, mestrov, , housecicuit, retirement village, taringa,3820, qld,19801119,2536135 +rec-1040-dup-1, matilda, mestrv,5, house circuit, retirement village, taringa,3802, qld,19801119,2563135 +rec-1040-dup-2, matilda, mestrov,5, house circuit, retiremen tvillage, taringa,3820, ,19801119,2563135 +rec-1041-dup-0, tyler, frojd, , burramurra avenue, kmart p plaza, san rmeo,3670, sa,19800916,7812219 +rec-1042-dup-0, kiandra, ,2, gatliff place, rustenburg sth, girgarre,3995, qld,19801125,3328205 +rec-1042-dup-1, kiandra, cowle,2, gatliff place, rustenubr g sth, girgarre,3995, qld,19801125,3328205 +rec-1044-dup-0, nicole, shadbolt,46, schlich s treet, simpson army barracks, toowoomba,3000, wa,19030926,8190756 +rec-1044-dup-1, nicole, carbone,46, schlich nstreet, simpson army barracks, toowoomba,3000, wa,19030926,8190756 +rec-1044-dup-2, nicole, carbone,46, schlich street, simpson arm ybarracks, toowong,3000, wa,19030926,8190756 +rec-1044-dup-3, nicole, carbone,46, schlich street, simpsonary barracks, toowoomba,3000, wa,19030926,8190756