zinggAI · sonalgoyal · Jul 16, 2022 · Jul 14, 2022 · Jul 16, 2022
diff --git a/test/__init__.py b/test/__init__.py
diff --git a/test/pythonTest.py b/test/pythonTest.py
@@ -0,0 +1,27 @@
+"""Discover and run the Python unit tests.
+
+Discovery starts in the current working directory ('.') and uses
+unittest's default settings; a TextTestRunner reports the results.
+
+The process exit status is non-zero when the run is not successful, so
+callers such as shell scripts or CI jobs can detect failing tests.
+"""
+import sys
+import unittest
+
+
+def main():
+    """Run every discovered test and return a process exit status.
+
+    Returns:
+        0 when the whole run was successful, 1 otherwise.
+    """
+    testsuite = unittest.TestLoader().discover('.')
+    result = unittest.TextTestRunner(verbosity=1).run(testsuite)
+    # Without this the script would exit 0 even when tests failed.
+    return 0 if result.wasSuccessful() else 1
+
+
+if __name__ == '__main__':
+    sys.exit(main())
+
diff --git a/test/testFebrl/__init__.py b/test/testFebrl/__init__.py
diff --git a/test/testFebrl/testFebrl.py b/test/testFebrl/testFebrl.py
@@ -0,0 +1,65 @@
+import unittest
+from unittest.case import TestCase
+import unittest
+from io import StringIO
+
+
+from zingg import *
+from zingg.pipes import *
+
+# Module-level fixture shared by the test case: Zingg arguments, pipes and client options.
+FEBRL_CSV = "examples/febrl/test.csv"  # one definition for the path that is both read and piped
+args = Arguments()
+fname = FieldDefinition("fname", "string", MatchType.FUZZY)
+lname = FieldDefinition("lname", "string", MatchType.FUZZY)
+stNo = FieldDefinition("stNo", "string", MatchType.FUZZY)
+add1 = FieldDefinition("add1", "string", MatchType.FUZZY)
+add2 = FieldDefinition("add2", "string", MatchType.FUZZY)
+city = FieldDefinition("city", "string", MatchType.FUZZY)
+areacode = FieldDefinition("areacode", "string", MatchType.FUZZY)
+state = FieldDefinition("state", "string", MatchType.FUZZY)
+dob = FieldDefinition("dob", "string", MatchType.FUZZY)
+ssn = FieldDefinition("ssn", "string", MatchType.FUZZY)
+fieldDefs = [fname, lname, stNo, add1, add2, city, areacode, state, dob, ssn]
+args.setFieldDefinition(fieldDefs)
+args.setModelId("100")
+args.setZinggDir("models")
+args.setNumPartitions(4)
+args.setLabelDataSampleSize(0.5)
+
+# Read the CSV with an explicit schema, then hand that schema to the input pipe as JSON.
+df = spark.read.format("csv").schema("id string, fname string, lname string, stNo string, add1 string, add2 string, city string, state string, areacode string, dob string, ssn  string").load(FEBRL_CSV)
+inputPipe = CsvPipe("test")
+inputPipe.setLocation(FEBRL_CSV)
+dfSchema = str(df.schema.json())
+inputPipe.setSchema(dfSchema)
+
+outputPipe = CsvPipe("result")
+outputPipe.setLocation("/tmp/pythonTest")
+
+args.setData(inputPipe)
+args.setOutput(outputPipe)
+
+options = ClientOptions()
+options.setPhase("trainMatch")
+
+#testing
+
+class Accuracy_recordCount(TestCase):
+    """Runs Zingg with the module-level args/options and checks its marked records."""
+
+    def test_recordCount(self):
+        """Check the marked-record count, then the accuracy over scored records."""
+        client = Zingg(args, options)
+        client.initAndExecute()
+        marked = client.getPandasDfFromDs(client.getMarkedRecords())
+
+        # marked record count test
+        self.assertEqual(marked.shape[0], 76)
+
+        # rows whose prediction is -1 are left out of the accuracy computation
+        scored = marked[marked[ColName.PREDICTION_COL] != -1]
+        self.assertFalse(scored.empty, "no scored records left to compute accuracy on")
+        acc = (scored[ColName.MATCH_FLAG_COL] == scored[ColName.PREDICTION_COL]).mean()
+        self.assertGreater(acc, 0.9)
+