Adjust practical max calculation to prevent potentially negative values. Add in a few more unit tests. (#9)

* Bugfix with practical max curve: in a very specific case where there are more treatment group responders than control group non-responders, the practical max curve would be negative. Wrote code to preserve the original shape of the curve while preventing this from happening.
* Add in more complex unit tests with odd policy.
wayfair · Oct 31, 2018 · 31d2147 · 31d2147
1 parent 2bda837
commit 31d2147
Show file tree

Hide file tree

Showing 2 changed files with 58 additions and 24 deletions.
diff --git a/pylift/eval.py b/pylift/eval.py
@@ -258,14 +258,20 @@ def _maximal_qini_curve(func, Nt1o1, Nt0o1, Nt1o0, Nt0o0):
     """
     Nt1, Nt0, N = _get_tc_counts(Nt1o1, Nt0o1, Nt1o0, Nt0o0)
     persuadables, dogs, sure_things, lost_causes = func(Nt1o1, Nt0o1, Nt1o0, Nt0o0)
-    # Calculate the number of persuadables in the treated group to get the qini y value.
     # For the overfit case, this is simply the entire treated group.
     if func == _get_overfit_counts:
-        persuadables_treated = Nt1o1
+        slope = 2
     else:
-        persuadables_treated = persuadables*Nt1/N
-    y = [0, persuadables_treated/Nt1, persuadables_treated/Nt1, (Nt1o1/Nt1-Nt0o1/Nt0)]
+        slope = 1
     x = [0, persuadables/N, 1-dogs/N, 1]
+    # Deal with edge case where persuadables/N exceeds 1 - dogs/N, i.e. the
+    # curve's x-values would be out of order (common if this is not a
+    # treatment/control experiment, but an experiment between two treatments).
+    if x[1]>x[2]:
+        new_val = (x[1]+x[2])/2
+        x[1] = new_val
+        x[2] = new_val
+    y = [0, x[1]*slope, x[1]*slope, (Nt1o1/Nt1-Nt0o1/Nt0)]
 
     return x, y
 

diff --git a/tests/eval_test.py b/tests/eval_test.py
@@ -3,40 +3,68 @@
 
 from pylift.eval import _get_counts, _get_tc_counts, _get_no_sure_thing_counts, _get_no_sleeping_dog_counts, _get_overfit_counts, get_scores
 
-class TestScores(unittest.TestCase):
-    """Test `eval.get_scores`.
+treatment = np.array([1,1,0,1,0])
+outcome = np.array([1,1,1,0,0])
+p = np.array([0.75,0.5,0.75,0.75,0.5])
+prediction = np.array([1,0.9,0.8,0.5,0.6])
+Nt1o1, Nt0o1, Nt1o0, Nt0o0 = _get_counts(treatment, outcome, p)
+scores = get_scores(treatment, outcome, prediction, p)
+
+simple_counts_example = (4/3, 2, 2/3, 1)
+
+treatment_simple = [1,0,1,0]
+outcome_simple = [1,0,0,1]
+prediction_simple = [1, 0.5, 0.25, 0]
+p_simple = [0.5, 0.5, 0.5, 0.5]
+scores_simple = get_scores(treatment_simple, outcome_simple, prediction_simple, p_simple)
+
+class CountsAndScores(unittest.TestCase):
+    """Test all counting and scoring functions.
 
     """
-    def test_cgains(self):
-        self.assertAlmostEqual(get_scores([1,0,1,0], [1,0,0,1], [1, 0.5, 0.25, 0], [0.5, 0.5, 0.5, 0.5])['Q_cgains'], 0.28125, msg="Incorrect cumulative gains score.")
+
+    # The following tests were calculated with the package and each step was verified.
+    def test_Nt1o1(self):
+        self.assertAlmostEqual(Nt1o1, 5/3)
+
+    def test_Nt0o1(self):
+        self.assertAlmostEqual(Nt0o1, 2)
+
+    def test_Nt1o0(self):
+        self.assertAlmostEqual(Nt1o0, 2/3)
+
+    def test_Nt0o0(self):
+        self.assertAlmostEqual(Nt0o0, 1)
 
     def test_aqini(self):
-        self.assertAlmostEqual(get_scores([1,0,1,0], [1,0,0,1], [1, 0.5, 0.25, 0], [0.5, 0.5, 0.5, 0.5])['Q_aqini'], 0.375, msg="Incorrect cumulative gains score.")
+        self.assertAlmostEqual(scores['Q_aqini'], 0.21279762)
 
     def test_qini(self):
-        self.assertAlmostEqual(get_scores([1,0,1,0], [1,0,0,1], [1, 0.5, 0.25, 0], [0.5, 0.5, 0.5, 0.5])['Q_qini'], 0.375, msg="Incorrect cumulative gains score.")
+        self.assertAlmostEqual(scores['Q_qini'], 0.18601190)
 
-treatment = np.array([1,1,0,1,0])
-outcome = np.array([1,1,1,0,0])
-p = np.array([0.75,0.75,0.75,0.75,0.5])
-Nt1o1, Nt0o1, Nt1o0, Nt0o0 = _get_counts(treatment, outcome, p)
+    def test_cgains(self):
+        self.assertAlmostEqual(scores['Q_cgains'], 0.11737351)
 
-class Counts(unittest.TestCase):
-    """Test all counting functions.
+    def test_practical_max(self):
+        self.assertAlmostEqual(scores['Q_practical_max'], 0.24943311)
 
-    """
+    def test_max(self):
+        self.assertAlmostEqual(scores['Q_max'], 0.39527530)
 
+    # The following tests were calculated by hand.
     def test_counts(self):
-        self.assertTrue(np.allclose(_get_counts(treatment, outcome, p), (4/3, 2, 2/3, 1)))
+        self.assertTrue(np.allclose(_get_counts(treatment, outcome, p), (5/3, 2, 2/3, 1)))
 
-    def test_nostcounts(self):
-        self.assertTrue(np.allclose(_get_no_sure_thing_counts(Nt1o1, Nt0o1, Nt1o0, Nt0o0), (10/3, 10/3, 0, -5/3)))
+    # The following were also calculated by hand, but are redundant with earlier
+    # tests. These examples are nice in that they are a bit simpler, though.
+    def test_cgains(self):
+        self.assertAlmostEqual(scores_simple['Q_cgains'], 0.28125, msg="Incorrect cumulative gains score.")
 
-    def test_nosdcounts(self):
-        self.assertTrue(np.allclose(_get_no_sleeping_dog_counts(Nt1o1, Nt0o1, Nt1o0, Nt0o0), (0, 0, 10/3, 5/3)))
+    def test_aqini(self):
+        self.assertAlmostEqual(scores_simple['Q_aqini'], 0.375, msg="Incorrect cumulative gains score.")
 
-    def test_ofcounts(self):
-        self.assertTrue(np.allclose(_get_overfit_counts(Nt1o1, Nt0o1, Nt1o0, Nt0o0), (4/3, 2, 0, 5/3)))
+    def test_qini(self):
+        self.assertAlmostEqual(scores_simple['Q_qini'], 0.375, msg="Incorrect cumulative gains score.")
 
 if __name__ == '__main__':
     unittest.main()