Skip to content
This repository has been archived by the owner on Nov 29, 2022. It is now read-only.

Commit

Permalink
Adjust practical max calculation to prevent potentially negative values. Add in a few more unit tests. (#9)
Browse files Browse the repository at this point in the history

* Bugfix for the practical max curve: in the very specific case where there are more treatment-group responders than control-group non-responders, the practical max curve would be negative. The new code preserves the original shape of the curve while preventing negative values.

* Add in more complex unit tests with odd policy.
  • Loading branch information
rsyi committed Oct 31, 2018
1 parent 2bda837 commit 31d2147
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 24 deletions.
14 changes: 10 additions & 4 deletions pylift/eval.py
Expand Up @@ -258,14 +258,20 @@ def _maximal_qini_curve(func, Nt1o1, Nt0o1, Nt1o0, Nt0o0):
"""
Nt1, Nt0, N = _get_tc_counts(Nt1o1, Nt0o1, Nt1o0, Nt0o0)
persuadables, dogs, sure_things, lost_causes = func(Nt1o1, Nt0o1, Nt1o0, Nt0o0)
# Calculate the number of persuadables in the treated group to get the qini y value.
# For the overfit case, this is simply the entire treated group.
if func == _get_overfit_counts:
persuadables_treated = Nt1o1
slope = 2
else:
persuadables_treated = persuadables*Nt1/N
y = [0, persuadables_treated/Nt1, persuadables_treated/Nt1, (Nt1o1/Nt1-Nt0o1/Nt0)]
slope = 1
x = [0, persuadables/N, 1-dogs/N, 1]
# Deal with the edge case where the persuadable fraction (persuadables/N)
# exceeds 1 - dogs/N, i.e. the two interior x-values would cross (common if
# this is not a treatment/control experiment, but an experiment between two
# treatments).
if x[1]>x[2]:
new_val = (x[1]+x[2])/2
x[1] = new_val
x[2] = new_val
y = [0, x[1]*slope, x[1]*slope, (Nt1o1/Nt1-Nt0o1/Nt0)]

return x, y

Expand Down
68 changes: 48 additions & 20 deletions tests/eval_test.py
Expand Up @@ -3,40 +3,68 @@

from pylift.eval import _get_counts, _get_tc_counts, _get_no_sure_thing_counts, _get_no_sleeping_dog_counts, _get_overfit_counts, get_scores

class TestScores(unittest.TestCase):
"""Test `eval.get_scores`.
treatment = np.array([1,1,0,1,0])
outcome = np.array([1,1,1,0,0])
p = np.array([0.75,0.5,0.75,0.75,0.5])
prediction = np.array([1,0.9,0.8,0.5,0.6])
Nt1o1, Nt0o1, Nt1o0, Nt0o0 = _get_counts(treatment, outcome, p)
scores = get_scores(treatment, outcome, prediction, p)

simple_counts_example = (4/3, 2, 2/3, 1)

treatment_simple = [1,0,1,0]
outcome_simple = [1,0,0,1]
prediction_simple = [1, 0.5, 0.25, 0]
p_simple = [0.5, 0.5, 0.5, 0.5]
scores_simple = get_scores(treatment_simple, outcome_simple, prediction_simple, p_simple)

class CountsAndScores(unittest.TestCase):
"""Test all counting and scoring functions.
"""
def test_cgains(self):
self.assertAlmostEqual(get_scores([1,0,1,0], [1,0,0,1], [1, 0.5, 0.25, 0], [0.5, 0.5, 0.5, 0.5])['Q_cgains'], 0.28125, msg="Incorrect cumulative gains score.")

# The following tests were calculated with the package and each step was verified.
def test_Nt1o1(self):
self.assertAlmostEqual(Nt1o1, 5/3)

def test_Nt0o1(self):
self.assertAlmostEqual(Nt0o1, 2)

def test_Nt1o0(self):
self.assertAlmostEqual(Nt1o0, 2/3)

def test_Nt0o0(self):
self.assertAlmostEqual(Nt0o0, 1)

def test_aqini(self):
self.assertAlmostEqual(get_scores([1,0,1,0], [1,0,0,1], [1, 0.5, 0.25, 0], [0.5, 0.5, 0.5, 0.5])['Q_aqini'], 0.375, msg="Incorrect cumulative gains score.")
self.assertAlmostEqual(scores['Q_aqini'], 0.21279762)

def test_qini(self):
self.assertAlmostEqual(get_scores([1,0,1,0], [1,0,0,1], [1, 0.5, 0.25, 0], [0.5, 0.5, 0.5, 0.5])['Q_qini'], 0.375, msg="Incorrect cumulative gains score.")
self.assertAlmostEqual(scores['Q_qini'], 0.18601190)

treatment = np.array([1,1,0,1,0])
outcome = np.array([1,1,1,0,0])
p = np.array([0.75,0.75,0.75,0.75,0.5])
Nt1o1, Nt0o1, Nt1o0, Nt0o0 = _get_counts(treatment, outcome, p)
def test_cgains(self):
self.assertAlmostEqual(scores['Q_cgains'], 0.11737351)

class Counts(unittest.TestCase):
"""Test all counting functions.
def test_practical_max(self):
self.assertAlmostEqual(scores['Q_practical_max'], 0.24943311)

"""
def test_max(self):
self.assertAlmostEqual(scores['Q_max'], 0.39527530)

# The following tests were calculated by hand.
def test_counts(self):
self.assertTrue(np.allclose(_get_counts(treatment, outcome, p), (4/3, 2, 2/3, 1)))
self.assertTrue(np.allclose(_get_counts(treatment, outcome, p), (5/3, 2, 2/3, 1)))

def test_nostcounts(self):
self.assertTrue(np.allclose(_get_no_sure_thing_counts(Nt1o1, Nt0o1, Nt1o0, Nt0o0), (10/3, 10/3, 0, -5/3)))
# The following were also calculated by hand, but are redundant with earlier
# tests. These examples are nice in that they are a bit simpler, though.
def test_cgains(self):
self.assertAlmostEqual(scores_simple['Q_cgains'], 0.28125, msg="Incorrect cumulative gains score.")

def test_nosdcounts(self):
self.assertTrue(np.allclose(_get_no_sleeping_dog_counts(Nt1o1, Nt0o1, Nt1o0, Nt0o0), (0, 0, 10/3, 5/3)))
def test_aqini(self):
self.assertAlmostEqual(scores_simple['Q_aqini'], 0.375, msg="Incorrect cumulative gains score.")

def test_ofcounts(self):
self.assertTrue(np.allclose(_get_overfit_counts(Nt1o1, Nt0o1, Nt1o0, Nt0o0), (4/3, 2, 0, 5/3)))
def test_qini(self):
self.assertAlmostEqual(scores_simple['Q_qini'], 0.375, msg="Incorrect cumulative gains score.")

if __name__ == '__main__':
unittest.main()

0 comments on commit 31d2147

Please sign in to comment.