Skip to content

Commit

Permalink
Merge pull request #239 from zalando/chi_square_more_statistics
Browse files (browse the repository at this point in the history)
Added observed and expected frequencies to chi-square statistics
  • Loading branch information
daryadedik committed Sep 4, 2018
2 parents 4ab75fa + fc586b0 commit deb50aa
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 2 deletions.
4 changes: 2 additions & 2 deletions expan/core/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ def chi_square_test_result_and_statistics(self, variant_column, weights, min_cou
:param alpha: significance level, 0.05 by default
:return: True(if split is consistent with the given split) or
False(if split is not consistent with the given split)
:rtype: Boolean, float, float
:rtype: Boolean, float, float, pd.Series, pd.Series
"""
if not hasattr(variant_column, '__len__'):
raise ValueError("Variant split check was cancelled since input variant column is empty or doesn't exist.")
Expand Down Expand Up @@ -351,4 +351,4 @@ def chi_square_test_result_and_statistics(self, variant_column, weights, min_cou
# Compute chi-square and p-value statistics
chi_square_val, p_val = statx.chi_square(observed_freqs.sort_index(), expected_freqs.sort_index())

return p_val >= alpha, p_val, chi_square_val
return p_val >= alpha, p_val, chi_square_val, observed_freqs, expected_freqs
13 changes: 13 additions & 0 deletions tests/tests_core/test_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,13 @@ def test_chi_square_test_result_and_statistics_same_weights(self):
self.assertEqual(result[0], True)
self.assertAlmostEqual(result[1], 0.8087921354109989)
self.assertAlmostEqual(result[2], 1.6)
self.assertEqual(result[3]['A'], 23)
self.assertEqual(result[3]['B'], 18)
self.assertEqual(result[3]['C'], 17)
self.assertEqual(result[3]['D'], 19)
self.assertEqual(result[3]['E'], 23)

self.assertTrue(all([val == 20 for val in result[4]]))

def test_chi_square_test_result_and_statistics_different_weights(self):
exp = self.getExperiment()
Expand All @@ -377,6 +384,8 @@ def test_chi_square_test_result_and_statistics_different_weights(self):
self.assertEqual(result[0], False)
self.assertAlmostEqual(result[1], 9.064563321754584e-07)
self.assertAlmostEqual(result[2], 33.585)
self.assertEqual(len(result[3]), 5)
self.assertEqual(len(result[4]), 5)

def test_chi_square_test_result_and_statistics_2_categories(self):
exp = self.getExperiment()
Expand All @@ -386,6 +395,10 @@ def test_chi_square_test_result_and_statistics_2_categories(self):
self.assertEqual(result[0], True)
self.assertAlmostEqual(result[1], 1.0)
self.assertAlmostEqual(result[2], 0.0)
self.assertEqual(result[3]['A'], 17)
self.assertEqual(result[4]['A'], 17)
self.assertEqual(result[3]['B'], 17)
self.assertEqual(result[4]['B'], 17)

def test_chi_square_test_result_and_statistics_NaN_data(self):
exp = self.getExperiment()
Expand Down

0 comments on commit deb50aa

Please sign in to comment.