Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

merging dev to master #48

Merged
merged 10 commits into from Aug 19, 2016
5 changes: 4 additions & 1 deletion .travis.yml
Expand Up @@ -17,13 +17,16 @@ deploy:
env:
- TOXENV=py
install:
- pip install -U pip setuptools tox
- pip install -U pip setuptools tox coveralls
- pip install -r requirements.txt
language: python
python:
#- pypy
#- 2.6
- 2.7
#- 3.3
- 3.4
- 3.5
script: tox
after_success:
- coveralls
5 changes: 0 additions & 5 deletions README.rst
Expand Up @@ -17,11 +17,6 @@ ExpAn: Experiment Analysis
:target: https://pypi.python.org/pypi/expan
:alt: Python Versions


.. image:: https://img.shields.io/pypi/dw/expan.svg
:target: https://pypi.python.org/pypi/expan/
:alt: PyPI Downloads

.. image:: https://img.shields.io/pypi/l/expan.svg
:target: https://pypi.python.org/pypi/expan/
:alt: License
Expand Down
90 changes: 72 additions & 18 deletions expan/core/experiment.py
Expand Up @@ -3,6 +3,7 @@

# import numpy as np

import re
import expan.core.statistics as statx
import warnings

Expand Down Expand Up @@ -114,17 +115,36 @@ def do_delta(f, bin_name):

def _delta_all_variants(metric_df, baseline_variant, assume_normal=True,
                        percentiles=None, min_observations=20,
                        nruns=10000, relative=False, weighted=False):
	"""Applies delta to all variants, given a metric and a baseline variant.

	Args:
		metric_df (DataFrame): expects 4 columns, in order:
			entity, variant, metric, reference_kpi
		baseline_variant: variant name against which all others are compared.
		assume_normal (bool): forwarded to statx.delta.
		percentiles (list): percentiles for the confidence interval;
			defaults to [2.5, 97.5]. (Default is None rather than a
			mutable list literal to avoid the shared-default pitfall.)
		min_observations (int): forwarded to statx.delta.
		nruns (int): forwarded to statx.delta.
		relative (bool): forwarded to statx.delta.
		weighted (bool): if True, weight each sample by the reference KPI
			(column 3), normalised so the weights have mean 1. Only
			relevant for ratio metrics.

	Returns:
		DataFrame of delta results, one column level per variant.
	"""
	if percentiles is None:
		percentiles = [2.5, 97.5]

	# Compute the baseline mask once and reuse it for metric and weights.
	is_baseline = metric_df.iloc[:, 1] == baseline_variant
	baseline_metric = metric_df.iloc[:, 2][is_baseline]
	baseline_weights = metric_df.iloc[:, 3][is_baseline]

	def _normalized(w):
		# Scale weights to sum to len(w), i.e. an average weight of 1,
		# so the weighted mean is comparable to the unweighted one.
		return w / w.sum() * len(w)

	def do_delta(f):
		# The weighted and unweighted paths differ only in the two
		# weight kwargs, so build them conditionally instead of
		# duplicating the whole call.
		weight_kwargs = {}
		if weighted:
			weight_kwargs = {
				'x_weights': _normalized(f.iloc[:, 3]),
				'y_weights': _normalized(baseline_weights),
			}
		return delta_to_dataframe_all_variants(
			f.columns[2],
			*statx.delta(x=f.iloc[:, 2],
			             y=baseline_metric,
			             assume_normal=assume_normal,
			             percentiles=percentiles,
			             min_observations=min_observations,
			             nruns=nruns,
			             relative=relative,
			             **weight_kwargs))

	# Actual calculation
	return metric_df.groupby('variant').apply(do_delta).unstack(0)

Expand Down Expand Up @@ -327,9 +347,9 @@ def __str__(self):

return res

def delta(self, kpi_subset=None, variant_subset=None,
def delta(self, kpi_subset=None, derived_kpis=None, variant_subset=None,
assume_normal=True, percentiles=[2.5, 97.5],
min_observations=20, nruns=10000, relative=False):
min_observations=20, nruns=10000, relative=False, weighted_kpis=None):
"""
Compute delta (with confidence bounds) on all applicable kpis,
and returns in the standard Results format.
Expand All @@ -341,6 +361,9 @@ def delta(self, kpi_subset=None, variant_subset=None,
Args:
kpi_subset (list): kpis for which to perfom delta. If set to
None all kpis are used.
derived_kpis (list): definition of additional KPIs derived from the
primary ones, e.g.
[{'name':'return_rate', 'formula':'returned/ordered'}]
variant_subset (list): Variants to use compare against baseline. If
set to None all variants are used.

Expand All @@ -357,13 +380,31 @@ def delta(self, kpi_subset=None, variant_subset=None,
mean-ret_val[0] to mean+ret_val[1]. This is more useful in many
situations because it corresponds with the sem() and std()
functions.
weighted_kpis (list): a list of metric names. For each metric
in the list, the weighted mean and confidence intervals
are calculated, which is equivalent to the overall metric.
Otherwise the metrics are unweighted, this weighted approach
is only relevant for ratios.

Returns:
Results object containing the computed deltas.
"""
res = Results(None, metadata=self.metadata)
res.metadata['reference_kpi'] = {}
res.metadata['weighted_kpis'] = weighted_kpis

# determine the complete KPI name list
kpis_to_analyse = self.kpi_names.copy()
if derived_kpis is not None:
for dk in derived_kpis:
kpis_to_analyse.update([dk['name']])
# assuming the columns in the formula can all be cast into float
# and create the derived KPI as an additional column
self.kpis.loc[:,dk['name']] = eval(re.sub('('+'|'.join(self.kpi_names)+')', r'self.kpis.\1.astype(float)', dk['formula']))
# store the reference metric name to be used in the weighting
# TODO: only works for ratios
res.metadata['reference_kpi'][dk['name']] = re.sub('('+'|'.join(self.kpi_names)+')/', '', dk['formula'])

if kpi_subset is not None:
kpis_to_analyse.intersection_update(kpi_subset)
self.dbg(3, 'kpis_to_analyse: ' + ','.join(kpis_to_analyse))
Expand All @@ -375,14 +416,26 @@ def delta(self, kpi_subset=None, variant_subset=None,
self.dbg(3, 'treat_variants to analyse: ' + ','.join(treat_variants))

for mname in kpis_to_analyse:
# the weighted approach implies that derived_kpis is not None
if weighted_kpis is not None and mname in weighted_kpis:
reference_kpi = res.metadata['reference_kpi'][mname]
weighted = True
else:
reference_kpi = mname
weighted = False

try:
with warnings.catch_warnings(record=True) as w:
# Cause all warnings to always be triggered.
warnings.simplefilter("always")
df = (_delta_all_variants(self.kpis.reset_index()[['entity', 'variant', mname]],
self.baseline_variant, assume_normal=assume_normal,
percentiles=percentiles, min_observations=min_observations,
nruns=nruns, relative=relative))
df = (_delta_all_variants(self.kpis.reset_index()[['entity', 'variant', mname, reference_kpi]],
self.baseline_variant,
assume_normal=assume_normal,
percentiles=percentiles,
min_observations=min_observations,
nruns=nruns,
relative=relative,
weighted=weighted))
if len(w):
res.metadata['warnings']['Experiment.delta'] = w[-1].message

Expand Down Expand Up @@ -620,8 +673,9 @@ def trend(self, kpi_subset=None, variant_subset=None,
metrics, metadata = generate_random_data()
metrics['time_since_treatment'] = metrics['treatment_start_time']
exp = Experiment('B', metrics, metadata, [4, 6])
# Perform sga()
result = exp.trend()
res = exp.delta(kpi_subset=['derived'],
derived_kpis=[{'name':'derived','formula':'normal_same/normal_shifted'}],
weighted_kpis=['derived'])

# result = time_dependent_deltas(data.metrics.reset_index()
# [['variant','time_since_treatment','normal_shifted']],variants=['A','B']).df.loc[:,1]
Expand Down
14 changes: 11 additions & 3 deletions expan/core/statistics.py
Expand Up @@ -11,7 +11,7 @@ def _delta_mean(x, y):


def delta(x, y, assume_normal=True, percentiles=[2.5, 97.5],
min_observations=20, nruns=10000, relative=False):
min_observations=20, nruns=10000, relative=False, x_weights=1, y_weights=1):
"""
Calculates the difference of means between the samples (x-y) in a
statistical sense, i.e. with confidence intervals.
Expand All @@ -38,6 +38,14 @@ def delta(x, y, assume_normal=True, percentiles=[2.5, 97.5],
absolute values. In this case, the interval is mean-ret_val[0] to
mean+ret_val[1]. This is more useful in many situations because it
corresponds with the sem() and std() functions.
x_weights (list): weights for the x vector, in order to calculate
the weighted mean and confidence intervals, which is equivalent
to the overall metric. This weighted approach is only relevant
for ratios.
y_weights (list): weights for the y vector, in order to calculate
the weighted mean and confidence intervals, which is equivalent
to the overall metric. This weighted approach is only relevant
for ratios.

Returns:
float: mean value of the difference
Expand All @@ -52,8 +60,8 @@ def delta(x, y, assume_normal=True, percentiles=[2.5, 97.5],
raise ValueError('Please provide two non-None samples.')

# Coercing missing values to right format
_x = np.array(x, dtype=float)
_y = np.array(y, dtype=float)
_x = np.array(x, dtype=float) * x_weights
_y = np.array(y, dtype=float) * y_weights

x_nan = np.isnan(_x).sum()
y_nan = np.isnan(_y).sum()
Expand Down
17 changes: 3 additions & 14 deletions expan/data/csv_fetcher.py
Expand Up @@ -11,7 +11,6 @@
from os.path import isfile, join

import simplejson as json

from expan.core.experimentdata import *


Expand All @@ -27,24 +26,14 @@ def get_data(folder_path):
for f in files:

if 'metrics' in f:
try:
metrics = pd.read_csv(folder_path + '/' + f)
except Exception as e:
print()
e

elif 'metadata' in f:
try:
with open(folder_path + '/' + f, 'r') as input_json:
metadata = json.load(input_json)
except ValueError as e:
print()
e
raise
with open(folder_path + '/' + f, 'r') as input_json:
metadata = json.load(input_json)

return ExperimentData(metrics=metrics, metadata=metadata)

except AssertionError as e:
print()
e
print(e)
raise
68 changes: 68 additions & 0 deletions tests/tests_core/test_experiment.py
Expand Up @@ -336,6 +336,74 @@ def test_delta(self):
np.testing.assert_equal(True, all(item in result.metadata.items()
for item in self.testmetadata.items()))

def test_delta_derived_kpis(self):
	"""
	Check if Experiment.delta() functions properly for derived KPIs
	"""
	# Fixture sanity checks before exercising delta().
	self.assertTrue(isinstance(self.data, Experiment))  # check that the subclassing works
	self.assertTrue(self.data.baseline_variant == 'B')

	result = self.data.delta(
		kpi_subset=['derived'],
		derived_kpis=[{'name':'derived','formula':'normal_same/normal_shifted'}])

	# uplift of the derived KPI
	uplift = result.statistic('delta', 'uplift', 'derived')
	np.testing.assert_almost_equal(uplift.loc[:, ('value', 'A')],
	                               np.array([0.308368]), decimal=5)
	# confidence-interval percentiles
	pctile = result.statistic('delta', 'uplift_pctile', 'derived')
	np.testing.assert_almost_equal(pctile.loc[:, ('value', 'A')],
	                               np.array([-4.319602, 4.936339]), decimal=5)
	# sample size per variant
	sample_size = result.statistic('delta', 'sample_size', 'derived')
	np.testing.assert_almost_equal(sample_size.loc[:, 'value'],
	                               np.array([[6108, 3892]]), decimal=5)
	# per-variant means
	variant_mean = result.statistic('delta', 'variant_mean', 'derived')
	np.testing.assert_almost_equal(variant_mean.loc[:, 'value'],
	                               np.array([[0.376876, 0.068508]]), decimal=5)

	# metadata must survive the delta() call untouched
	np.testing.assert_equal(True, all(item in result.metadata.items()
	                                  for item in self.testmetadata.items()))

def test_delta_derived_kpis_weighted(self):
	"""
	Check if Experiment.delta() functions properly for derived KPIs using
	the weighted method.
	"""
	# Fixture sanity checks before exercising delta().
	self.assertTrue(isinstance(self.data, Experiment))  # check that the subclassing works
	self.assertTrue(self.data.baseline_variant == 'B')

	result = self.data.delta(
		kpi_subset=['derived'],
		derived_kpis=[{'name':'derived','formula':'normal_same/normal_shifted'}],
		weighted_kpis=['derived'])

	# uplift of the weighted derived KPI
	uplift = result.statistic('delta', 'uplift', 'derived')
	np.testing.assert_almost_equal(uplift.loc[:, ('value', 'A')],
	                               np.array([-4.564575]), decimal=5)
	# confidence-interval percentiles
	pctile = result.statistic('delta', 'uplift_pctile', 'derived')
	np.testing.assert_almost_equal(pctile.loc[:, ('value', 'A')],
	                               np.array([-10.274232, 1.145082]), decimal=5)
	# sample size per variant (weighting must not change the counts)
	sample_size = result.statistic('delta', 'sample_size', 'derived')
	np.testing.assert_almost_equal(sample_size.loc[:, 'value'],
	                               np.array([[6108, 3892]]), decimal=5)
	# per-variant weighted means
	variant_mean = result.statistic('delta', 'variant_mean', 'derived')
	np.testing.assert_almost_equal(variant_mean.loc[:, 'value'],
	                               np.array([[-4.572524, -0.007949]]), decimal=5)

	# metadata must survive the delta() call untouched
	np.testing.assert_equal(True, all(item in result.metadata.items()
	                                  for item in self.testmetadata.items()))

def test_unequal_variance_warning_in_results(self):
"""
Check if the unequal variance warning message is persisted to the Results structure
Expand Down
5 changes: 2 additions & 3 deletions tests/tests_data/test_csv_fetcher.py
Expand Up @@ -2,14 +2,13 @@
import unittest
from os.path import dirname, join, realpath

import simplejson as json

import expan.data.csv_fetcher as csv_fetcher
import simplejson as json
import tests.tests_core.test_data as td

__location__ = realpath(join(os.getcwd(), dirname(__file__)))

TEST_FOLDER = __location__ + 'test_folder'
TEST_FOLDER = __location__ + '/test_folder'


class CsvFetcherTestCase(unittest.TestCase):
Expand Down
4 changes: 2 additions & 2 deletions tox.ini
Expand Up @@ -5,7 +5,7 @@ envlist = py27,py34
setenv =
PYTHONPATH = {toxinidir}:{toxinidir}/expan
commands =
; python setup.py test
py.test tests
py.test --cov=expan tests
deps =
pytest-cov
-r{toxinidir}/requirements.txt