Skip to content

Commit

Permalink
Robust unit tests for analysis. Closes #171.
Browse files Browse the repository at this point in the history
  • Loading branch information
onyxfish committed Aug 28, 2015
1 parent 87442d3 commit eb8e463
Show file tree
Hide file tree
Showing 2 changed files with 164 additions and 11 deletions.
21 changes: 10 additions & 11 deletions agate/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

try:
import cPickle as pickle
except ImportError:
except ImportError: # pragma: no cover
import pickle

class Analysis(object):
Expand Down Expand Up @@ -36,7 +36,7 @@ def _fingerprint(self):
"""
hasher = hashlib.md5()
source = inspect.getsource(self._func)
hasher.update(source)
hasher.update(source.encode('utf-8'))

return hasher.hexdigest()

Expand Down Expand Up @@ -72,11 +72,9 @@ def _save_data(self, data):
"""
path = os.path.join(self._cache_path, '%s.data' % self._name)

if not os.path.exists(self._cache_path):
os.makedirs(self._cache_path)

with open(path, 'w') as f:
f.write(bz2.compress(pickle.dumps(data)))
f = bz2.BZ2File(path, 'w')
f.write(pickle.dumps(data))
f.close()

def _load_data(self):
"""
Expand All @@ -85,10 +83,11 @@ def _load_data(self):
path = os.path.join(self._cache_path, '%s.data' % self._name)

if not os.path.exists(path):
return None
raise IOError('Data cache missing at %s' % path)

with open(path) as f:
data = pickle.loads(bz2.decompress(f.read()))
f = bz2.BZ2File(path)
data = pickle.loads(f.read())
f.close()

return data

Expand All @@ -100,7 +99,7 @@ def then(self, next_func):
:param next_func: The analysis function. Must accept a `data` argument
that is the state inherited from ancestor analyses.
"""
analysis = Analysis(next_func)
analysis = Analysis(next_func, cache_path=self._cache_path)

self._next_analyses.append(analysis)

Expand Down
154 changes: 154 additions & 0 deletions tests/test_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
#!/usr/bin/env python

from copy import deepcopy
import os
import shutil
from time import sleep

try:
import unittest2 as unittest
except ImportError:
import unittest

from agate.analysis import Analysis
from agate.column_types import NumberType, TextType
from agate.table import Table

TEST_CACHE = '.agate-test'

def wait_for_create(path, timeout=None, interval=1):
    """
    Block until something exists at `path`.

    :param path: Filesystem path to poll with :func:`os.path.exists`.
    :param timeout: Maximum number of seconds to wait. The default, `None`,
        waits indefinitely.
    :param interval: Seconds to sleep between polls.
    :returns: `True` once the path exists, `False` if `timeout` elapsed
        first.
    """
    # Elapsed time is tracked as the sum of requested sleeps so no clock
    # import is needed beyond the `sleep` already used here.
    waited = 0

    while not os.path.exists(path):
        if timeout is not None and waited >= timeout:
            return False

        sleep(interval)
        waited += interval

    return True

def wait_for_delete(path, timeout=None, interval=1):
    """
    Block until nothing exists at `path`.

    :param path: Filesystem path to poll with :func:`os.path.exists`.
    :param timeout: Maximum number of seconds to wait. The default, `None`,
        waits indefinitely.
    :param interval: Seconds to sleep between polls.
    :returns: `True` once the path is gone, `False` if `timeout` elapsed
        first.
    """
    # Elapsed time is tracked as the sum of requested sleeps so no clock
    # import is needed beyond the `sleep` already used here.
    waited = 0

    while os.path.exists(path):
        if timeout is not None and waited >= timeout:
            return False

        sleep(interval)
        waited += interval

    return True

class TestAnalysis(unittest.TestCase):
    """
    Tests for :class:`Analysis` using a two-stage pipeline (`stage1` followed
    by `stage2`) whose cache is written under `TEST_CACHE`.

    Each stage counts its executions and snapshots the `data` it received and
    produced, so tests can assert both on data flow and on which stages were
    re-run versus served from cache.
    """
    def setUp(self):
        # A cache left behind by an interrupted run would let stages be
        # skipped, breaking the execution counts asserted by these tests.
        shutil.rmtree(TEST_CACHE, ignore_errors=True)

        self.executed_stage1 = 0
        self.data_before_stage1 = None
        self.data_after_stage1 = None

        self.executed_stage2 = 0
        self.data_before_stage2 = None
        self.data_after_stage2 = None

    def tearDown(self):
        # ignore_errors: if a test fails before anything is cached, cleanup
        # must not raise and mask the original failure.
        shutil.rmtree(TEST_CACHE, ignore_errors=True)

    def stage1(self, data):
        """
        First analysis stage: record the call and set `data['stage1']`.
        """
        self.executed_stage1 += 1
        self.data_before_stage1 = deepcopy(data)

        data['stage1'] = 5

        self.data_after_stage1 = deepcopy(data)

    def stage2(self, data):
        """
        Second analysis stage: record the call and derive `data['stage2']`
        from the value written by `stage1`.
        """
        self.executed_stage2 += 1
        self.data_before_stage2 = deepcopy(data)

        data['stage2'] = data['stage1'] * 5

        self.data_after_stage2 = deepcopy(data)

    def _build_analysis(self):
        """
        Create the `stage1` -> `stage2` pipeline cached under `TEST_CACHE`
        and return its root analysis.
        """
        analysis = Analysis(self.stage1, cache_path=TEST_CACHE)
        analysis.then(self.stage2)

        return analysis

    def test_data_flow(self):
        """
        Data passes from stage to stage and the caller's dict is untouched.
        """
        analysis = self._build_analysis()

        data = {}

        analysis.run(data)

        self.assertEqual(data, {})
        self.assertEqual(self.data_before_stage1, {})
        self.assertEqual(self.data_after_stage1, { 'stage1': 5 })
        self.assertEqual(self.data_before_stage2, { 'stage1' : 5 })
        self.assertEqual(self.data_after_stage2, { 'stage1': 5, 'stage2': 25 })

    def test_caching(self):
        """
        A second run with an intact cache executes neither stage again.
        """
        analysis = self._build_analysis()

        analysis.run()

        self.assertEqual(self.executed_stage1, 1)
        self.assertEqual(self.executed_stage2, 1)

        analysis.run()

        self.assertEqual(self.executed_stage1, 1)
        self.assertEqual(self.executed_stage2, 1)

    def test_descendent_fingerprint_deleted(self):
        """
        Deleting the stage2 fingerprint re-runs only stage2.
        """
        analysis = self._build_analysis()

        analysis.run()

        self.assertEqual(self.executed_stage1, 1)
        self.assertEqual(self.executed_stage2, 1)

        path = os.path.join(TEST_CACHE, 'stage2.fingerprint')
        os.remove(path)

        analysis.run()

        self.assertEqual(self.executed_stage1, 1)
        self.assertEqual(self.executed_stage2, 2)

    def test_ancestor_fingerprint_deleted(self):
        """
        Deleting the stage1 fingerprint re-runs stage1 and stage2.
        """
        analysis = self._build_analysis()

        analysis.run()

        self.assertEqual(self.executed_stage1, 1)
        self.assertEqual(self.executed_stage2, 1)

        path = os.path.join(TEST_CACHE, 'stage1.fingerprint')
        os.remove(path)

        analysis.run()

        self.assertEqual(self.executed_stage1, 2)
        self.assertEqual(self.executed_stage2, 2)

    def test_descendent_fingerprint_mismatch(self):
        """
        Overwriting the stage2 fingerprint re-runs only stage2.
        """
        analysis = self._build_analysis()

        analysis.run()

        self.assertEqual(self.executed_stage1, 1)
        self.assertEqual(self.executed_stage2, 1)

        path = os.path.join(TEST_CACHE, 'stage2.fingerprint')

        with open(path, 'w') as f:
            f.write('foo')

        analysis.run()

        self.assertEqual(self.executed_stage1, 1)
        self.assertEqual(self.executed_stage2, 2)

    def test_ancestor_fingerprint_mismatch(self):
        """
        Overwriting the stage1 fingerprint re-runs stage1 and stage2.
        """
        analysis = self._build_analysis()

        analysis.run()

        self.assertEqual(self.executed_stage1, 1)
        self.assertEqual(self.executed_stage2, 1)

        path = os.path.join(TEST_CACHE, 'stage1.fingerprint')

        with open(path, 'w') as f:
            f.write('foo')

        analysis.run()

        self.assertEqual(self.executed_stage1, 2)
        self.assertEqual(self.executed_stage2, 2)

0 comments on commit eb8e463

Please sign in to comment.