# Experimental Evaluation with BugsInPy

To run these experiments, clone BugsInPy from [this repository](https://github.com/smythi93/BugsInPy) and install it as described in its [README.md](https://github.com/smythi93/BugsInPy/blob/master/README.md).

## General Imports

In [3]:
!pip install whatthepatch

In [5]:
import json
import hashlib
import os
import re
import shutil
import subprocess
import time
import whatthepatch
from typing import List, Set
from xml.etree import ElementTree as etree
from sflkit.analysis.suggestion import Location

## Configuration of Experiments

In [6]:
# When True, subjects are (re-)selected via get_all_valid(); otherwise they are
# only loaded from ``subjects_file``.
subject_selection = False
# When True, a selection run starts from the subjects already stored in
# ``subjects_file`` instead of starting from scratch.
continue_subject_selection = True

In [7]:
# When True, execution events are extracted for all subjects.
events_extraction = True
# When True, subjects that already have an events directory are not extracted again.
continue_events_extraction = True

In [8]:
# Working directory into which projects are checked out (see get_dir()).
tmp = '/tmp'
# JSON file that stores the selected subjects.
subjects_file = 'subjects.json'
# Directory name (below ``tmp``) used as destination for instrumented projects.
default_project = 'project'
# Name of the events file looked up inside a project directory after a test run.
events_path = 'EVENTS_PATH'
# File to which generate_config() writes the SFLKit configuration.
config_tmp = 'tmp.ini'

In [9]:
def get_dir(p):
    """Return the path of ``p`` inside the working directory ``tmp``."""
    location = os.path.join(tmp, p)
    return location

def clean_project(project):
    """Remove the directory of ``project`` below ``tmp``; removal errors are ignored."""
    checkout = get_dir(project)
    shutil.rmtree(checkout, ignore_errors=True)

In [10]:
# Absolute path of the current working directory; handed to ``bugsinpy-sflkit``
# via its ``-s`` option.
sflkit_dir = os.path.abspath('.')
sflkit_dir

In [11]:
# Absolute path of the directory in which collected event files are stored.
events_output = os.path.abspath('events')
events_output

In [12]:
# Outcome labels attached to each test by get_test_results().
PASSED = 'PASSED'
FAILED = 'FAILED'

## BugsInPy Benchmark

In [13]:
# Names of the BugsInPy command-line tools invoked through ``subprocess``.
bip_info = 'bugsinpy-info'
bip_checkout = 'bugsinpy-checkout'
bip_compile = 'bugsinpy-compile'
bip_test = 'bugsinpy-test'
bip_sdtools = 'bugsinpy-sflkit'
# Files read from a checked-out project directory.
info_file = 'bugsinpy_bug.info'  # searched for the ``python_version`` entry
run_test_file = 'bugsinpy_run_test.sh'  # inspected to detect unittest-based projects
# Candidate patch files parsed by get_faulty_lines().
patch_files = [
    'bugsinpy_bug_patch.txt',
    'bugsinpy_patchfile.info'
]

In [14]:
# Project names that are passed to the BugsInPy tools via ``-p``.
projects = [
    'PySnooper',
    'ansible',
    'black',
    'cookiecutter',
    'fastapi',
    'httpie',
    'keras',
    'luigi',
    'matplotlib',
    'pandas',
    'sanic',
    'scrapy',
    'spacy',
    'thefuck',
    'tornado',
    'tqdm',
    'youtube-dl'
]

In [15]:
# Expose every project name as a module-level variable (dashes are replaced by
# underscores, e.g. ``youtube_dl``) and remove any existing checkout below ``tmp``.
for project in projects:
    globals()[project.replace('-', '_')] = project
    clean_project(project)

In [16]:
# Projects skipped during subject selection (passed as ``excludes`` to get_all_valid()).
excluded_subjects = [matplotlib, pandas, spacy]
# Upper bound on selected bugs per project (passed as ``max_number`` to get_all_valid()).
max_bugs_per_subject = 500

In [17]:
# Project and bug id used for the sanity checks throughout this notebook.
test_project = PySnooper
test_bug_id = 3

In [18]:
number_of_bugs_pattern = re.compile(r'Number\s*of\s*bugs\s*:\s*(?P<bugs>\d+)')

def get_bugs(project):
    """Return the number of bugs that ``bugsinpy-info`` reports for ``project``.

    The tool's standard output is searched for ``number_of_bugs_pattern``;
    ``None`` is returned when the output contains no such line.
    """
    completed = subprocess.run(
        [bip_info, '-p', project],
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    )
    output = completed.stdout.decode('utf8')
    found = number_of_bugs_pattern.search(output)
    if found is None:
        return None
    return int(found.group('bugs'))

In [19]:
# Sanity check: bugsinpy-info must report three bugs for the test project.
assert get_bugs(test_project) == 3

In [20]:
test_file_pattern = re.compile(r'Triggering\s*test\s*file\s*(?P<test_file>[^\n]+)')

def get_test_file(project: str, bug_id: int):
    """Return the triggering test file that ``bugsinpy-info`` reports for a bug.

    The tool's standard output is searched for ``test_file_pattern``;
    ``None`` is returned when no matching line is present.
    """
    completed = subprocess.run(
        [bip_info, '-p', project, '-i', str(bug_id)],
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    )
    output = completed.stdout.decode('utf8')
    found = test_file_pattern.search(output)
    if found is None:
        return None
    return found.group('test_file')

In [21]:
# Sanity check: look up the triggering test file of the test bug and compare it
# with the expected path.
test_test_file = get_test_file(test_project, test_bug_id)
assert test_test_file == 'tests/test_pysnooper.py'

In [22]:
def get_project(project: str, buggy: bool = True, bug_id: int = 0, delete: bool = False, verbose: bool = False):
    """Check out one version of a bug into ``tmp`` using ``bugsinpy-checkout``.

    :param project: project name passed via ``-p``.
    :param buggy: selects the value of ``-v``: ``'0'`` when True, ``'1'`` otherwise.
    :param bug_id: bug number passed via ``-i``.
    :param delete: remove an existing checkout first.
    :param verbose: show the tool's output instead of discarding it.
    :raises AssertionError: if the checkout command exits with a non-zero status.
    """
    if delete:
        clean_project(project)
    if verbose:
        out, err = None, subprocess.STDOUT
    else:
        out, err = subprocess.DEVNULL, subprocess.DEVNULL
    version_flag = '0' if buggy else '1'
    command = [bip_checkout, '-p', project, '-v', version_flag, '-i', str(bug_id), '-w', tmp]
    process = subprocess.run(command, stdout=out, stderr=err)
    assert process.returncode == 0

In [23]:
def is_unittest(project):
    """Tell whether the project's ``run_test_file`` script mentions ``unittest``."""
    script = os.path.join(get_dir(project), run_test_file)
    with open(script, 'r') as fp:
        return 'unittest' in fp.read()

In [24]:
version_pattern = re.compile(r'python_version="(?P<version>[^\"]+)"')

# Snapshot of the environment at notebook start; every env_on() call starts
# again from this state.
original_env = os.environ.copy()

def env_on(project: str, verbose=False):
    """Switch ``os.environ`` to the Python version required by ``project``.

    Reads ``python_version`` from the project's ``info_file``, installs that
    version with ``pyenv install`` and puts its ``bin`` directory first on
    ``PATH``. Finally ``pip`` is upgraded. Nothing happens when the info file
    is missing or does not declare a version.

    ``os.environ`` is rebound to a plain ``dict`` here, so later changes to it
    are not propagated to the real process environment. Child processes
    therefore only see the new ``PATH`` when ``env=os.environ`` is passed
    explicitly, which is now also done for the commands run by this function.

    :param project: directory name of the checked-out project below ``tmp``.
    :param verbose: show the output of the invoked commands.
    :raises KeyError: if a version was found but ``PYENV_ROOT`` is not set.
    """
    info_path = os.path.join(get_dir(project), info_file)
    if not os.path.exists(info_path):
        return
    with open(info_path, 'r') as fp:
        match = version_pattern.search(fp.read())
    if not match:
        return
    version = match.group('version')
    out = None if verbose else subprocess.DEVNULL
    err = subprocess.STDOUT if verbose else subprocess.DEVNULL

    os.environ = original_env.copy()
    subprocess.run(['pyenv', 'install', version], stdout=out, stderr=err, env=os.environ)
    version_bin = os.path.join(os.environ["PYENV_ROOT"], "versions", version, "bin")
    os.environ['PATH'] = f'{version_bin}:{os.environ["PATH"]}'
    # Without env= the child would inherit the unmodified process environment.
    subprocess.run(['pip', 'install', '--upgrade', 'pip'], stdout=out, stderr=err, env=os.environ)

In [25]:
def setup_project(project: str, verbose: bool = False):
    """Activate the project's Python environment and run ``bugsinpy-compile`` on it.

    :raises AssertionError: if the compile command exits with a non-zero status.
    """
    env_on(project, verbose=verbose)
    if verbose:
        streams = dict(stdout=None, stderr=subprocess.STDOUT)
    else:
        streams = dict(stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    command = [bip_compile, '-w', get_dir(project)]
    process = subprocess.run(command, env=os.environ, **streams)
    assert process.returncode == 0

In [26]:
def install_project(project: str, bug_id: int, buggy: bool = True, delete: bool = True, verbose: bool = False):
    """Check out the requested version of a bug and set it up.

    :param project: project name.
    :param bug_id: bug number to check out.
    :param buggy: check out the buggy (True) or the other (False) version.
    :param delete: remove an existing checkout before checking out again.
        This flag used to be accepted but ignored; it is now forwarded to
        get_project().
    :param verbose: show the output of the invoked tools.
    """
    get_project(project, buggy=buggy, bug_id=bug_id, delete=delete, verbose=verbose)
    setup_project(project, verbose=verbose)

In [27]:
NEWLINE_TOKEN = 'SDNEWLINE'

def replace_important(s: str):
    """Replace newlines inside ``name="..."`` / ``classname="..."`` values by ``NEWLINE_TOKEN``.

    The text is scanned once. A value starts right after ``name="`` or
    ``classname="`` and ends at the next double quote that is not preceded by
    an unescaped backslash. Everything outside such values is copied verbatim.

    The scan uses an index and collects the output pieces in a list, so the
    running time is linear in the input length (repeatedly slicing the input
    and concatenating the result would be quadratic for large reports).

    :param s: text to process.
    :return: the text with newlines inside the tracked values replaced.
    """
    prefixes = ('name="', 'classname="')
    pieces = []
    important = False
    escaped = False
    position = 0
    length = len(s)
    while position < length:
        if not important:
            for prefix in prefixes:
                if s.startswith(prefix, position):
                    pieces.append(prefix)
                    position += len(prefix)
                    important = True
                    break
            else:
                pieces.append(s[position])
                position += 1
            continue
        char = s[position]
        position += 1
        if char == '\n':
            # The escape state is deliberately left untouched here.
            pieces.append(NEWLINE_TOKEN)
        elif char == '"' and not escaped:
            pieces.append(char)
            important = False
        elif char == '\\' and not escaped:
            pieces.append(char)
            escaped = True
        else:
            pieces.append(char)
            escaped = False
    return ''.join(pieces)

def get_test_results(project: str, tmp_file: str):
    """Parse the XML report ``tmp_file`` into a list of ``(test, outcome)`` pairs.

    For unittest projects the test identifier is ``classname.name``. Otherwise
    the dotted class name is resolved to the first existing ``.py`` file inside
    the project directory and the identifier becomes ``file::Class::name``.
    Tests with a ``failure`` or ``error`` child are reported as ``FAILED``;
    tests without children, or with only ``system-out`` / ``system-err``
    children, as ``PASSED``. All other tests are left out.

    An empty list is returned when the report is missing or cannot be parsed.
    """
    outcomes = list()
    unittest_style = is_unittest(project)
    try:
        with open(tmp_file, 'r') as fp:
            raw = fp.read()
        tree = etree.fromstring(replace_important(raw))
    except FileNotFoundError:
        print('pytest did not generate file')
        return outcomes
    except etree.ParseError:
        print('pytest produced empty file')
        return outcomes
    root = get_dir(project)

    def restore(value):
        # Undo the newline masking applied by replace_important().
        return value.replace(NEWLINE_TOKEN, '\n')

    for case in tree.findall('.//testcase'):
        if unittest_style:
            test = restore(case.get('classname')) + '.' + restore(case.get('name'))
        else:
            parts = restore(case.get('classname')).split(".")
            module_file = ''
            suffix = '::'
            # Grow the prefix until it names an existing module file; the
            # remaining parts are treated as (nested) class names.
            for split_at in range(1, len(parts) + 1):
                module_file = os.path.join(*parts[:split_at]) + '.py'
                if not os.path.exists(os.path.join(root, module_file)):
                    continue
                remainder = parts[split_at:]
                if len(remainder) > 0:
                    suffix = '::' + '::'.join(remainder) + '::'
                break
            test = module_file + suffix + restore(case.get('name'))

        if case.find('failure') is not None or case.find('error') is not None:
            outcomes.append((test, FAILED))
            continue
        children = list(case)
        has_out = case.find('system-out') is not None
        has_err = case.find('system-err') is not None
        only_output = (
            (len(children) == 1 and (has_out or has_err)) or
            (len(children) == 2 and has_out and has_err)
        )
        if len(children) == 0 or only_output:
            outcomes.append((test, PASSED))
    return outcomes

In [28]:
def get_tests(project: str, test_file: str, verbose: bool = False):
    """Run ``test_file`` with ``bugsinpy-test`` and split the reported tests by outcome.

    :param project: directory name of the checked-out project below ``tmp``.
    :param test_file: value passed to the tool's ``-t`` option.
    :param verbose: show the tool's output instead of discarding it.
    :return: a pair ``(passing, failing)`` of lists of test identifiers.
    """
    tmp_file = os.path.join(get_dir(project), 'tmp.xml')
    # Remove a report left over from an earlier run so only fresh results are read.
    try:
        os.remove(tmp_file)
    except OSError:
        pass

    process = subprocess.Popen([bip_test, '-t', test_file, '-o', tmp_file, '-w', get_dir(project)],  
                               stdout=None if verbose else subprocess.DEVNULL,
                               stderr=subprocess.STDOUT if verbose else subprocess.DEVNULL, env=os.environ)
    while process.poll() is None:
        # Once the report file exists, give the process five more seconds to
        # finish on its own and terminate it otherwise.
        if os.path.exists(tmp_file):
            try:
                process.wait(5)
            except subprocess.TimeoutExpired:
                process.terminate()
        # Wait up to one second before polling again.
        try:
            process.wait(1)
        except subprocess.TimeoutExpired:
            pass
    passing, failing = list(), list()
    for t, s in get_test_results(project, tmp_file):
        if s == PASSED:
            passing.append(t)
        elif s == FAILED:
            failing.append(t)
    return passing, failing

In [29]:
def verify_correct_version(project: str, bug_id: int, test_file: str, verbose: bool = False):
    """Install the non-buggy version and check that ``test_file`` passes on it.

    :return: True if at least one test passed and none failed.
    """
    install_project(project, bug_id, buggy=False, verbose=verbose)
    passed, failed = get_tests(project, test_file, verbose=verbose)
    if len(failed) != 0:
        return False
    return len(passed) > 0

In [30]:
# Sanity check: the triggering test file must pass on the non-buggy version.
assert verify_correct_version(test_project, test_bug_id, test_test_file)

In [27]:
# Check out and set up the buggy version of the test bug for the following cells.
install_project(test_project, bug_id=test_bug_id)

In [28]:
# Sanity check: the test project's run script does not mention unittest.
assert not is_unittest(test_project)

In [29]:
# Passing and failing tests of the triggering test file on the buggy version.
test_passing, test_failing = get_tests(test_project, test_test_file)

In [30]:
# Display the passing tests.
test_passing

In [31]:
# Display the failing tests.
test_failing

In [32]:
def run_test(project: str, test_case: str, verbose: bool = False):
    """Run a single test case and report whether it passed.

    :return: True if at least one test passed and none failed.
    """
    passed, failed = get_tests(project, test_case, verbose=verbose)
    if len(passed) == 0:
        return False
    return len(failed) == 0

In [33]:
# Sanity check: every test must reproduce its outcome when run on its own.
for t in test_passing:
    assert run_test(test_project, t)
for t in test_failing:
    assert not run_test(test_project, t)

In [34]:
def get_faulty_lines(project: str):
    """Collect the locations touched by the project's patch files.

    Every change of every diff contributes one ``Location`` in the diff's old
    file. A change that has an old line number uses that number. A change
    without one (an added line) reuses the most recent old line number of the
    diff, or its own new line number if there has been none yet. Patch files
    that cannot be read or parsed are skipped.

    :return: list of distinct locations in order of first appearance.
    """
    found = list()
    for name in patch_files:
        try:
            with open(os.path.join(get_dir(project), name), 'r') as fp:
                text = fp.read()
            for diff in whatthepatch.parse_patch(text):
                anchor = None
                for change in diff.changes:
                    if change.old is not None:
                        anchor = change.old
                    elif anchor is None:
                        anchor = change.new
                    candidate = Location(diff.header.old_path, anchor)
                    if candidate not in found:
                        found.append(candidate)
        except (IOError, whatthepatch.exceptions.WhatThePatchException):
            pass
    return found

In [35]:
# Locations touched by the patch of the test bug.
test_faulty_lines = get_faulty_lines(test_project)
test_faulty_lines

In [36]:
class Subject:
    """One bug of one project together with its tests and faulty locations.

    Two subjects are equal, and hash alike, when project and bug id agree.
    """

    def __init__(self, project: str, bug_id: int, passing: List[str], failing: List[str], is_unittest_: bool, faulty_lines: List[Location]):
        self.project, self.bug_id = project, bug_id
        self.passing, self.failing = passing, failing
        self.is_unittest = is_unittest_
        self.faulty_lines = faulty_lines

    def __repr__(self):
        template = 'Subject(project={},bugId={},#passing={},#failing={})'
        return template.format(self.project, self.bug_id, len(self.passing), len(self.failing))

    def __str__(self):
        return repr(self)

    def __hash__(self):
        identity = (self.project, self.bug_id)
        return hash(identity)

    def __eq__(self, other):
        if not isinstance(other, Subject):
            return False
        if other.project != self.project:
            return False
        return other.bug_id == self.bug_id

    def serialize(self):
        """Return a JSON-compatible dictionary describing this subject."""
        lines = [(location.file, location.line) for location in self.faulty_lines]
        return {
            'project': self.project,
            'bugId': self.bug_id,
            'passing': self.passing,
            'failing': self.failing,
            'is_unittest': self.is_unittest,
            'faulty_lines': lines,
        }

    @staticmethod
    def deserialize(values: dict):
        """Rebuild a subject from a dictionary produced by serialize()."""
        locations = [Location(file, line) for file, line in values['faulty_lines']]
        return Subject(
            values['project'],
            values['bugId'],
            values['passing'],
            values['failing'],
            values['is_unittest'],
            locations,
        )

In [37]:
def verify_project(project: str, bug_id: int, fast: bool = False, verbose=False) -> Subject:
    """Install the buggy version of a bug and turn it into a ``Subject`` if usable.

    A bug is usable when its triggering test file yields at least one passing
    and one failing test on the buggy version. Unless ``fast`` is set, the
    non-buggy version is checked as well; a failure there is only reported,
    the subject is still returned.

    :return: the subject, or ``None`` if passing or failing tests are missing.
    """
    install_project(project, bug_id=bug_id, verbose=verbose)
    test_file = get_test_file(project, bug_id)
    passing, failing = get_tests(project, test_file, verbose=verbose)
    if len(failing) == 0:
        print(f'X {project:<40}{bug_id:<3} - No failing tests on buggy version')
        return None
    if len(passing) == 0:
        print(f'X {project:<40}{bug_id:<3} - No passing tests on buggy version')
        return None
    verified = fast or verify_correct_version(project, bug_id, test_file, verbose=verbose)
    remark = '' if verified else ' - Failing tests on correct version'
    print(f'  {project:<40}{bug_id:<3}{remark}')
    return Subject(project, bug_id, passing, failing, is_unittest(project), get_faulty_lines(project))

In [38]:
# Build the subject for the test bug (including the check of the non-buggy version).
test_subject = verify_project(test_project, test_bug_id)
test_subject

In [39]:
# Display the faulty locations recorded for the test subject.
test_subject.faulty_lines

In [40]:
def get_all_valid(found_subjects: list = None, fast: bool = False, excludes: list = None, max_number: int = None):
    """Verify the bugs of all projects and collect the usable ones as subjects.

    :param found_subjects: subjects already known; they are kept, extended in
        place and not verified again.
    :param fast: skip the check of the non-buggy version (see verify_project()).
    :param excludes: project names to skip.
    :param max_number: maximum number of subjects per project. ``None`` means
        no limit (previously ``None`` caused every bug to be skipped).
    :return: the list of subjects.

    The current list is also stored in the global ``subjects`` when the
    function exits, even on an exception or interrupt, so partial progress is
    not lost.
    """
    if excludes is None:
        excludes = list()
    if found_subjects is None:
        found_subjects = list()
    subjects = found_subjects
    try:
        for p in projects:
            if p in excludes:
                continue
            known_ids = {s.bug_id for s in subjects if s.project == p}
            bug_count = sum(1 for s in subjects if s.project == p)
            # get_bugs() returns None when the bug count cannot be determined.
            for bug_id in range(1, (get_bugs(p) or 0) + 1):
                if max_number is not None and bug_count >= max_number:
                    break
                if bug_id in known_ids:
                    continue
                subject = verify_project(p, bug_id, fast=fast)
                if subject:
                    subjects.append(subject)
                    known_ids.add(bug_id)
                    bug_count += 1
        return subjects
    finally:
        globals()['subjects'] = subjects

In [41]:
# Load the stored subjects unless a selection is started from scratch.
subjects = list()
if (not subject_selection or continue_subject_selection) and os.path.exists(subjects_file):
    with open(subjects_file, 'r') as fp:
        json_subjects = json.load(fp)
    for values in json_subjects:
        subjects.append(Subject.deserialize(values))

# Run the selection. The from-scratch case previously referenced the undefined
# name ``excludes`` and passed no bug limit; both cases now use the same arguments.
if subject_selection:
    subjects = get_all_valid(subjects, fast=True, excludes=excluded_subjects,
                             max_number=max_bugs_per_subject)

In [42]:
# Persist the selected subjects so later runs can load them from ``subjects_file``.
if subject_selection:
    with open(subjects_file, 'w') as fp:
        json.dump(list(map(Subject.serialize, subjects)), fp, indent = 4)

In [43]:
# Number of subjects available for the evaluation.
len(subjects)

In [44]:
# Expose each subject as a global named ``subject_<project>_<bug id>``
# (dashes in the project name are replaced by underscores).
for subject in subjects:
    globals()[f'subject_{subject.project.replace("-", "_")}_{subject.bug_id}'] = subject

In [45]:
# Per project: number of bugs reported by bugsinpy-info, number of selected
# subjects and total number of their passing and failing tests.
stats = {p: {'bugs': get_bugs(p), 'used': 0, 'tests': 0} for p in projects}

for subject in subjects:
    stats[subject.project]['used'] += 1
    stats[subject.project]['tests'] += len(subject.passing) + len(subject.failing)

# Print the statistics as a left-aligned table.
print(f'{"project":<20}{"bugs":<6}{"used":<6}{"tests":<6}')

for p in stats:
    print(f'{p:<20}{stats[p]["bugs"]:<6}{stats[p]["used"]:<6}{stats[p]["tests"]:<6}')

## SFLKit Framework

In [46]:
from sflkit import instrument, analyze
from sflkit.config import Config
from sflkit.analysis.analysis_type import AnalysisType
from sflkit.analysis.suggestion import Suggestion

In [47]:
from configparser import ConfigParser

In [48]:
def generate_config(project, dst, excluded_project_files, passing, failing):
    """Write the SFLKit configuration for ``project`` to ``config_tmp``.

    :param project: directory name of the checked-out project below ``tmp``.
    :param dst: directory name (below ``tmp``) for the instrumented project.
    :param excluded_project_files: names written, quoted and comma-separated,
        to the ``exclude`` option.
    :param passing: value of the ``passing`` option in the ``events`` section.
    :param failing: value of the ``failing`` option in the ``events`` section.
    """
    settings = {
        'target': [
            ('path', get_dir(project)),
            ('language', 'python'),
        ],
        'events': [
            ('events', 'line'),
            ('predicates', 'line'),
            ('metrics', 'Ochiai,Tarantula,Jaccard'),
            ('passing', passing),
            ('failing', failing),
        ],
        'instrumentation': [
            ('path', get_dir(dst)),
            ('exclude', '"' + '","'.join(excluded_project_files) + '"'),
        ],
        'test': [],
    }
    config = ConfigParser()
    for section, options in settings.items():
        config[section] = dict()
        for option, value in options:
            config[section][option] = value
    with open(config_tmp, 'w') as fp:
        config.write(fp)

### Extract Execution Events

In [49]:
# Names handed to generate_config() for the ``exclude`` option of the
# instrumentation section.
excluded_project_files = ['test', 'tests', 'setup.py', 'env', 'build', 'bin', 'docs', 'examples', 
                          'hacking', '.git', '.github', 'extras', 'profiling', 'plugin', 'gallery', 
                          'blib2to3', 'docker', 'contrib', 'changelogs', 'licenses', 'packaging']

In [50]:
# Passing tests in this list are not executed for event collection; they are
# counted as discarded by get_all_events_paths().
excluded_tests = [
    'tests/test_black.py::BlackTestCase::test_self',
    'tests/test_black.py::BlackTestCase::test_pytree',
    'tests/test_black.py::BlackTestCase::test_root_logger_not_used_directly',
    'tests/test_black.py::BlackTestCase::test_tokenize',
    'tests/test_black.py::BlackTestCase::test_pgen',
    'tests/test_black.py::BlackTestCase::test_expression_diff_with_color',
    'tests/test_black.py::BlackTestCase::test_expression_diff',
    'tests/test_black.py::BlackTestCase::test_expression_ff',
    'tests/test_black.py::BlackTestCase::test_expression',
]

In [51]:
def get_path(project: str, bug_id: int, passing: bool = True):
    """Return the directory holding the passing or failing event files of a bug."""
    outcome = 'passing' if passing else 'failing'
    bug_dir = os.path.join(events_output, project, str(bug_id))
    return os.path.join(bug_dir, outcome)

In [52]:
def instrument_and_install_project(project: str, bug_id: int, dst: str, verbose=False):
    """Check out the buggy version, instrument it into ``dst`` and install the result.

    :param project: project name.
    :param bug_id: bug number to check out.
    :param dst: directory name below ``tmp`` that receives the instrumented project.
    :param verbose: show the output of the invoked tools.
    :raises AssertionError: if ``bugsinpy-sflkit`` exits with a non-zero status.
    """
    # The instrumented copy lives at get_dir(dst), so that is the directory to
    # clear (previously ``dst`` was removed relative to the current directory).
    shutil.rmtree(get_dir(dst), ignore_errors=True)
    get_project(project, bug_id=bug_id, verbose=verbose)
    generate_config(project, dst, excluded_project_files, get_path(project, bug_id, True), get_path(project, bug_id, False))
    instrument(config_tmp)
    env_on(project, verbose=verbose)
    process = subprocess.run([bip_sdtools, '-w', get_dir(dst), '-s', sflkit_dir],
                             stdout=None if verbose else subprocess.DEVNULL,
                             stderr=subprocess.STDOUT if verbose else subprocess.DEVNULL, env=os.environ)
    assert process.returncode == 0

In [53]:
# Instrument and install the test bug into the default destination directory.
instrument_and_install_project(test_project, test_bug_id, default_project)

In [54]:
def get_events_path_file(project: str):
    """Return the path of the ``events_path`` file inside the project directory."""
    project_dir = get_dir(project)
    return os.path.join(project_dir, events_path)

In [55]:
class TestDiscard(Exception):
    """Raised when a test run cannot be used for event collection.

    This is the case when the test's outcome differs from the expected one or
    when no events file was found after the run.
    """
    pass

In [56]:
def get_file(test_case: str, passing: bool = True):
    """Return the event file name of a test: ``p_`` or ``f_`` plus the MD5 hex digest of its name."""
    prefix = 'p' if passing else 'f'
    digest = hashlib.md5(test_case.encode('utf8')).hexdigest()
    return prefix + '_' + digest

In [57]:
def get_events_path(project: str, bug_id: int, dst: str, test_case: str, passing: bool = True, verbose=False):
    """Run one test on the instrumented project and store its events file.

    :param project: project name (used for the output location).
    :param bug_id: bug number (used for the output location).
    :param dst: directory name of the instrumented project below ``tmp``.
    :param test_case: test to execute.
    :param passing: expected outcome of the test.
    :param verbose: show the output of the test run.
    :raises TestDiscard: if the outcome differs from ``passing`` or no events
        file was found after the run.
    """
    # Create <events_output>/<project>/<bug_id>/{passing,failing} in one step each.
    for expected in (True, False):
        os.makedirs(get_path(project, bug_id, expected), exist_ok=True)

    events_file = get_events_path_file(dst)
    # Remove an events file of a previous run so it is not attributed to this test.
    try:
        os.remove(events_file)
    except FileNotFoundError:
        pass

    test_result = run_test(dst, test_case, verbose=verbose)

    if test_result != passing:
        raise TestDiscard(f'The result of {test_case} was not correct.')
    if not os.path.exists(events_file):
        raise TestDiscard(f'Events path not found for {test_case}.')
    shutil.copy(events_file, os.path.join(get_path(project, bug_id, passing),
                                          get_file(test_case, passing)))

In [58]:
# Collect the events of the first passing test of the test bug.
get_events_path(test_project, test_bug_id, default_project, test_passing[0], verbose=True)

In [59]:
def get_all_events_paths(project: str, bug_id: int, dst: str, passing: List[str], failing: List[str] = None, verbose=False):
    """Collect the event files of all given tests and print a summary line.

    Passing tests listed in ``excluded_tests`` are not executed and count as
    discarded, as do tests for which get_events_path() raises ``TestDiscard``.

    :param project: project name.
    :param bug_id: bug number.
    :param dst: directory name of the instrumented project below ``tmp``.
    :param passing: tests expected to pass.
    :param failing: tests expected to fail; ``None`` is treated as an empty
        list. (The parameter used to be annotated ``bool`` with the typing
        object ``List[str]`` as its default value.)
    :param verbose: show test output and the reasons for discarded tests.
    :return: True if events were collected for at least one passing and one
        failing test.
    """
    if failing is None:
        failing = []
    p = 0  # passing tests with collected events
    f = 0  # failing tests with collected events
    d = 0  # discarded tests

    for t in passing:
        if t in excluded_tests:
            d += 1
            continue
        try:
            get_events_path(project, bug_id, dst, t, verbose=verbose)
            p += 1
        except TestDiscard as e:
            if verbose:
                print(e)
            d += 1

    for t in failing:
        try:
            get_events_path(project, bug_id, dst, t, passing=False, verbose=verbose)
            f += 1
        except TestDiscard as e:
            if verbose:
                print(e)
            d += 1

    print(f'{project:<40}{bug_id:<6}{p:<6}{f:<6}{d:<6}')
    return p > 0 and f > 0

In [60]:
# Sanity check: events must be collected for at least one passing and one failing test.
assert get_all_events_paths(test_project, test_bug_id, default_project, test_passing, test_failing)

In [61]:
def extract_events_for_project(project: str, bug_id: int, dst: str, passing: List[str], failing: List[str] = None, verbose=False):
    """Instrument and install a bug, then collect the events of its tests.

    :param failing: tests expected to fail; ``None`` is treated as an empty
        list. (The parameter used to be annotated ``bool`` with the typing
        object ``List[str]`` as its default value.)
    :return: the result of get_all_events_paths().
    """
    if failing is None:
        failing = []
    instrument_and_install_project(project, bug_id, dst, verbose=verbose)
    return get_all_events_paths(project, bug_id, dst, passing, failing, verbose=verbose)

In [62]:
def extract_events(subject: Subject, dst: str, verbose=False):
    """Collect the events for ``subject``; see extract_events_for_project()."""
    outcome = extract_events_for_project(
        subject.project,
        subject.bug_id,
        dst,
        subject.passing,
        subject.failing,
        verbose=verbose,
    )
    return outcome

In [63]:
# Collect the events for the test subject.
extract_events(test_subject, default_project)

In [64]:
def get_all_events(subjects: List[Subject], continue_: bool = False):
    """Collect the events for every subject.

    With ``continue_`` set, subjects whose events directory already exists are
    not extracted again; instead a summary line is printed from the files found.
    """
    for subject in subjects:
        subject_dir = os.path.join(events_output, subject.project, str(subject.bug_id))
        if continue_ and os.path.exists(subject_dir):
            p = len(os.listdir(get_path(subject.project, subject.bug_id, passing=True)))
            f = len(os.listdir(get_path(subject.project, subject.bug_id, passing=False)))
            print(f'{subject.project:<40}{subject.bug_id:<6}{p:<6}{f:<6}{len(subject.passing) - p + len(subject.failing) - f:<6}')
            continue
        extract_events(subject, default_project)

In [65]:
# Collect the events for all subjects if extraction is enabled in the configuration.
if events_extraction:
    get_all_events(subjects, continue_=continue_events_extraction)

### Extract Features

In [66]:
def get_results(project: str, bug_id: int, dst: str, verbose=False):
    """Write the configuration for a bug and return the result of ``analyze``.

    ``verbose`` is accepted but not used.
    """
    passing_dir = get_path(project, bug_id, True)
    failing_dir = get_path(project, bug_id, False)
    generate_config(project, dst, excluded_project_files, passing_dir, failing_dir)
    return analyze(config_tmp)

In [67]:
# Analysis results for the test bug.
test_results = get_results(test_project, test_bug_id, default_project)
test_results

In [68]:
def get_suggestions(type_: AnalysisType, metric: str, results:dict) -> List[Suggestion]:
    """Return the entry of ``results`` stored under the analysis type's name and ``metric``."""
    per_metric = results[type_.name]
    return per_metric[metric]

In [69]:
def get_k_suggestions(suggestions: 'List[Suggestion]', k: int) -> 'Set[Location]':
    """Collect the lines of the leading suggestions until at least ``k`` lines are found.

    Suggestions are consumed in the given order. Once ``k`` lines have been
    collected, the loop stops at the first suggestion whose suspiciousness is
    lower than that of the first suggestion; suggestions tied with the top
    score are therefore always included.

    :param suggestions: suggestions to consume, in order (presumably sorted by
        descending suspiciousness -- confirm against the analysis output).
    :param k: minimum number of lines to collect before stopping.
    :return: set of the elements of the consumed suggestions' ``lines``
        (callers intersect it with ``Location`` sets); empty if there are no
        suggestions.
    """
    result = set()
    if not suggestions:
        # Nothing was suggested; previously this raised an IndexError.
        return result
    highest = suggestions[0].suspiciousness
    for suggestion in suggestions:
        if len(result) >= k and suggestion.suspiciousness < highest:
            break
        result.update(suggestion.lines)
    return result

In [70]:
def get_k(type_: AnalysisType, metric: str, results: dict, k: int = 10) -> Set[Suggestion]:
    """Return the top-``k`` lines for one analysis type and metric; see get_k_suggestions()."""
    ranked = get_suggestions(type_, metric, results)
    return get_k_suggestions(ranked, k)

In [71]:
# Display the top lines (default k=10) of the line analysis with the Ochiai metric.
get_k(AnalysisType.LINE, 'Ochiai', test_results)

In [72]:
def precision_at_k(type_: AnalysisType, metric: str, results:dict, locations: List[Location], k: int = 10):
    """Return the fraction of the top-``k`` suggested lines that are faulty.

    :param type_: analysis type whose suggestions are evaluated.
    :param metric: name of the metric.
    :param results: analysis results as returned by get_results().
    :param locations: the faulty locations.
    :param k: passed to get_k().
    :return: precision in ``[0, 1]``; ``0.0`` if no line is suggested
        (previously a ZeroDivisionError).
    """
    suggestions = get_k(type_, metric, results, k=k)
    if not suggestions:
        return 0.0
    locations = set(locations)
    return len(locations.intersection(suggestions)) / len(suggestions)

In [73]:
# Precision of the Ochiai line ranking for the test bug at several cut-offs.
for k in [1, 3, 5, 10]:
    print(f'k={k:<2} : {precision_at_k(AnalysisType.LINE, "Ochiai", test_results, test_faulty_lines, k=k)}')

In [74]:
def recall_at_k(type_: AnalysisType, metric: str, results:dict, locations: List[Location], k: int = 10):
    """Return the fraction of faulty locations found among the top-``k`` suggested lines.

    :param type_: analysis type whose suggestions are evaluated.
    :param metric: name of the metric.
    :param results: analysis results as returned by get_results().
    :param locations: the faulty locations.
    :param k: passed to get_k().
    :return: recall in ``[0, 1]``; ``0.0`` if ``locations`` is empty
        (previously a ZeroDivisionError).
    """
    suggestions = get_k(type_, metric, results, k=k)
    locations = set(locations)
    if not locations:
        return 0.0
    return len(locations.intersection(suggestions)) / len(locations)

In [75]:
# Recall of the Ochiai line ranking for the test bug at several cut-offs.
for k in [1, 3, 5, 10]:
    print(f'k={k:<2} : {recall_at_k(AnalysisType.LINE, "Ochiai", test_results, test_faulty_lines, k=k)}')

## Run Evaluation

In [76]:
def run_for_subject(subject: Subject, ks=(1, 3, 5, 10), extract=True, continue_extraction=True, verbose=False):
    """Analyze one subject and compute precision and recall at each ``k``.

    :param subject: subject to evaluate.
    :param ks: cut-offs to evaluate (an immutable default replaces the former
        mutable list default).
    :param extract: accepted for compatibility; not used.
    :param continue_extraction: accepted for compatibility; not used.
    :param verbose: passed on to get_results().
    :return: nested dict ``{type: {metric: {'p@k': value, 'r@k': value}}}``.
    """
    results = get_results(subject.project, subject.bug_id, default_project, verbose=verbose)
    evaluation = dict()
    for type_ in results:
        evaluation[type_] = dict()
        for metric in results[type_]:
            scores = evaluation[type_][metric] = dict()
            for k in ks:
                scores[f'p@{k}'] = precision_at_k(AnalysisType[type_], metric, results, subject.faulty_lines, k=k)
                scores[f'r@{k}'] = recall_at_k(AnalysisType[type_], metric, results, subject.faulty_lines, k=k)
    print(f'Evaluated {subject}')
    return evaluation

In [77]:
# Evaluate the test subject.
run_for_subject(test_subject)

In [78]:
def run_for_subjects(subjects: List[Subject], ks=(1, 3, 5, 10), extract=True, continue_extraction=True, verbose=False):
    """Evaluate every subject and return the evaluations in the same order.

    :param subjects: subjects to evaluate.
    :param ks: cut-offs to evaluate (an immutable default replaces the former
        mutable list default).
    :param extract: passed through to run_for_subject().
    :param continue_extraction: passed through to run_for_subject().
    :param verbose: passed through to run_for_subject().
    :return: list with one evaluation dict per subject.
    """
    return [
        run_for_subject(subject, ks=ks, extract=extract, continue_extraction=continue_extraction, verbose=verbose)
        for subject in subjects
    ]

In [79]:
# Evaluate all subjects. ``extract`` receives the boolean ``events_extraction``
# setting (the function ``extract_events`` was passed here by mistake).
results = run_for_subjects(subjects, extract=events_extraction, continue_extraction=continue_events_extraction)

In [81]:
def average(results):
    """Average nested evaluation dicts over all entries of ``results``.

    Each entry has the shape ``{type: {metric: {key: value}}}``. Every value is
    divided by the number of entries and summed per ``(type, metric, key)``,
    so a key missing from an entry counts as zero.

    :return: dict of the same shape holding the averages; empty for no results.
    """
    totals = dict()
    count = len(results)
    for outcome in results:
        for type_name, per_metric in outcome.items():
            type_bucket = totals.setdefault(type_name, dict())
            for metric, scores in per_metric.items():
                metric_bucket = type_bucket.setdefault(metric, dict())
                for key, value in scores.items():
                    metric_bucket[key] = metric_bucket.get(key, 0) + value / count
    return totals

In [None]:
# Average the per-subject evaluations.
evaluation = average(results)
evaluation

In [None]:
# Store the averaged evaluation as JSON.
with open('results.json', 'w') as fp:
    json.dump(evaluation, fp, indent = 4)