In [24]:
import pandas as pd
import json

In [25]:
def retrieve_data(data_file_path: str) -> dict:
    """Load and return the parsed contents of a JSON file.

    Args:
        data_file_path: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for the file's contents.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's
    # default encoding, which is not UTF-8 everywhere.
    with open(data_file_path, encoding='utf-8') as data_file:
        return json.load(data_file)

In [26]:
def get_subject(data: dict) -> str:
    """Return the value stored at ``data['info']['metadata']['subject']``.

    Raises:
        KeyError: If any of the three nested keys is missing.
    """
    metadata = data['info']['metadata']
    return metadata['subject']

In [27]:
def get_last_generation(data: dict) -> dict:
    """Return the record stored under the numerically largest key.

    Every key other than ``'info'`` is interpreted as an integer; the value
    belonging to the largest one is returned. ``data`` is not modified, so
    its ``'info'`` entry remains available to the caller afterwards.

    Args:
        data: Mapping whose keys (apart from an optional ``'info'``) are
            strings that ``int`` can parse.

    Returns:
        The value stored under the key with the highest integer value.

    Raises:
        ValueError: If there are no keys besides ``'info'``, or a key is
            not a valid integer string.
    """
    generation_keys = [key for key in data if key != 'info']
    # Compare keys numerically but index with the original key text, so a
    # key such as '03' is still found.
    return data[max(generation_keys, key=int)]

In [28]:
def make_row(subject: str, generation_record: dict) -> dict:
    """Flatten one generation record into a single-level row.

    The row starts with ``'subject'``. Each entry of
    ``generation_record['solution']`` maps a gene name to a dict; every key
    of that dict except ``'feature_parameters'`` is copied into the row as
    ``'<key>_<gene>'``. The remaining top-level entries of
    ``generation_record`` (everything except ``'solution'``) are appended
    last and take precedence if a key repeats.

    Neither ``generation_record`` nor its nested dicts are modified, so the
    function can be called repeatedly on the same record.

    Args:
        subject: Value stored under the ``'subject'`` key of the row.
        generation_record: Record containing a ``'solution'`` mapping plus
            any number of additional top-level entries.

    Returns:
        A new flat dict in the order: subject, per-gene fields, other
        record entries.

    Raises:
        KeyError: If ``generation_record`` has no ``'solution'`` entry.
    """
    row = {'subject': subject}
    for gene, gene_fields in generation_record['solution'].items():
        for key, value in gene_fields.items():
            # 'feature_parameters' is deliberately left out of the row.
            if key != 'feature_parameters':
                row[f'{key}_{gene}'] = value
    for key, value in generation_record.items():
        if key != 'solution':
            row[key] = value
    return row

In [35]:
def encode(row: dict) -> dict:
    """Replace every non-string value in ``row`` with its ``repr()``.

    The dict is updated in place and also returned, so both the original
    reference and the return value refer to the encoded row.

    Args:
        row: Mapping whose values should all become strings.

    Returns:
        The same ``row`` object, with string values left untouched and all
        other values converted via ``repr``.
    """
    # Only existing keys are reassigned, so the dict's size never changes
    # while it is being iterated.
    for key, value in row.items():
        if not isinstance(value, str):
            row[key] = repr(value)
    return row

In [29]:
# Location of the JSON results file processed by the cells below.
data_file_path = '../data/processed/S2/results-S2-08-01-2023-21-38-51.json'

In [30]:
# Parse the whole results file into memory.
data = retrieve_data(data_file_path)

In [31]:
# Read the subject from data['info']['metadata']['subject'].
subject = get_subject(data)

In [32]:
# Pick the record stored under the highest-numbered key.
generation_record = get_last_generation(data)

In [33]:
# Flatten the record into a single-level dict that starts with the subject.
row = make_row(subject, generation_record)

In [36]:
# encode() replaces non-string values in `row` with their repr() in place.
encoded_row = encode(row)

In [37]:
# `row` was updated in place above, so this prints the encoded values.
print(row)

{'subject': 'S2', 'feature_g1': 'pow_spect', 'channels_g1': "['Cz']", 'frequency_bands_g1': '[[0.5, 4], [7.5, 22]]', 'feature_g2': 'variance', 'channels_g2': "['C3', 'C4']", 'frequency_bands_g2': '[[7.5, 13]]', 'feature_g3': 'variance', 'channels_g3': "['C3', 'C4']", 'frequency_bands_g3': '[[7.5, 17.5]]', 'accuracy': '0.6411', 'avg_feature_feature_corr': '0.1827202624398782', 'avg_offspring_merit': '0.1809414523048869'}
