# Install requirements

In [1]:
%%capture
!pip install sdmetrics
!pip install sdv



# Model Evaluation

In [2]:
from typing import Dict
from sdmetrics.reports.single_table import QualityReport
from pandas import read_csv
import time
import os
from pathlib import Path
import json

from common.config import *
from common.utils import update_json_file, ndd, ndd_to_dict, load_dataframe, get_model_class

def compute_model_eval(model_name: str, dataset_name: str) -> Dict:
    """Build (or load a cached) SDMetrics quality report for one model's samples.

    The report is cached at
    ``ANALYSIS_REPORTS_PATH/sdv_report_{model_name}_{dataset_name}.pkl``.
    When that file does not exist, the report is generated from the dataset
    returned by ``load_dataframe(dataset_name)``, the sample CSV
    ``SAMPLES_PATH/{model_name}_{dataset_name}_sample.csv`` and the metadata
    JSON ``DATASET_PATH/{dataset_name}_all.metadata.sdv.json``, then saved.
    Otherwise the saved report is loaded.

    Args:
        model_name: Model identifier used in the sample and report file names.
        dataset_name: Dataset identifier used in the metadata, sample and
            report file names.

    Returns:
        The result of ``ndd_to_dict`` applied to a container holding
        ``['sdv']['score']`` (``report.get_score()``) and
        ``['sdv']['properties']`` (a mapping of each "Property" to its
        "Score" taken from ``report.get_properties()``).
    """
    evals = ndd()

    report_path = Path(ANALYSIS_REPORTS_PATH) / f'sdv_report_{model_name}_{dataset_name}.pkl'
    if not os.path.isfile(report_path):
        # Imported lazily: only needed when the report has to be generated.
        from sdv.metadata.table import Table

        metadata_path = Path(DATASET_PATH) / f'{dataset_name}_all.metadata.sdv.json'
        # Context manager guarantees the metadata file handle is closed.
        with open(metadata_path) as metadata_file:
            sdv_metadata = Table.from_dict(json.load(metadata_file))

        dataset_df = load_dataframe(dataset_name)
        sample_df = read_csv(Path(SAMPLES_PATH) / f'{model_name}_{dataset_name}_sample.csv')

        report = QualityReport()
        report.generate(dataset_df, sample_df, sdv_metadata.to_dict())
        report.save(report_path)
    else:
        print('  Load the report as it already exists...')
        # NOTE(review): the cache is a .pkl file, presumably unpickled by
        # QualityReport.load -- only load reports produced by this project.
        report = QualityReport.load(report_path)

    evals['sdv']['score'] = report.get_score()
    evals['sdv']['properties'] = {
        e["Property"]: e["Score"]
        for e in report.get_properties().to_dict(orient='records')
    }

    return ndd_to_dict(evals)

def evaluate_holdout(dataset_name: str):
    """Build (or load a cached) SDMetrics quality report for the holdout split.

    The report is cached at
    ``ANALYSIS_REPORTS_PATH/sdv_report_holdout_{dataset_name}.pkl``. When that
    file does not exist, the report is generated by comparing the dataset
    returned by ``load_dataframe(dataset_name)`` with the holdout CSV
    ``DATASET_PATH/{dataset_name}_holdout.csv`` using the metadata JSON
    ``DATASET_PATH/{dataset_name}_all.metadata.sdv.json``, then saved.
    Otherwise the saved report is loaded.

    Args:
        dataset_name: Dataset identifier used in the metadata, holdout and
            report file names.

    Returns:
        The ``ndd()`` container itself (not passed through ``ndd_to_dict``)
        holding ``['sdv']['score']`` (``report.get_score()``) and
        ``['sdv']['properties']`` (a mapping of each "Property" to its
        "Score" taken from ``report.get_properties()``).
    """
    evals = ndd()

    report_path = Path(ANALYSIS_REPORTS_PATH) / f'sdv_report_holdout_{dataset_name}.pkl'
    if not os.path.isfile(report_path):
        # Imported lazily: only needed when the report has to be generated.
        from sdv.metadata.table import Table

        dataset_df = load_dataframe(dataset_name)
        holdout_df = read_csv(Path(DATASET_PATH) / f'{dataset_name}_holdout.csv')
        metadata_path = Path(DATASET_PATH) / f'{dataset_name}_all.metadata.sdv.json'
        # Context manager guarantees the metadata file handle is closed.
        with open(metadata_path) as metadata_file:
            sdv_metadata = Table.from_dict(json.load(metadata_file))

        report = QualityReport()
        report.generate(dataset_df, holdout_df, sdv_metadata.to_dict())
        report.save(report_path)
    else:
        print('  Load the report as it already exists...')
        # NOTE(review): the cache is a .pkl file, presumably unpickled by
        # QualityReport.load -- only load reports produced by this project.
        report = QualityReport.load(report_path)

    evals['sdv']['score'] = report.get_score()
    evals['sdv']['properties'] = {
        e["Property"]: e["Score"]
        for e in report.get_properties().to_dict(orient='records')
    }
    return evals

def compute_models_eval():
    """Evaluate the holdout split and every configured model on every dataset.

    Dataset names come from ``get_datsets_config()`` and model names from
    ``get_models_config()``. For each dataset the holdout evaluation is stored
    under the ``'holdout'`` key, followed by one entry per model. A model whose
    evaluation raises is reported on stdout and left out of the result; an
    error in the holdout evaluation is not caught here.

    Returns:
        The result of ``ndd_to_dict`` applied to the collected evaluations,
        indexed first by dataset name and then by ``'holdout'`` or model name.
    """
    datasets = get_datsets_config()
    models = get_models_config()

    results = ndd()
    for dataset in datasets.keys():
        print(f'# Holdout on {dataset}')
        results[dataset]['holdout'] = evaluate_holdout(dataset)

        for model in models.keys():
            print(f'# Model {model} on {dataset}')
            try:
                results[dataset][model] = compute_model_eval(model, dataset)
            except Exception as err:
                # Best effort: report the failure and continue with the next model.
                print('Could not generate the report!')
                print(err)

    return ndd_to_dict(results)


In [3]:
# Run the holdout and per-model evaluations for all configured datasets and
# keep the result in `models_eval`.
models_eval = compute_models_eval()

# Holdout on sdv.adult
  Load the report as it already exists...
# Model fabric.regular on sdv.adult
  Load the report as it already exists...
# Model sdv.tabular on sdv.adult
  Load the report as it already exists...


In [4]:
from common.config import get_analysis_config
from common.utils import update_json_file

analysis_config = get_analysis_config()

# Output location for the evaluation results, as given by the analysis config.
path = analysis_config['output_files']['eval']
# Pass the collected evaluations to update_json_file for that path.
update_json_file(path, models_eval)