# Libraries

In [1]:
import os
import sys
from itertools import product
import time
import numpy as np
import pandas as pd
import json

# Put the parent of the current working directory on the import path (once)
# so that the `src` package imported below can be resolved.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from src.portfolio_selection_ga import PortfolioSelectionGA

# Import returns data

In [2]:
def read_returns_data(file_path):
    """Read a workbook and return row-over-row percentage changes.

    The sheet is loaded with ``pd.read_excel``. Its ``Date`` column is parsed
    to datetimes, used as the index and sorted ascending. The remaining
    columns are converted with ``pct_change`` and every row that contains a
    missing value (which always includes the first row) is dropped.

    Parameters
    ----------
    file_path : path passed straight to ``pd.read_excel``.

    Returns
    -------
    pandas.DataFrame indexed by ``Date`` holding the percentage changes.
    """
    prices = (
        pd.read_excel(file_path)
        .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
        .set_index('Date')
        .sort_index()
    )
    returns = prices.pct_change()
    return returns.dropna()

# Load the workbook and turn it into a date-indexed frame of percentage
# changes; `data` is the input handed to every optimiser run below.
data = read_returns_data("../data/base_dados.xlsx")

# Create the combination grid

In [3]:
# Candidate values for each hyperparameter explored by the grid search.
popl_size_list = [10, 20, 30, 40, 50]
muta_rate_list = [0.1, 0.2, 0.3, 0.4, 0.5]
max_iter_list = [4000, 5000, 6000]
max_iter_wo_imprv_list = [50, 100, 150]

# One dict per point of the Cartesian product. `product` varies the last
# list fastest, so entries are ordered by popl_size first.
grid = [
    dict(zip(("popl_size", "muta_rate", "max_iter", "max_iter_wo_imprv"), combo))
    for combo in product(
        popl_size_list, muta_rate_list, max_iter_list, max_iter_wo_imprv_list
    )
]

# Measure the execution time and fitness for every parameter combination

In [4]:
# Risk-aversion levels; every parameter combination is run once per level.
risk_aver_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

# Keys of each grid entry that are forwarded to the optimiser. Result fields
# are written back into the same dicts below, so forwarding the whole dict
# would pass unexpected keyword arguments if this cell is executed again
# (or executed after the ranking cell has added its own keys).
ga_param_names = ("popl_size", "muta_rate", "max_iter", "max_iter_wo_imprv")

for i, parameters in enumerate(grid, 1):
    ga_kwargs = {name: parameters[name] for name in ga_param_names}

    # end="\r" rewrites the same output line instead of printing one per run.
    print(
        f"Running simulation {i} out of {len(grid)} with parameters {ga_kwargs}",
        end="\r")
    # perf_counter is a monotonic, high-resolution clock intended for
    # measuring elapsed intervals; time.time() can jump with system clock changes.
    start_param = time.perf_counter()

    execution_time_list = []
    fitness_list = []
    for risk_aver in risk_aver_list:
        execution_start = time.perf_counter()
        ga = PortfolioSelectionGA(data=data, risk_aver=risk_aver, **ga_kwargs)
        ga.optimize()
        execution_time_list.append(time.perf_counter() - execution_start)
        # Smallest value in ga.popl_fit after optimisation — presumably the
        # best individual's fitness; confirm against PortfolioSelectionGA.
        fitness_list.append(ga.popl_fit.min())

    # Raw measurements for this combination (seconds / fitness per risk level).
    parameters["total_execution_time"] = time.perf_counter() - start_param
    parameters["execution_time_list"] = execution_time_list
    parameters["fitness_list"] = fitness_list

    # Summary statistics across the risk-aversion levels; np.std is the
    # population standard deviation (ddof=0).
    parameters["execution_time"] = np.mean(execution_time_list)
    parameters["execution_time_std"] = np.std(execution_time_list)
    parameters["fitness"] = np.mean(fitness_list)
    parameters["fitness_std"] = np.std(fitness_list)

Running simulation 225 out of 225 with parameters {'popl_size': 50, 'muta_rate': 0.5, 'max_iter': 6000, 'max_iter_wo_imprv': 150}

# Rank by execution time and fitness and find the best parameter combination

In [5]:
# Rank every combination on each metric separately (rank 1 = smallest value).
for metric, rank_key in (
    ("execution_time", "rank_execution_time"),
    ("fitness", "rank_fitness"),
):
    grid.sort(key=lambda entry: entry.get(metric))
    for position, entry in enumerate(grid, start=1):
        entry[rank_key] = position

# Combined score: the sum of the two per-metric ranks.
for entry in grid:
    entry["rank"] = entry["rank_execution_time"] + entry["rank_fitness"]

# list.sort is stable, so entries with equal combined rank keep the fitness
# order established above. The first entry is displayed as the cell output.
grid.sort(key=lambda entry: entry.get("rank"))
grid[0]

{'popl_size': 10,
 'muta_rate': 0.5,
 'max_iter': 5000,
 'max_iter_wo_imprv': 150,
 'total_execution_time': 104.90223288536072,
 'execution_time_list': [14.584100723266602,
  21.562983751296997,
  14.316090822219849,
  13.10235071182251,
  14.7273690700531,
  10.899469137191772,
  5.9856061935424805,
  5.333545923233032,
  4.390653848648071],
 'fitness_list': [-0.01371895677082991,
  -0.011800437455595086,
  -0.00907819137511223,
  -0.007001914122151196,
  -0.005440688517456221,
  -0.0029230279020531504,
  -0.0015989507083839511,
  -0.0004418617246591817,
  0.0002536502655703393],
 'execution_time': 11.655796686808268,
 'execution_time_std': 5.278970934746049,
 'fitness': -0.005750042034518955,
 'fitness_std': 0.004741015077910651,
 'rank_execution_time': 57,
 'rank_fitness': 79,
 'rank': 136}

# Save results

In [7]:
# Serialise first: opening with "w" empties the file immediately, so encoding
# inside the `with` block could leave a truncated results file behind if
# `grid` turned out not to be JSON-serialisable.
results_json = json.dumps(grid, indent=4)
with open("../data/parameters_grid_results.json", "w", encoding="utf-8") as file:
    file.write(results_json)