In [6]:
import csv
import json

import numpy as np
import pandas as pd
from pycaret.regression import *

In [2]:
def substitute_values(
    data,
    sorted_index,
    currency,
    column,
    breakpoints=(33, 66),
    divider_path="divider_list.csv",
):
    """Replace the values of ``column`` with ordinal bucket labels 1, 2, 3, ...

    Rows are visited in the order given by ``sorted_index``. Every visited row
    has its ``column`` value overwritten with the current label, which starts
    at 1 and is incremented each time the visiting rank reaches one of
    ``breakpoints``. The original value found at each breakpoint rank is
    recorded and appended, together with ``currency`` and ``column``, as one
    row to the CSV file at ``divider_path``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to relabel. It is modified in place and also returned.
    sorted_index : iterable of int
        Row *positions* (0-based, as produced by ``np.argsort``) in the order
        in which the rows should be visited.
    currency : str
        Identifier written in the first field of the divider row.
    column : str
        Name of the column to relabel; written in the second field of the
        divider row.
    breakpoints : iterable of int, optional
        Visiting ranks at which the label is incremented. The default
        ``(33, 66)`` reproduces the original hard-coded behaviour.
    divider_path : str, optional
        CSV file the divider row is appended to.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, with ``column`` relabelled.
    """
    # ``sorted_index`` holds positions, not index labels, so address cells
    # positionally. Label-based ``.at`` is only correct for a default
    # 0..n-1 index and silently enlarges the frame for any other index.
    column_position = data.columns.get_loc(column)
    cut_ranks = set(breakpoints)

    label = 1
    # Row written to the divider file: identifiers first, then the original
    # value found at each breakpoint rank.
    divider_row = [currency, column]

    for rank, row_position in enumerate(sorted_index):
        if rank in cut_ranks:
            label += 1
            # Read the original value before it is overwritten below.
            divider_row.append(data.iat[row_position, column_position])

        data.iat[row_position, column_position] = label

    with open(divider_path, "a", newline="") as csvfile:
        csv.writer(csvfile).writerow(divider_row)

    return data

In [3]:
def main(currency_pairs, session_id=None):
    """Train, tune and save one PyCaret regression model per currency pair.

    For every name in ``currency_pairs`` the function reads
    ``./results/<name>.csv``, cleans it, replaces the ``vol`` and ``fd``
    columns with bucket labels via ``substitute_values``, lets PyCaret pick
    and tune the best regressor for the ``return_val`` target, and saves the
    finalized model to ``./models/<name>``. ``divider_list.csv`` is recreated
    with a header row on every call and receives one row per currency/column
    from ``substitute_values``. A JSON summary of the selected estimator per
    currency is printed at the end.

    Parameters
    ----------
    currency_pairs : iterable of str
        Names of the currency pairs; each must have a matching CSV file under
        ``./results/`` containing at least ``vol``, ``fd`` and ``return_val``
        columns.
    session_id : int, optional
        Seed forwarded to PyCaret's ``setup``. The default ``None`` keeps the
        previous behaviour (PyCaret picks a random seed); pass an integer to
        make runs reproducible.
    """
    selected_models = {}

    # Start a fresh divider file; substitute_values appends the data rows.
    with open("divider_list.csv", "w", newline="") as csvfile:
        csv.writer(csvfile).writerow(["Currency", "Column", "33rd", "66th"])

    for currency in currency_pairs:
        # Preprocessing, built as one chain so that every step yields a new
        # frame (no in-place edits on a filtered slice, hence no
        # SettingWithCopyWarning).
        data = (
            pd.read_csv("./results/{}.csv".format(currency))
            # Drop the first row since it is an outlier - return is 0
            .iloc[1:]
            # Drop any rows with null values
            .dropna()
            # Drop any rows with 0 Vol or 0 FD
            .loc[lambda frame: (frame["vol"] != 0) & (frame["fd"] != 0)]
            # Multiply the returns column by 10000 to have a usable column
            .assign(return_val=lambda frame: frame["return_val"] * 10000)
            # Restore a 0..n-1 index so argsort positions match row labels
            .reset_index(drop=True)
        )

        # Each column is ranked independently as it worked the best.
        # argsort returns the positions that would sort the column; those
        # positions are then used to replace the values with bucket labels.
        for column in ("vol", "fd"):
            sorted_index = np.argsort(data[column].to_numpy())
            data = substitute_values(data, sorted_index, currency, column)

        # Using PyCaret functions to setup the regression model trainer
        setup(data=data, target="return_val", session_id=session_id)

        # Compare the candidate models and keep the best one
        best = compare_models()
        selected_models[currency] = str(best)

        # Create the model with the best regressor technique
        model = create_model(best)

        # Run model tune to get a better fit
        tuned_model = tune_model(model)

        # Mark the tuned model as final and save it
        final_model = finalize_model(tuned_model)
        save_model(final_model, "./models/{}".format(currency))

    print(json.dumps(selected_models, indent=4))

In [4]:
# Currency pairs to model, in processing order. main() reads
# ./results/<pair>.csv for each name listed here.
currency_pairs = [
    "EURUSD", "GBPUSD",
    "USDCAD", "USDCHF", "USDHKD",
    "USDAUD", "USDNZD", "USDSGD",
]

# Run the full pipeline for every pair (trains, tunes and saves each model).
main(currency_pairs)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,1.6093,6.1524,2.4804,-0.1921,0.7235,0.6088
1,1.8189,7.3805,2.7167,-0.4298,0.6811,1.3086
2,1.1319,3.8664,1.9663,-0.1101,0.5907,0.8223
3,6.3405,160.0896,12.6527,-0.1818,1.5298,1.2004
4,1.774,4.3468,2.0849,-0.4641,0.6558,1.6666
5,4.0143,22.4855,4.7419,-2.7686,0.9566,2.8978
6,3.1211,10.3625,3.2191,-0.2089,1.2508,0.9772
7,3.7097,43.6518,6.607,-0.5015,0.8642,5.1844
8,1.7991,5.2831,2.2985,-0.1714,0.8779,0.7851
9,6.4417,138.601,11.7729,-0.1759,1.6595,0.9862


Transformation Pipeline and Model Successfully Saved
{'EURUSD': "LassoLars(alpha=1.0, copy_X=True, eps=2.220446049250313e-16, fit_intercept=True,\n          fit_path=True, jitter=None, max_iter=500, normalize=True,\n          positive=False, precompute='auto', random_state=2681, verbose=False)", 'GBPUSD': 'HuberRegressor(alpha=0.0001, epsilon=1.35, fit_intercept=True, max_iter=100,\n               tol=1e-05, warm_start=False)', 'USDCAD': "Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,\n      normalize=False, positive=False, precompute=False, random_state=752,\n      selection='cyclic', tol=0.0001, warm_start=False)", 'USDCHF': "LassoLars(alpha=1.0, copy_X=True, eps=2.220446049250313e-16, fit_intercept=True,\n          fit_path=True, jitter=None, max_iter=500, normalize=True,\n          positive=False, precompute='auto', random_state=6389, verbose=False)", 'USDHKD': 'BayesianRidge(alpha_1=1e-06, alpha_2=1e-06, alpha_init=None,\n              compute_score=False, copy_X