In [1]:
import csv
import json

import numpy as np
import pandas as pd
from pycaret.regression import *

In [2]:
def substitute_values(data, sorted_index, currency, column, cut_positions=(33, 66)):
    """Replace the values of ``column`` with ordinal bucket labels, in place.

    Rows are visited in the order given by ``sorted_index``. The label starts
    at 1 and is incremented every time the running position reaches one of
    ``cut_positions``, so the default two cuts produce the labels 1, 2 and 3.
    The original value found at each cut is collected and appended, together
    with ``currency`` and ``column``, as one row to ``divider_list.csv``.

    Args:
        data: DataFrame containing ``column``. It is modified in place.
        sorted_index: Iterable of index labels of ``data`` in the order in
            which rows should be visited (the caller passes an argsort).
        currency: Identifier written as the first field of the CSV row.
        column: Name of the column to relabel; second field of the CSV row.
        cut_positions: 0-based positions within ``sorted_index`` at which the
            label is incremented. Defaults to ``(33, 66)``, the values that
            were previously hard-coded.

    Returns:
        The same ``data`` object that was passed in.

    NOTE(review): the default cuts are absolute positions, not percentiles;
    they only split the rows into thirds when there are about 100 rows.
    If a tercile split is intended for other sizes, pass positions derived
    from ``len(sorted_index)`` - confirm against the input data.
    """
    cuts = frozenset(cut_positions)
    label = 1

    # CSV row: currency, column, then the value found at each cut position
    divider_list = [currency, column]

    for position, index in enumerate(sorted_index):
        if position in cuts:
            label += 1
            # Read the original value before it is overwritten just below
            divider_list.append(data.at[index, column])

        data.at[index, column] = label

    # Append mode: one row per (currency, column) call accumulates in the file
    with open("divider_list.csv", "a", newline="") as csvfile:
        csv.writer(csvfile).writerow(divider_list)

    return data

In [3]:
def _load_clean_frame(currency):
    """Read ``./results/<currency>.csv`` and apply the pre-training row filters."""
    frame = pd.read_csv("./results/{}.csv".format(currency))

    # The first row is discarded (original author's note: it is an outlier,
    # its return is 0); iloc slicing also tolerates an empty file.
    frame = frame.iloc[1:]
    # Delete any rows with null values
    frame = frame.dropna()
    # Delete any rows with 0 Vol or 0 FD; copy so the assignment below writes
    # to an independent frame rather than a slice of the filtered one
    frame = frame[(frame["vol"] != 0) & (frame["fd"] != 0)].copy()
    # Multiply the returns column by 10000 to have a usable column
    frame["return_val"] = frame["return_val"] * 10000
    # Renumber rows 0..n-1 so argsort positions line up with index labels
    return frame.reset_index(drop=True)


def main(currency_pairs, session_id=None):
    """Train, tune and save one regression model per currency pair.

    For every pair the matching CSV under ``./results/`` is cleaned, its
    ``vol`` and ``fd`` columns are replaced by bucket labels via
    ``substitute_values`` (which also appends the cut values to
    ``divider_list.csv``), and the pycaret workflow
    ``setup -> compare_models -> create_model -> tune_model ->
    finalize_model -> save_model`` is run with ``return_val`` as the target.
    The string form of the estimator picked for each pair is printed as JSON
    once all pairs are done.

    Args:
        currency_pairs: Iterable of pair names; each is used to build the
            input path ``./results/<pair>.csv`` and the output path
            ``./models/<pair>``.
        session_id: Optional seed forwarded to pycaret's ``setup``. The
            default ``None`` keeps the previous unseeded behaviour.

    Returns:
        None.
    """
    selected_models = {}

    # Start a fresh divider file; substitute_values appends one row per call
    with open("divider_list.csv", "w", newline="") as csvfile:
        csv.writer(csvfile).writerow(["Currency", "Column", "33rd", "66th"])

    for currency in currency_pairs:
        data = _load_clean_frame(currency)

        # Each feature is ranked independently (original note: this worked
        # best). argsort returns the row positions in ascending value order,
        # which substitute_values uses to overwrite the values with labels.
        sorted_index = np.argsort(np.array(data["vol"]))
        data = substitute_values(data, sorted_index, currency, "vol")

        # Do the same actions for Fractal Dimension
        sorted_index = np.argsort(np.array(data["fd"]))
        data = substitute_values(data, sorted_index, currency, "fd")

        # Using PyCaret functions to setup the regression model trainer
        setup(data=data, target="return_val", session_id=session_id)

        # Compare the candidate models and remember which one came out on top
        best = compare_models()
        selected_models[currency] = str(best)

        # Choose the best regressor technique to create the model
        model = create_model(best)

        # Run model tune to get a better fit
        tuned_model = tune_model(model)

        # Mark the tuned model as final and save it
        model = finalize_model(tuned_model)
        save_model(model, "./models/{}".format(currency))

    print(json.dumps(selected_models, indent=4))

In [4]:
# Currency pairs to model, processed in this order (a list, not a set).
# main() reads ./results/<pair>.csv for each entry and saves the trained
# model to ./models/<pair>.
currency_pairs = [
    "EURUSD",
    "GBPUSD",
    "USDCAD",
    "USDCHF",
    "USDHKD",
    "USDAUD",
    "USDNZD",
    "USDSGD",
]

# Runs the full train/tune/save loop for every pair listed above.
main(currency_pairs)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,4.329,28.258,5.3158,-0.0003,1.6207,1.0
1,7.432,167.4769,12.9413,-0.11,1.8561,1.0
2,2.6381,11.7467,3.4273,-0.0009,1.2562,1.0
3,1.582,4.1557,2.0386,-0.2855,0.9706,1.0
4,1.7016,4.7408,2.1773,-0.0029,1.0011,1.0
5,3.3669,15.2315,3.9028,-0.1939,1.4444,1.0
6,1.7496,4.496,2.1204,-0.0,1.0257,1.0
7,1.3429,3.9771,1.9943,-0.0017,0.8758,1.0
8,2.5323,8.5264,2.92,-0.286,1.2567,1.0
9,2.0373,5.1769,2.2753,-0.0013,1.1125,1.0


Transformation Pipeline and Model Successfully Saved
{
    "EURUSD": "LassoLars(alpha=1.0, copy_X=True, eps=2.220446049250313e-16, fit_intercept=True,\n          fit_path=True, jitter=None, max_iter=500, normalize=True,\n          positive=False, precompute='auto', random_state=5127, verbose=False)",
    "GBPUSD": "Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,\n      normalize=False, positive=False, precompute=False, random_state=3938,\n      selection='cyclic', tol=0.0001, warm_start=False)",
    "USDCAD": "LassoLars(alpha=1.0, copy_X=True, eps=2.220446049250313e-16, fit_intercept=True,\n          fit_path=True, jitter=None, max_iter=500, normalize=True,\n          positive=False, precompute='auto', random_state=5289, verbose=False)",
    "USDCHF": "LassoLars(alpha=1.0, copy_X=True, eps=2.220446049250313e-16, fit_intercept=True,\n          fit_path=True, jitter=None, max_iter=500, normalize=True,\n          positive=False, precompute='auto', random_state=4243, verbo