In [1]:
import calendar
import os
import time
from itertools import chain, combinations

import numpy as np # Fundamental package for scientific computing with Python
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, f1_score

from lib.Utility import computeEquity, getData

commissioni = 0.0008


In [2]:
# Train the model
def trainModel(X_train, y_train, reg=0.01):
    """Fit a logistic-regression classifier on the training set.

    Args:
        X_train: Training features, passed unchanged to ``LogisticRegression.fit``.
        y_train: Training labels, passed unchanged to ``LogisticRegression.fit``.
        reg: Regularization rate. The estimator is built with ``C = 1 / reg``,
            so the default of 0.01 gives ``C = 100``. Must be strictly positive.

    Returns:
        The fitted ``LogisticRegression`` estimator.

    Raises:
        ValueError: If ``reg`` is not strictly positive.
    """
    if reg <= 0:
        raise ValueError(f"reg must be > 0, got {reg!r}")

    # liblinear solver with a fixed seed so repeated runs give the same model.
    classifier = LogisticRegression(C=1 / reg, solver="liblinear", random_state=42)
    return classifier.fit(X_train, y_train)

In [3]:
data = getData()

features = ['feat_Open', 'feat_vixOpen', 'feat_vixPrevClose', 'feat_vvixOpen', 'feat_vvixPrevClose', 'feat_Acc', 'feat_PrevCluster', 'feat_bigBodyRange']
# Every subset of at least 3 features, shortest subsets first.
combinazioni = chain.from_iterable(combinations(features, r) for r in range(3, len(features) + 1))

# `combinazioni` is a one-shot iterator: materialise it while listing it so the
# training loop below can walk the same combinations again.
combos=[]
for idx,combinazione in enumerate(combinazioni):
  print(idx,time.strftime("%H:%M:%S", time.localtime()), ':', combinazione)
  combos.append(combinazione)

# One timestamp per run; it only serves as the name of the output workbook.
time_stamp = calendar.timegm(time.gmtime())

# Column layout of the saved workbook. The F2 / Recall / ROC columns are kept so
# the output schema does not change, but nothing in this cell fills them.
# NOTE(review): either compute those metrics or drop the columns.
result_columns = ['DataSetName', 'TrainAccuracy', 'TrainPrecision', 'TrainF2','TrainRecall','TrainROC' ,'TestAccuracy', 'TestPrecision', 'TestF2','TestRecall','TestROC', 'BestParams', 'TestF1', 'TestEquity', 'TestCagr']

split_test_date = '2019-02-21'
start_date = '1990-09-04'

# Split Dataset
print("Performing splitting operations....")
in_train_window = (data['Date'] < split_test_date) & (data['Date'] > start_date)
train_data = data[in_train_window].reset_index(drop=True)
# NOTE(review): the validation set uses exactly the same filter as the training
# set (as in the original cell) and is not used below. Define a real validation
# window before relying on it.
val_data = data[in_train_window].reset_index(drop=True)
test_data = data[data['Date'] >= split_test_date].reset_index(drop=True)

# Select 'feat' columns
print("Extracting features columns and creating target variable...")

print(combos)

# The target does not depend on the feature subset, so build it once:
# +1 when the bar closes above its open, -1 otherwise.
y_train = np.where(train_data['Close'] > train_data['Open'], 1, -1)
y_test = np.where(test_data['Close'] > test_data['Open'], 1, -1)

# Create the output folder up front so a missing directory cannot fail the save
# after the whole sweep has already run.
os.makedirs("Dataframe", exist_ok=True)

# Collect one record per combination and build the frame once at the end,
# instead of concatenating onto a growing DataFrame on every iteration.
records = []
for idx,combo in enumerate(combos):
  print(idx, time.strftime("%H:%M:%S", time.localtime()),'Trying with following combo', combo)
  feat_cols = [col for col in combo if 'feat' in col]

  x_train = train_data[feat_cols]
  x_test = test_data[feat_cols]

  result = trainModel(x_train, y_train)

  # Predictions
  y_train_pred = result.predict(x_train)
  y_test_pred = result.predict(x_test)

  # Compute the equity. The predictions are written into test_data before the
  # call; presumably computeEquity reads them from the 'cluster' column -
  # verify against lib.Utility.
  test_data['cluster'] = y_test_pred
  test_equity, test_cagr= computeEquity(test_data, commissioni, feat_cols)

  print("Train metrics...")
  # Metrics
  train_accuracy = accuracy_score(y_train, y_train_pred)
  train_precision = precision_score(y_train, y_train_pred)

  print("Test metrics...")
  test_accuracy = accuracy_score(y_test, y_test_pred)
  test_precision = precision_score(y_test, y_test_pred)
  test_f1 = f1_score(y_test, y_test_pred)

  # Save Results
  new_row = {
      # Stored as text so every cell handed to the Excel writer is a plain scalar.
      'DataSetName': str(combo),
      'TrainAccuracy': train_accuracy,
      'TrainPrecision': train_precision,
      'TestAccuracy': test_accuracy,
      'TestPrecision': test_precision,
      'TestF1': test_f1,
      # get_params must be *called*; the original stored the bound method itself.
      'BestParams': str(result.get_params()),
      'TestEquity': test_equity,
      'TestCagr': test_cagr
  }
  print(pd.Series(new_row))
  records.append(new_row)

dataframe = pd.DataFrame(records, columns=result_columns)
dataframe.to_excel("Dataframe/"+str(time_stamp) + ".xlsx")

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


0 15:08:54 : ('feat_Open', 'feat_vixOpen', 'feat_vixPrevClose')
1 15:08:54 : ('feat_Open', 'feat_vixOpen', 'feat_vvixOpen')
2 15:08:54 : ('feat_Open', 'feat_vixOpen', 'feat_vvixPrevClose')
3 15:08:54 : ('feat_Open', 'feat_vixOpen', 'feat_Acc')
4 15:08:54 : ('feat_Open', 'feat_vixOpen', 'feat_PrevCluster')
5 15:08:54 : ('feat_Open', 'feat_vixOpen', 'feat_bigBodyRange')
6 15:08:54 : ('feat_Open', 'feat_vixPrevClose', 'feat_vvixOpen')
7 15:08:54 : ('feat_Open', 'feat_vixPrevClose', 'feat_vvixPrevClose')
8 15:08:54 : ('feat_Open', 'feat_vixPrevClose', 'feat_Acc')
9 15:08:54 : ('feat_Open', 'feat_vixPrevClose', 'feat_PrevCluster')
10 15:08:54 : ('feat_Open', 'feat_vixPrevClose', 'feat_bigBodyRange')
11 15:08:54 : ('feat_Open', 'feat_vvixOpen', 'feat_vvixPrevClose')
12 15:08:54 : ('feat_Open', 'feat_vvixOpen', 'feat_Acc')
13 15:08:54 : ('feat_Open', 'feat_vvixOpen', 'feat_PrevCluster')
14 15:08:54 : ('feat_Open', 'feat_vvixOpen', 'feat_bigBodyRange')
15 15:08:54 : ('feat_Open', 'feat_vvixPre