# **Machine Learning Training**

### Initial Imports:

In [5]:
import numpy as np
import pandas as pd
pd.set_option("display.max_colwidth", 300)

from pathlib import Path

import warnings
warnings.filterwarnings('ignore')

In [6]:
# Set Initial Random State
from numpy.random import seed
seed(1)
from tensorflow import random
random.set_seed(2)

In [9]:
ticker = "TSLA"

#### Read in CSV as Pandas DataFrame:

In [7]:
# Set path to CSV and read in CSV
csv_path = Path('Returns and Signals.csv')
return_signal_df=pd.read_csv(csv_path)
# Set index as datetime object and drop extraneous columns
return_signal_df.set_index(pd.to_datetime(return_signal_df['Date'], infer_datetime_format=True), inplace=True)
return_signal_df.drop(columns=['Date'], inplace=True)
return_signal_df.head()

Unnamed: 0_level_0,TSLA_Adj Close,TSLA_Volume,TSLA_Returns,ema_crossover_signal,vol_trend_signal,bollinger_signal,obv_crossover_signal,sentiment_signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2019-01-02,62.023998,58293000.0,-0.068149,-1.0,0.0,0.0,-1.0,0.0
2019-01-03,60.071999,34826000.0,-0.031472,-1.0,1.0,0.0,-1.0,-1.0
2019-01-04,63.537998,36970500.0,0.057697,-1.0,1.0,0.0,-1.0,0.0
2019-01-07,66.991997,37756000.0,0.054361,1.0,-1.0,0.0,1.0,0.0
2019-01-08,67.07,35042500.0,0.001164,1.0,1.0,0.0,1.0,0.0


#### Check Data Quality:

In [8]:
return_signal_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 504 entries, 2019-01-02 to 2020-12-30
Data columns (total 8 columns):
TSLA_Adj Close          504 non-null float64
TSLA_Volume             504 non-null float64
TSLA_Returns            504 non-null float64
ema_crossover_signal    504 non-null float64
vol_trend_signal        504 non-null float64
bollinger_signal        504 non-null float64
obv_crossover_signal    504 non-null float64
sentiment_signal        504 non-null float64
dtypes: float64(8)
memory usage: 35.4 KB


### Creating X and Dependent Variables:

In [17]:
def variables(return_signal_df):
    '''
    this func takes in return_signal_df,
    then shiting X variables down (to the future) by 1,
    then constructing a new column for the dependent variable
    return modified return_signal_df.
    '''
    # Set X Variables/Features 
    global x_var_list
    x_var_list = ['ema_crossover_signal','vol_trend_signal','bollinger_signal','obv_crossover_signal','sentiment_signal'] # as a filter
    # Shift 1 down as signals comes after prices 
    return_signal_df[x_var_list] = return_signal_df[x_var_list].shift(1)
    # Construct the dependent variable where if daily return is greater than 0, then 1, else, 0.
    return_signal_df['Positive Return'] = np.where(return_signal_df[f'{ticker}_Returns'] > 0, 1.0, 0.0)
    return_signal_df.dropna(inplace=True)
    return return_signal_df

In [18]:
return_signal_df_modified = variables(return_signal_df)
return_signal_df_modified.head()

Unnamed: 0_level_0,TSLA_Adj Close,TSLA_Volume,TSLA_Returns,ema_crossover_signal,vol_trend_signal,bollinger_signal,obv_crossover_signal,sentiment_signal,Positive Return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2019-01-07,66.991997,37756000.0,0.054361,-1.0,0.0,0.0,-1.0,0.0,1.0
2019-01-08,67.07,35042500.0,0.001164,-1.0,1.0,0.0,-1.0,-1.0,1.0
2019-01-09,67.706001,27164500.0,0.009483,-1.0,1.0,0.0,-1.0,0.0,1.0
2019-01-10,68.994003,30282000.0,0.019023,1.0,-1.0,0.0,1.0,0.0,1.0
2019-01-11,69.452003,25195500.0,0.006638,1.0,1.0,0.0,1.0,0.0,1.0


### Separate X and Y Training Datasets:
80% (training) 20% (testing) split

In [19]:
def separate(return_signal_df_modified):
    '''
    thif func takes in return_signal_df_modified,
    split the data into trian (80%) and test (20%),
    then construct and return trian and test data sets for 
    the X and dependent variables.
    '''
    split = int(0.8 * len(return_signal_df_modified))
    # X Variable
    x_train = return_signal_df_modified[x_var_list][:split]
    x_test = return_signal_df_modified[x_var_list][split:]
    # Dependent Variable
    y_train = return_signal_df_modified['Positive Return'][:split]
    y_test = return_signal_df_modified['Positive Return'][split:]
    return x_train, x_test, y_train, y_test

In [20]:
x_train, x_test, y_train, y_test = separate(return_signal_df_modified)

In [24]:
x_train.head(2)

Unnamed: 0_level_0,ema_crossover_signal,vol_trend_signal,bollinger_signal,obv_crossover_signal,sentiment_signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-01-07,-1.0,0.0,0.0,-1.0,0.0
2019-01-08,-1.0,1.0,0.0,-1.0,-1.0


In [23]:
x_test.head(2)

Unnamed: 0_level_0,ema_crossover_signal,vol_trend_signal,bollinger_signal,obv_crossover_signal,sentiment_signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-08-07,1.0,1.0,0.0,1.0,0.0
2020-08-10,1.0,1.0,0.0,1.0,0.0


In [22]:
y_train.head(2)

Date
2019-01-07    1.0
2019-01-08    1.0
Name: Positive Return, dtype: float64

In [21]:
y_test.head(3)

Date
2020-08-07    0.0
2020-08-10    0.0
2020-08-11    0.0
Name: Positive Return, dtype: float64

### Machine Learning Models Training: