In [1]:
import numpy as np
import pandas as pd
import joblib

In [2]:
def read_data(path, path_ihsg, 
              save_file = True,
              return_file = True,
              set_index = None):
    '''
    Read data from data folder in csv format.
    
    Parameters
    ----------
    path: str
          path to data
    
    '''
    
    emiten = pd.read_csv(path, index_col = set_index)
    ihsg = pd.read_csv(path_ihsg, index_col = set_index)
    
    def merge_emiten(emiten, ihsg):
        emiten = emiten.drop(['Open', 'High', 'Low', 'Adj Close'], axis = 1)
        ihsg = ihsg.drop(['Open', 'High', 'Low', 'Adj Close'], axis = 1)
        merged = pd.merge(emiten, ihsg, how='left', on='Date')
        merged.index = pd.to_datetime(merged.index)
        merged.rename(columns = {'Close_x':'Close', 'Volume_x':'Volume', 'Close_y':'Close_ihsg', 'Volume_y':'Volume_ihsg'}, inplace = True)
        merged['Close+1'] = merged['Close'].shift(-1)
        merged.dropna(inplace=True)
        merged.drop(merged.loc[merged["Volume"]==0].index, inplace=True)
        merged.drop(merged.loc[merged["Volume_ihsg"]==0].index, inplace=True)
        return merged

    if save_file:
        joblib.dump(merge_emiten(emiten, ihsg), "merged.pkl")
    
    if return_file:
        return merge_emiten(emiten, ihsg)



def split_input_output(dataset,
                       target_column,
                       save_file = True,
                       return_file = True):
    
    output_df = dataset[target_column]
    input_df = dataset.drop([target_column],
                            axis = 1)
    
    if save_file:
        joblib.dump(output_df, "output_df.pkl")
        joblib.dump(input_df, "input_df.pkl")
    
    if return_file:
        return output_df, input_df

def x_split(input_df, return_file=True, save_file=True):
    X_train = input_df[:int(input_df.shape[0]*0.6)]
    test_val = input_df[int(input_df.shape[0]*0.6):]
    X_val = test_val[:int(test_val.shape[0]*0.5)]
    X_test = test_val[int(test_val.shape[0]*0.5):]

    if save_file:
        joblib.dump(X_train, "X_train.pkl")
        joblib.dump(X_val, "X_val.pkl")
        joblib.dump(X_test, "X_test.pkl")

    if return_file:
        return X_train, X_val, X_test
        
def y_split(output_df, return_file=True, save_file=True):
    y_train = output_df[:int(output_df.shape[0]*0.6)]
    y_test_val = output_df[int(output_df.shape[0]*0.6):]
    y_val = y_test_val[:int(y_test_val.shape[0]*0.5)]
    y_test = y_test_val[int(y_test_val.shape[0]*0.5):]
    
    if save_file:
        joblib.dump(y_train, "y_train.pkl")
        joblib.dump(y_val, "y_valid.pkl")
        joblib.dump(y_test, "y_test.pkl")

    if return_file:
        return y_train, y_val, y_test

In [3]:
DATA_PATH = "data/AMRT.csv"
DATA_PATH_IHSG = "data/ihsg.csv"
TARGET_COLUMN = "Close+1"
INDEX_COLUMN = "Date"

data_house = read_data(DATA_PATH, DATA_PATH_IHSG,
                       set_index = INDEX_COLUMN)
output_df, input_df = split_input_output(
                            data_house,
                            TARGET_COLUMN)

X_train, X_val, X_test = x_split(input_df)
y_train, y_val, y_test = y_split(output_df)

In [40]:
def SMA(feature_sma):
    feature_smas = []
    periode = [5,20,60,120]
    for i in periode:
        feature_sma["i"] = feature_sma.Close.rolling(i, min_periods=1).mean()
    return feature_sma.append(feature_smas)

In [42]:
def EMA(feature_ema):
    feature_emas = []
    periodes = [0.1, 0.3]
    for i in periodes:
        feature_ema["i"] = feature_ema.Close.ewm(alpha=i, adjust=False).mean()
    return feature_ema.append(feature_emas)

In [43]:
EMA(X_train)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  feature_ema["i"] = feature_ema.Close.ewm(alpha=i, adjust=False).mean()


Unnamed: 0_level_0,Close,Volume,Close_ihsg,Volume_ihsg,5,20,60,120,i
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2009-01-15,39.5,313145000.0,1343.494019,8634100.0,39.500000,39.500000,39.500000,39.500000,39.500000
2009-01-16,40.0,47215000.0,1363.875977,19470600.0,39.750000,39.750000,39.750000,39.750000,39.650000
2009-01-19,40.0,11965000.0,1350.687012,13838000.0,39.833333,39.833333,39.833333,39.833333,39.755000
2009-01-20,39.5,5510000.0,1344.151001,10729800.0,39.750000,39.750000,39.750000,39.750000,39.678500
2009-01-21,39.0,3685000.0,1321.453003,9516800.0,39.600000,39.600000,39.600000,39.600000,39.474950
...,...,...,...,...,...,...,...,...,...
2017-01-03,620.0,127800.0,5275.971191,33217000.0,590.000000,549.750000,527.366667,547.225000,593.234468
2017-01-04,580.0,321200.0,5301.183105,53111300.0,595.000000,551.000000,528.533333,547.058333,589.264128
2017-01-05,580.0,1971300.0,5325.503906,77219400.0,597.000000,552.500000,529.616667,546.933333,586.484889
2017-01-06,590.0,108500.0,5347.021973,71615300.0,599.000000,555.750000,530.950000,546.933333,587.539423
