In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()

In [None]:
# Read the tab-separated price file, skipping its first line and keeping only
# the first six columns under explicit names.
data = pd.read_csv(
    'inputs/USDJPY_H1.csv',
    sep='\t',
    skiprows=1,
    usecols=[0, 1, 2, 3, 4, 5],
    names=('date', 'time', 'open', 'high', 'low', 'close'),
)
# Join the date and time strings and parse them into one timestamp column,
# then discard the two raw string columns.
data['datetime'] = pd.to_datetime(data['date'] + ' ' + data['time'])
data = data.drop(columns=['date', 'time'])
data

In [None]:
# Working frame for feature engineering: an independent copy of `data`.
# The merge with a second frame (`ni225`) is currently disabled:
# all_data = pd.merge(data, ni225, on='datetime', suffixes=['', '_ni225']).drop(['date_ni225', 'time_ni225'], axis=1)
all_data = data.copy(deep=True)
all_data

In [None]:
# Extract calendar and time-of-day features from the timestamp column.
# The vectorized `.dt` accessor replaces per-row Python list comprehensions;
# the values are the same, though the integer width of the resulting columns
# may differ depending on the pandas version.
timestamp_parts = all_data['datetime'].dt

all_data['day'] = timestamp_parts.day
all_data['month'] = timestamp_parts.month
all_data['year'] = timestamp_parts.year
all_data['day_of_week'] = timestamp_parts.dayofweek  # Monday == 0
all_data['day_of_year'] = timestamp_parts.dayofyear

all_data['hour'] = timestamp_parts.hour
all_data['minute'] = timestamp_parts.minute

all_data

In [None]:
# Select the modelling columns. `.copy()` makes `dataset` an independent frame:
# later cells add columns to it (`y`, `shiftN`, `sma*`, `H1_*`), and assigning
# into a bare column slice of `all_data` raises SettingWithCopyWarning and
# leaves it ambiguous whether the slice or the parent is modified.
#
# Disabled variant that also used tick volume and the NI225 columns:
# dataset = all_data[['open', 'high', 'low', 'close', 'tickvol', 'day', 'month', 'year', 'day_of_week', 'day_of_year', 'hour', 'minute', 'open_ni225', 'high_ni225', 'low_ni225', 'close_ni225', 'tickvol_ni225']]
dataset = all_data[['open', 'high', 'low', 'close', 'day', 'month', 'year', 'day_of_week', 'day_of_year', 'hour', 'minute']].copy()
dataset


In [None]:
# Target column: each row receives the `high` of the following row, so the
# final row has no target (NaN).
dataset['y'] = dataset['high'].shift(periods=-1)
# Side-by-side view of the target and the value it was shifted from.
dataset.loc[:, ['y', 'high']]

In [None]:
# Lagged highs: column `shiftN` holds the `high` from N rows earlier, N = 1..12.
for lag in range(1, 13):
    dataset[f'shift{lag}'] = dataset['high'].shift(lag)

# Rolling means of `high` over 5 and 15 rows.
for column, window in (('sma5', 5), ('sma15', 15)):
    dataset[column] = dataset['high'].rolling(window).mean()

# Rolling extremes over the last 25 rows: highest `high` and lowest `low`.
dataset['H1_high'] = dataset['high'].rolling(window=25).max()
dataset['H1_low'] = dataset['low'].rolling(window=25).min()
dataset

In [None]:
# Drop the first 100 rows and the last row by position. The last row has no
# target because `y` was built with shift(-1); the leading rows include those
# where the lag/rolling features are still NaN.
# NOTE(review): this cell rebinds `dataset` from itself, so running it a second
# time trims another 100 rows -- run it exactly once per kernel session.
dataset = dataset.iloc[100:-1]
dataset

In [None]:
# Inspect the target next to the current `high` after trimming.
dataset.loc[:, ['y', 'high']]

In [None]:
# Per-column flag: True where the column still contains at least one missing value.
dataset.isna().any(axis=0)

In [None]:
# Separate the target column from the feature matrix (every other column).
y = dataset['y']
X = dataset.drop(columns=['y'])

In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold, train_test_split

In [None]:
# Hold out the final 20% of rows for validation. shuffle=False keeps the row
# order, so the validation rows are the ones that come last in the frame.
# (A further unshuffled 70/30 train/test split of the training part was tried
# and is currently disabled.)
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)


In [None]:
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit

# Cross-validation scheme for the hyper-parameter search.
# The rows are a time series (assumed to be in chronological order, matching
# the unshuffled hold-out split). A shuffled KFold would train on rows that
# come after the rows being scored, so the lag/rolling features would leak
# future information into each fold, and without a seed the folds would also
# change on every run. TimeSeriesSplit always validates on rows that follow
# the training rows and is deterministic.
folds = TimeSeriesSplit(n_splits=5)

# Search grid: 8 depths x 3 tree counts x 3 learning rates = 72 candidates.
params = {
    'objective': ['reg:squarederror'],
    'max_depth': list(range(2, 10)),
    'random_state': [0],
    'n_estimators': list(range(50, 200, 50)),
    'learning_rate': [0.1, 0.01, 0.001],
}

# XGBoost
xgb = XGBRegressor()

# Exhaustive search over `params`; the best configuration is refit on all of
# X_train (GridSearchCV's default refit behaviour).
reg_cv_xgb = GridSearchCV(xgb, params, cv=folds, return_train_score=True, verbose=100)
reg_cv_xgb.fit(X_train, y_train)

In [None]:
# Report the winning hyper-parameters, then predict the held-out rows with the
# refit best model (what GridSearchCV.predict delegates to).
print(reg_cv_xgb.best_params_)
y_pred = reg_cv_xgb.best_estimator_.predict(X_val)
y_pred

In [None]:
# Sanity check on sizes: one prediction per validation row, alongside the
# training size. (This cell previously read `y_`, which is only created in a
# later cell and therefore raised NameError on a fresh Restart & Run All.)
y_pred.shape, y_val.shape, y_train.shape

In [None]:
# Place the predictions on the validation rows' index, then reindex onto the
# full index so the training span is NaN. This keeps the same shape and index
# as before (one column, one row per row of X) but yields a float column
# instead of an object column padded with None, and aligns by index label
# rather than by assuming len(y_train) + len(y_pred) == len(X).
y_ = pd.DataFrame(y_pred, index=X_val.index).reindex(X.index)

# Actual target over the whole range versus predictions on the validation tail.
plt.figure(figsize=(16, 5))
plt.plot(y, label='original')
plt.plot(y_, '--', label='predict')
plt.legend()

In [None]:
# Zoom in: actual versus predicted target over the final 50 rows.
last_n = 50
plt.figure(figsize=(16, 5))
plt.plot(y.tail(last_n), label='original')
plt.plot(y_.tail(last_n), '--', label='predict')
plt.legend()

In [None]:
# Wider zoom: actual versus predicted target over the final 200 rows.
last_n = 200
plt.figure(figsize=(16, 5))
plt.plot(y.tail(last_n), label='original')
plt.plot(y_.tail(last_n), '--', label='predict')
plt.legend()

In [None]:
import pickle

# Persist the best estimator found by the grid search.
# The `with` block guarantees the file handle is flushed and closed even if
# pickling fails; the bare open() used before left the handle to be closed
# whenever the garbage collector got to it.
# NOTE: only unpickle this file from a trusted location -- pickle.load can
# execute arbitrary code.
filename = 'model_H1.pkl'
with open(filename, 'wb') as model_file:
    pickle.dump(reg_cv_xgb.best_estimator_, model_file)