In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()

In [2]:
# Load the tab-separated USDJPY_M15 CSV: skip its first line (presumably the
# file's own header -- TODO confirm), keep the first six columns and name them.
data = (
    pd.read_csv(
        'inputs/USDJPY_M15.csv',
        sep='\t',
        skiprows=1,
        usecols=[0, 1, 2, 3, 4, 5],
        names=('date', 'time', 'open', 'high', 'low', 'close'),
    )
    # Join the separate date and time strings into one parsed timestamp column ...
    .assign(datetime=lambda raw: pd.to_datetime(raw['date'] + ' ' + raw['time']))
    # ... after which the two source columns are no longer needed.
    .drop(columns=['date', 'time'])
)
data

Unnamed: 0,open,high,low,close,datetime
0,107.547,107.567,107.455,107.518,2020-04-01 00:00:00
1,107.520,107.545,107.469,107.540,2020-04-01 00:15:00
2,107.540,107.544,107.476,107.492,2020-04-01 00:30:00
3,107.492,107.519,107.465,107.481,2020-04-01 00:45:00
4,107.481,107.549,107.481,107.547,2020-04-01 01:00:00
...,...,...,...,...,...
26872,108.782,108.793,108.753,108.779,2021-04-30 05:45:00
26873,108.779,108.819,108.769,108.814,2021-04-30 06:00:00
26874,108.814,108.846,108.808,108.845,2021-04-30 06:15:00
26875,108.845,108.859,108.831,108.846,2021-04-30 06:30:00


In [3]:
# Disabled variant that joined a second instrument (`ni225`) on the timestamp:
# all_data = pd.merge(data, ni225, on='datetime', suffixes=['', '_ni225']).drop(['date_ni225', 'time_ni225'], axis=1)

# Work on an independent deep copy so columns written to `all_data` never reach `data`.
all_data = data.copy(deep=True)
all_data

Unnamed: 0,open,high,low,close,datetime
0,107.547,107.567,107.455,107.518,2020-04-01 00:00:00
1,107.520,107.545,107.469,107.540,2020-04-01 00:15:00
2,107.540,107.544,107.476,107.492,2020-04-01 00:30:00
3,107.492,107.519,107.465,107.481,2020-04-01 00:45:00
4,107.481,107.549,107.481,107.547,2020-04-01 01:00:00
...,...,...,...,...,...
26872,108.782,108.793,108.753,108.779,2021-04-30 05:45:00
26873,108.779,108.819,108.769,108.814,2021-04-30 06:00:00
26874,108.814,108.846,108.808,108.845,2021-04-30 06:15:00
26875,108.845,108.859,108.831,108.846,2021-04-30 06:30:00


In [4]:
# Extract calendar and clock features from the timestamp column.
# The vectorised `.dt` accessor replaces a Python-level loop over every Timestamp.
# NOTE: recent pandas versions return these parts as int32 instead of int64;
# the values themselves are identical.
timestamps = all_data['datetime'].dt

all_data['day'] = timestamps.day
all_data['month'] = timestamps.month
all_data['year'] = timestamps.year
all_data['day_of_week'] = timestamps.dayofweek  # Monday=0 ... Sunday=6
all_data['day_of_year'] = timestamps.dayofyear

all_data['hour'] = timestamps.hour
all_data['minute'] = timestamps.minute

all_data

Unnamed: 0,open,high,low,close,datetime,day,month,year,day_of_week,day_of_year,hour,minute
0,107.547,107.567,107.455,107.518,2020-04-01 00:00:00,1,4,2020,2,92,0,0
1,107.520,107.545,107.469,107.540,2020-04-01 00:15:00,1,4,2020,2,92,0,15
2,107.540,107.544,107.476,107.492,2020-04-01 00:30:00,1,4,2020,2,92,0,30
3,107.492,107.519,107.465,107.481,2020-04-01 00:45:00,1,4,2020,2,92,0,45
4,107.481,107.549,107.481,107.547,2020-04-01 01:00:00,1,4,2020,2,92,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...
26872,108.782,108.793,108.753,108.779,2021-04-30 05:45:00,30,4,2021,4,120,5,45
26873,108.779,108.819,108.769,108.814,2021-04-30 06:00:00,30,4,2021,4,120,6,0
26874,108.814,108.846,108.808,108.845,2021-04-30 06:15:00,30,4,2021,4,120,6,15
26875,108.845,108.859,108.831,108.846,2021-04-30 06:30:00,30,4,2021,4,120,6,30


In [5]:
# Wider column set (tick volume + ni225 columns), currently disabled:
# dataset = all_data[['open', 'high', 'low', 'close', 'tickvol', 'day', 'month', 'year', 'day_of_week', 'day_of_year', 'hour', 'minute', 'open_ni225', 'high_ni225', 'low_ni225', 'close_ni225', 'tickvol_ni225']]

# Select the price and date-part columns. The explicit .copy() makes `dataset`
# an independent frame, so adding columns to it later does not raise
# SettingWithCopyWarning and can never write through to `all_data`.
dataset = all_data[
    ['open', 'high', 'low', 'close',
     'day', 'month', 'year', 'day_of_week', 'day_of_year',
     'hour', 'minute']
].copy()
dataset


Unnamed: 0,open,high,low,close,day,month,year,day_of_week,day_of_year,hour,minute
0,107.547,107.567,107.455,107.518,1,4,2020,2,92,0,0
1,107.520,107.545,107.469,107.540,1,4,2020,2,92,0,15
2,107.540,107.544,107.476,107.492,1,4,2020,2,92,0,30
3,107.492,107.519,107.465,107.481,1,4,2020,2,92,0,45
4,107.481,107.549,107.481,107.547,1,4,2020,2,92,1,0
...,...,...,...,...,...,...,...,...,...,...,...
26872,108.782,108.793,108.753,108.779,30,4,2021,4,120,5,45
26873,108.779,108.819,108.769,108.814,30,4,2021,4,120,6,0
26874,108.814,108.846,108.808,108.845,30,4,2021,4,120,6,15
26875,108.845,108.859,108.831,108.846,30,4,2021,4,120,6,30


In [6]:
# Target column: `y` is the `high` of the following row (shift(-1) pulls the next
# value up by one position), so the last row has no successor and its `y` is NaN.
# assign() returns a new frame instead of writing a column into a frame that was
# sliced out of `all_data`, which avoids pandas' SettingWithCopyWarning.
dataset = dataset.assign(y=dataset['high'].shift(-1))
dataset[['y', 'high']]

Unnamed: 0,y,high
0,107.545,107.567
1,107.544,107.545
2,107.519,107.544
3,107.549,107.519
4,107.549,107.549
...,...,...
26872,108.819,108.793
26873,108.846,108.819
26874,108.859,108.846
26875,108.852,108.859


In [7]:
open_series = dataset['open']

# Lag features: shiftN holds the `open` value from N rows earlier (N = 1..12).
for lag in range(1, 13):
    dataset['shift%s' % lag] = open_series.shift(periods=lag)

# Rolling means of `open` over the current row and the 4 / 14 rows before it.
dataset['sma5'] = open_series.rolling(window=5).mean()
dataset['sma15'] = open_series.rolling(window=15).mean()
# Longer windows, currently disabled:
# dataset['sma50'] = dataset['open'].rolling(50).mean()
# dataset['sma100'] = dataset['open'].rolling(100).mean()
dataset

Unnamed: 0,open,high,low,close,day,month,year,day_of_week,day_of_year,hour,...,shift5,shift6,shift7,shift8,shift9,shift10,shift11,shift12,sma5,sma15
0,107.547,107.567,107.455,107.518,1,4,2020,2,92,0,...,,,,,,,,,,
1,107.520,107.545,107.469,107.540,1,4,2020,2,92,0,...,,,,,,,,,,
2,107.540,107.544,107.476,107.492,1,4,2020,2,92,0,...,,,,,,,,,,
3,107.492,107.519,107.465,107.481,1,4,2020,2,92,0,...,,,,,,,,,,
4,107.481,107.549,107.481,107.547,1,4,2020,2,92,1,...,,,,,,,,,107.5160,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26872,108.782,108.793,108.753,108.779,30,4,2021,4,120,5,...,108.812,108.879,108.930,108.996,108.955,108.956,108.906,108.863,108.7952,108.870000
26873,108.779,108.819,108.769,108.814,30,4,2021,4,120,6,...,108.854,108.812,108.879,108.930,108.996,108.955,108.956,108.906,108.7802,108.862200
26874,108.814,108.846,108.808,108.845,30,4,2021,4,120,6,...,108.801,108.854,108.812,108.879,108.930,108.996,108.955,108.956,108.7828,108.857733
26875,108.845,108.859,108.831,108.846,30,4,2021,4,120,6,...,108.786,108.801,108.854,108.812,108.879,108.930,108.996,108.955,108.7946,108.856533


In [8]:
# Positionally drop the first 100 rows and the final row.
# NOTE(review): this rebinds `dataset`, so re-running the cell trims again --
# re-run the cells that build `dataset` first.
dataset = dataset.iloc[100:-1]
dataset

Unnamed: 0,open,high,low,close,day,month,year,day_of_week,day_of_year,hour,...,shift5,shift6,shift7,shift8,shift9,shift10,shift11,shift12,sma5,sma15
100,107.104,107.168,107.071,107.146,2,4,2020,3,93,1,...,107.167,107.132,107.057,107.130,106.960,107.050,107.094,107.229,107.0960,107.112667
101,107.145,107.199,107.117,107.122,2,4,2020,3,93,1,...,107.150,107.167,107.132,107.057,107.130,106.960,107.050,107.094,107.0950,107.105733
102,107.122,107.154,107.074,107.150,2,4,2020,3,93,1,...,107.131,107.150,107.167,107.132,107.057,107.130,106.960,107.050,107.0932,107.104400
103,107.149,107.227,107.148,107.200,2,4,2020,3,93,1,...,107.041,107.131,107.150,107.167,107.132,107.057,107.130,106.960,107.1148,107.099067
104,107.200,107.258,107.185,107.215,2,4,2020,3,93,2,...,107.054,107.041,107.131,107.150,107.167,107.132,107.057,107.130,107.1440,107.106133
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26871,108.753,108.781,108.745,108.780,30,4,2021,4,120,5,...,108.879,108.930,108.996,108.955,108.956,108.906,108.863,108.881,108.8012,108.879000
26872,108.782,108.793,108.753,108.779,30,4,2021,4,120,5,...,108.812,108.879,108.930,108.996,108.955,108.956,108.906,108.863,108.7952,108.870000
26873,108.779,108.819,108.769,108.814,30,4,2021,4,120,6,...,108.854,108.812,108.879,108.930,108.996,108.955,108.956,108.906,108.7802,108.862200
26874,108.814,108.846,108.808,108.845,30,4,2021,4,120,6,...,108.801,108.854,108.812,108.879,108.930,108.996,108.955,108.956,108.7828,108.857733


In [9]:
# Split the frame into the target series and the feature matrix (everything but `y`).
y = dataset['y']
X = dataset.drop(columns=['y'])

In [10]:
from sklearn.model_selection import KFold, train_test_split

In [11]:
# Order-preserving split (shuffle=False): the first half of the rows is kept
# for training, the second half becomes the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, shuffle=False, test_size=0.5
)
# The trailing 20% of that first half is then set aside as the validation set.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, shuffle=False, test_size=0.2
)

In [13]:
import pickle

# NOTE(security): unpickling can execute arbitrary code -- only load a
# model.pkl that comes from a trusted source.
# The context manager guarantees the file handle is closed once the model is loaded.
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

In [14]:
# Run the unpickled model on the test-split features; as the cell's last
# expression, the returned prediction array is displayed.
model.predict(X_test)

array([105.44035 , 105.441666, 105.47711 , ..., 108.82218 , 108.836365,
       108.86949 ], dtype=float32)

In [29]:
import os

from sqlalchemy import create_engine

# The connection URL embeds the database user and password, so it must not be
# stored in the notebook. It is read from the DATABASE_URL environment variable.
database_url = os.environ.get('DATABASE_URL')
if not database_url:
    raise RuntimeError(
        'DATABASE_URL is not set; export the PostgreSQL connection URL '
        '(postgresql://user:password@host:port/dbname) before running this cell'
    )
engine = create_engine(database_url)

In [30]:
import pandas

# Pull the 200 rows of t_tick with the largest `time` values, largest first.
# NOTE(review): `time` looks like Unix epoch seconds -- confirm.
df = pandas.read_sql_query(
    sql='select time, open, high, low, close from t_tick order by time desc limit 200',
    con=engine,
)
df

Unnamed: 0,time,open,high,low,close
0,1619781300,108.878,108.881,108.842,108.846
1,1619780400,108.885,108.908,108.851,108.878
2,1619779500,108.868,108.890,108.851,108.885
3,1619778600,108.799,108.874,108.796,108.868
4,1619777700,108.836,108.852,108.791,108.801
...,...,...,...,...,...
195,1619605800,109.028,109.033,108.959,108.984
196,1619604900,109.036,109.053,109.013,109.029
197,1619604000,109.053,109.075,109.028,109.036
198,1619603100,108.985,109.054,108.984,109.053


In [33]:
# Display the rows ordered by `time`, largest first. The sorted frame is only
# shown, not assigned, so `df` itself is unchanged.
# NOTE(review): the query that builds `df` already orders by time desc, so this
# looks like a no-op. If oldest-first order was intended (as in the CSV data),
# use ascending=True and assign the result -- confirm.
df.sort_values(by='time', ascending=False)

Unnamed: 0,time,open,high,low,close
0,1619781300,108.878,108.881,108.842,108.846
1,1619780400,108.885,108.908,108.851,108.878
2,1619779500,108.868,108.890,108.851,108.885
3,1619778600,108.799,108.874,108.796,108.868
4,1619777700,108.836,108.852,108.791,108.801
...,...,...,...,...,...
195,1619605800,109.028,109.033,108.959,108.984
196,1619604900,109.036,109.053,109.013,109.029
197,1619604000,109.053,109.075,109.028,109.036
198,1619603100,108.985,109.054,108.984,109.053
