<a href="https://colab.research.google.com/github/u6k/ml-sandbox/blob/master/predict_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Mount Google Drive into the Colab runtime at /content/drive so that the
# cell that loads the CSV can read it from Drive.
from google.colab import drive
drive.mount("/content/drive")

In [0]:
# Install the extra packages into the Colab runtime. comet_ml is used below
# for experiment tracking.
# NOTE(review): chainerrl is not referenced by any cell visible here — confirm
# it is still required.
!pip install chainerrl comet_ml

In [0]:
import os
from getpass import getpass

from comet_ml import Experiment

# Resolve the Comet ML API key without hard-coding it in the notebook:
#   1. reuse COMET_ML_API_KEY if it is already defined in the kernel,
#   2. otherwise read the COMET_ML_API_KEY environment variable,
#   3. otherwise prompt for it interactively (the input is not echoed).
# The name was previously used without being defined in any cell visible in
# this notebook, so a fresh "Restart & Run All" stopped here with a NameError.
COMET_ML_API_KEY = (
    globals().get("COMET_ML_API_KEY")
    or os.environ.get("COMET_ML_API_KEY")
    or getpass("Comet ML API key: ")
)

# Start the Comet ML experiment; later cells use it to store the result CSV
# and the result figure.
experiment = Experiment(api_key=COMET_ML_API_KEY, project_name="predict-stocks.1")

In [0]:
# Load the CSV of Nikkei average prices from the mounted Google Drive.

import pandas as pd

csv_path = "drive/My Drive/projects/ml_data/stocks/nikkei_averages.csv"
df_csv = pd.read_csv(csv_path)
df_csv

In [0]:
import numpy as np

# Build the working price table in one pipeline: keep only the date and the
# four price columns, order the rows by date, drop exact duplicate rows, and
# replace the index with a consecutive 0..n-1 "id".
price_columns = ["date", "opening_price", "high_price", "low_price", "close_price"]

df = (
    df_csv.copy()[price_columns]
    .sort_values("date")
    .drop_duplicates()
    .assign(id=lambda frame: np.arange(len(frame)))
    .set_index("id")
)

df

In [0]:
import sklearn.preprocessing as sp

# Number of most recent rows used for modelling, and how many of those rows
# (the oldest ones) form the training split.
N_INPUT_ROWS = 600
N_LEARN_ROWS = 500

df_input = df[-N_INPUT_ROWS:].copy()

# Fit the min-max scaler on the training rows only, then apply it to the whole
# window. Scaling with the min/max of all rows (training and test together)
# leaks the test period's price range into training. As a consequence,
# test-period values may fall outside [0, 1].
close_prices = df_input["close_price"].values.reshape(-1, 1)
scaler = sp.MinMaxScaler()
scaler.fit(close_prices[:N_LEARN_ROWS])
df_input = df_input.assign(scaled_close_price=scaler.transform(close_prices).ravel())

# Chronological split: the first N_LEARN_ROWS rows train, the rest test.
df_learn = df_input[:N_LEARN_ROWS].copy()
df_test = df_input[N_LEARN_ROWS:].copy()

df_input

In [0]:
# Display the training split.
df_learn

In [0]:
# Display the test split.
df_test

In [0]:
INPUT_LEN = 20

# Slide a window of INPUT_LEN consecutive scaled closing prices over the
# training rows: each window is one input sample and the single value right
# after it is the target.
learn_prices = df_learn["scaled_close_price"]
window_starts = range(len(df_learn) - INPUT_LEN)

x = [learn_prices[start:start + INPUT_LEN].values for start in window_starts]
y = [learn_prices[start + INPUT_LEN:start + INPUT_LEN + 1].values for start in window_starts]

# Shape the samples as (samples, INPUT_LEN, 1) and the targets as (samples, 1).
x = np.array(x).reshape(len(x), INPUT_LEN, 1)
y = np.array(y).reshape(len(y), 1)

print("*** x ***")
print(len(x))
print(x)
print("*** y ***")
print(len(y))
print(y)

In [0]:
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

# Hyperparameters of the network and of the training run.
length_of_sequence = INPUT_LEN
in_out_neurons = 1
n_hidden = 300
n_sequence_lstm_layers = 9  # LSTM layers that pass the full sequence onward

batch_size = 128
epochs = 500

# Stack of LSTM layers, each followed by 20% dropout. The first
# n_sequence_lstm_layers layers return the whole sequence so that the next
# LSTM can consume it; the final LSTM leaves return_sequences at its default
# and feeds a single linear output unit.
model = Sequential()
for layer_index in range(n_sequence_lstm_layers):
  model.add(LSTM(n_hidden, return_sequences=True))
  model.add(Dropout(0.2))
model.add(LSTM(n_hidden))
model.add(Dense(in_out_neurons))
model.add(Activation("linear"))
model.compile(loss="mean_squared_error", optimizer=Adam(lr=0.001))

# Train on the windowed training data, with validation_split=0.2 so that a
# validation loss is recorded alongside the training loss.
history = model.fit(
    x,
    y,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.2,
)

In [0]:
import matplotlib.pyplot as plt

# Plot the per-epoch training and validation loss recorded by model.fit.
loss = history.history["loss"]
val_loss = history.history["val_loss"]

# Use a dedicated name for the x-axis values. Binding this range to `epochs`
# would replace the integer epoch count defined in the training cell with a
# range object for every cell that runs afterwards.
epoch_numbers = range(1, len(loss) + 1)

plt.plot(epoch_numbers, loss, "bo", label="Training loss")
plt.plot(epoch_numbers, val_loss, "b", label="Validation loss")
plt.title("Training and Validation loss")
plt.legend()
plt.show()

In [0]:
# Build the test windows the same way as the training windows: INPUT_LEN
# consecutive scaled closing prices as input, the value right after as target.
# INPUT_LEN is deliberately not redefined here: the model was fitted on windows
# of the INPUT_LEN set in the training-window cell, and a second definition
# could silently drift from it.
test_prices = df_test["scaled_close_price"].values
n_test_windows = max(len(test_prices) - INPUT_LEN, 0)

# Slice the NumPy array rather than the Series so the slices are always
# positional, regardless of the DataFrame's index labels.
test_x = np.array([test_prices[start:start + INPUT_LEN] for start in range(n_test_windows)])
test_y = np.array([test_prices[start + INPUT_LEN] for start in range(n_test_windows)])

# Shape the samples as (samples, INPUT_LEN, 1) and the targets as (samples, 1).
test_x = test_x.reshape(n_test_windows, INPUT_LEN, 1)
test_y = test_y.reshape(n_test_windows, 1)

print("*** test_x ***")
print(len(test_x))
print(test_x)
print("*** test_y ***")
print(len(test_y))
print(test_y)

In [0]:
# Run the trained model on the test windows, then print how many predictions
# were produced followed by the predictions themselves.
result_y = model.predict(test_x)

print(len(result_y), result_y, sep="\n")

In [0]:
# Put the true targets and the model predictions side by side, one row per
# test window, indexed by a consecutive "id".
result_columns = {
    "id": np.arange(len(test_y)),
    "original": np.array(test_y).reshape(len(test_y)),
    "predict": np.array(result_y).reshape(len(result_y)),
}
df_result = pd.DataFrame(result_columns).set_index("id")

# Store the table as a CSV asset on the Comet ML experiment.
experiment.log_asset_data(df_result.to_csv(), file_name="result.csv")

df_result

In [0]:
import matplotlib.pyplot as plt

# Draw the true and the predicted series on one wide axes, show the figure,
# then attach it to the Comet ML experiment.
fig, subplot = plt.subplots(figsize=(20, 5))
for column in ("original", "predict"):
  subplot.plot(df_result[column], label=column)
subplot.legend()

plt.show()

experiment.log_figure(figure_name="result", figure=fig)

In [0]:
# Close the Comet ML experiment started at the top of the notebook.
experiment.end()