<a href="https://colab.research.google.com/github/u6k/ml-sandbox/blob/master/learn_11.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Mount Google Drive inside the Colab VM so that later cells can read the
# input data from it.
from google.colab import drive

DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT)

In [0]:
# Install the Comet ML client; the next cell imports `Experiment` from it.
!pip install comet_ml

In [0]:
import os
from getpass import getpass

from comet_ml import Experiment

# The API key is a secret and is not stored in this notebook. Resolve it in
# this order so the cell also works on a fresh kernel:
#   1. a COMET_ML_API_KEY variable already defined in the kernel,
#   2. the COMET_ML_API_KEY environment variable,
#   3. an interactive prompt (input is not echoed).
try:
  COMET_ML_API_KEY
except NameError:
  COMET_ML_API_KEY = os.environ.get("COMET_ML_API_KEY") or getpass("Comet ML API key: ")

# Experiment handle used by the later cells to log parameters and metrics.
experiment = Experiment(api_key=COMET_ML_API_KEY, project_name="learn-stocks.11")

In [0]:
# Load the CSV from the mounted Google Drive folder.
# The first CSV column is used as the DataFrame index.

import pandas as pd

NIKKEI_CSV_PATH = "drive/My Drive/projects/ml_data/stocks/nikkei_averages.csv"

df_csv = pd.read_csv(NIKKEI_CSV_PATH, index_col=0)
df_csv

In [0]:
import numpy as np

# Keep only the date and OHLC columns, order the rows by date, drop rows that
# are exact duplicates, and re-index with consecutive integers 0..n-1 in a
# index named "id". `df_csv` itself is left untouched.
df = (
    df_csv[["date", "opening_price", "high_price", "low_price", "close_price"]]
    .sort_values("date")
    .drop_duplicates()
)
df = df.assign(id=np.arange(len(df))).set_index("id")

# updown: 1.0 when a row's close is strictly higher than the previous row's
# close, otherwise 0.0. The first row has no previous row, so it is NaN.
# Vectorized on purpose: the earlier row-by-row loop used `id` as its loop
# variable, which overwrote the builtin `id` for the rest of the kernel session.
close_price = df["close_price"]
updown = close_price.gt(close_price.shift()).astype(float)
updown.iloc[:1] = np.nan

df = df.assign(updown=updown)

# return index: cumulative product of (1 + row-over-row relative change of the
# close). The first row is NaN because pct_change has no previous value there.
returns = close_price.pct_change()
return_index = (1 + returns).cumprod()

df = df.assign(return_index=return_index)

df

In [0]:
# Build the supervised dataset from the most recent rows of `df`.
# Each sample pairs WINDOW_SIZE consecutive `return_index` values (features)
# with the `updown` flag of the row immediately after that window (label).
# The samples are generated newest-first, and the very last row of `df` is
# never used as a label (same as before).
WINDOW_SIZE = 30   # return_index values per sample
N_SAMPLES = 1000   # number of (window, label) pairs

# The oldest row read is len(df) - N_SAMPLES - WINDOW_SIZE - 1. It must exist
# and must not be row 0, whose return_index is NaN; otherwise the windows would
# be shorter than WINDOW_SIZE or contain NaN.
required_rows = N_SAMPLES + WINDOW_SIZE + 2
if len(df) < required_rows:
  raise ValueError(
      "need at least %d rows to build %d samples of %d values, got %d"
      % (required_rows, N_SAMPLES, WINDOW_SIZE, len(df)))

return_index_values = df["return_index"].values
updown_values = df["updown"].values

x, y = [], []

for i in range(1, N_SAMPLES + 1):
  target = len(df) - i - 1  # position of the row whose label is taken
  x.append(return_index_values[target - WINDOW_SIZE:target])
  # Index a single element before int(): converting a 1-element array with
  # int() is deprecated in recent NumPy releases.
  y.append(int(updown_values[target]))

# Print a summary instead of dumping all samples into the cell output.
print("x.len:", len(x), "window size:", WINDOW_SIZE)
print("y.len:", len(y), "share of up labels:", sum(y) / len(y))

In [0]:
from sklearn.model_selection import train_test_split

# Fix the shuffle seed so that a fresh run of the notebook produces the same
# train/test partition (and therefore comparable scores).
SPLIT_SEED = 42

# NOTE(review): the split is shuffled, while consecutive samples in `x` are
# windows over almost the same rows. Train and test windows therefore overlap
# in time, which may make the test score optimistic - confirm whether a
# chronological split is wanted instead.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=SPLIT_SEED)

print("x_train.len:", len(x_train))
print("x_test.len:", len(x_test))
print("y_train.len:", len(y_train))
print("y_test.len:", len(y_test))

In [0]:
from sklearn import ensemble, metrics, model_selection

# Hyper-parameter grid for the random forest: 3 * 2 * 6 * 3 * 1 = 108
# combinations, each evaluated with 5-fold cross-validation below.
parameters = dict(
    n_estimators=[10, 100, 200],
    criterion=["gini", "entropy"],
    max_depth=[2, 4, 8, 16, 32, 64],
    random_state=[1, 2, 3],
    class_weight=["balanced"],
)

# Record the searched grid on the Comet experiment.
experiment.log_parameters(parameters)

# Exhaustive grid search on the training split, using all available cores.
clf = model_selection.GridSearchCV(
    estimator=ensemble.RandomForestClassifier(),
    param_grid=parameters,
    cv=5,
    n_jobs=-1,
    verbose=3,
)

clf_fit = clf.fit(x_train, y_train)

In [0]:
# Hyper-parameter combination selected by the grid search.
best_params = clf.best_params_

# These are hyper-parameter values (some of them strings such as the
# criterion), not measured results, so they are recorded as parameters rather
# than metrics. The prefix keeps them apart from the search grid that was
# logged earlier under the same parameter names.
experiment.log_parameters(best_params, prefix="best")

best_params

In [0]:
# Evaluate the estimator chosen by the grid search on the held-out test split.
clf_best = clf.best_estimator_
result = clf_best.predict(x_test)

# Fraction of test samples whose predicted label equals the true label.
ac_score = metrics.accuracy_score(y_true=y_test, y_pred=result)

# Record the score on the Comet experiment and show it in the cell output.
experiment.log_metric(name="accuracy_score", value=ac_score)
print(ac_score)

In [0]:
# Close the Comet experiment now that all parameters and metrics are logged.
experiment.end()