# 単純なアルゴリズムで推論する
相関があると考えられる指標（銘柄）同士で比較する

- topix, nikkei225 vs s&p 500, nasdaq, dow

In [None]:
from pathlib import Path

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# US-side CSV inputs. nasdaq_path is an empty placeholder and is not read below.
sp500_path = Path("../data/us_etf/SPXL.csv")
nasdaq_path = Path("")
dow_path = Path("../data/us_etf/DIA.csv")

# Japan-side CSV inputs. topix_path is likewise an empty placeholder.
topix_path = Path("")
nikkei_path = Path("../data/etfs/1458.csv")
nikkei_inv_path = Path("../data/etfs/1459.csv")

# Load the four non-placeholder CSVs, one DataFrame per file.
sp500_df = pd.read_csv(sp500_path)
dow_df = pd.read_csv(dow_path)
nikkei_df = pd.read_csv(nikkei_path)
nikkei_inv_df = pd.read_csv(nikkei_inv_path)

In [None]:
def plot_df(df, start_idx=-100, column=1):
    """Plot one column of ``df`` from ``start_idx`` onward, scaled by its first plotted value.

    Args:
        df: DataFrame; its ``to_numpy()`` result is indexed positionally.
        start_idx: Row index where the plotted window begins (default -100,
            i.e. counted from the end).
        column: Positional index of the column to plot.
    """
    values = df.to_numpy()
    window = values[start_idx:, column]
    # Dividing by the first value of the window makes every series start at 1.
    baseline = values[start_idx, column]
    plt.plot(window / baseline)

# Plot the three series with the default window and column.
plot_df(sp500_df)
plot_df(dow_df)
plot_df(nikkei_df)

In [None]:
sp500_data = sp500_df.to_numpy()
dow_data = dow_df.to_numpy()
nikkei_data = nikkei_df.to_numpy()

# Index the US rows by their first column (the key the rows are matched on,
# presumably the date). Each series gets its own lookup so the Dow row is
# chosen by key rather than by the S&P 500 row position, which is only
# correct when both files happen to be row-aligned.
# NOTE: if a key occurs more than once in a file, the last row wins.
sp500_by_key = {row[0]: row for row in sp500_data}
dow_by_key = {row[0]: row for row in dow_data}

# Keep only the Nikkei rows whose key is present in both US series; each entry
# is a (nikkei_row, sp500_row, dow_row) triple.
pairs = [
    (jp_row, sp500_by_key[jp_row[0]], dow_by_key[jp_row[0]])
    for jp_row in nikkei_data
    if jp_row[0] in sp500_by_key and jp_row[0] in dow_by_key
]

# Order the triples by the Nikkei row's key and stack them into one array.
pairs = np.array(sorted(pairs, key=lambda x: x[0][0]))

In [None]:
def calc_profit(us_data, jp_data):
    """Return the JP (column 4 - column 1) values on rows following a US rise.

    A row ``k`` (k >= 1) of ``jp_data`` is selected when column 4 of
    ``us_data`` increased from row ``k - 1`` to row ``k``. The sum of the
    selected values is printed as a side effect.

    Args:
        us_data: 2-D array; only column 4 is read.
        jp_data: 2-D array with the same number of rows; columns 1 and 4 are read.

    Returns:
        1-D array with one (column 4 - column 1) value per selected JP row.
    """
    us_col = us_data[:, 4]
    rose = (us_col[1:] - us_col[:-1]) > 0
    picked = jp_data[1:][rose]
    earn = picked[:, 4] - picked[:, 1]
    print(f"Earn = {earn.sum()}")
    return earn

In [None]:
# Split the aligned (nikkei, sp500, dow) triples back into per-market row arrays.
sp500_data = pairs[:, 1]
dow_data = pairs[:, 2]
jp_data = pairs[:, 0]
# Row-to-row difference of column 4 for each US series.
sp500_change = sp500_data[1:, 4] - sp500_data[:-1, 4]
dow_change = dow_data[1:, 4] - dow_data[:-1, 4]
# NOTE(review): logical_xor is True where exactly ONE of the two changes is
# positive; the name us_up suggests "both up" (logical_and) -- confirm intent.
us_up = np.logical_xor(sp500_change > 0, dow_change > 0)
# Column 4 minus column 1 within each JP row.
# NOTE(review): this takes rows [:-1], whereas calc_profit pairs the same US
# change with jp_data[1:] -- confirm which alignment is intended.
jp_change = jp_data[:-1, 4] - jp_data[:-1, 1]

# Scatter of the S&P 500 change against the JP change on the selected rows,
# with zero reference lines on both axes.
plt.axvline(0)
plt.axhline(0)
plt.scatter(sp500_change[us_up], jp_change[us_up], s=1)

In [None]:
# JP changes on the rows where the S&P 500 change is positive. sp500_change is
# computed in the cell above; the name us_change used here before was never
# assigned, so a clean run raised NameError.
changes = jp_change[sp500_change > 0]
# (number of positive JP changes, number of selected rows)
(changes > 0).sum(), len(changes)

In [None]:
# Run calc_profit with the S&P 500 rows as the US series and the Nikkei rows as the JP series.
calc_profit(pairs[:, 1], pairs[:, 0])

In [None]:
data = nikkei_df.to_numpy()

# Per-row simulation with two price levels, `short` and `long` (presumably
# order prices for a short and a long position -- confirm). Each row must
# unpack into exactly six columns; the first and last are ignored.
benefits = []
for _, start, high, low, end, _ in data:
    # Both levels are currently the start price; the `* 1` factors look like
    # placeholders for scaling the levels away from it.
    short = start * 1
    long = start * 1
    if short <= high and long >= low:
        # Both levels lie inside the row's [low, high] range.
        benefits.append(short - long)
    elif short > high and long < low:
        # Neither level lies inside the range.
        benefits.append(0)
    elif short <= high:
        # Only the short level is inside the range; the assert restates what
        # the earlier branches already imply.
        assert long < low
        benefits.append(short - end)
    else:
        # Only the long level is inside the range.
        assert long >= low and short > high
        benefits.append(end - long)

# Total over all rows.
sum(benefits)

In [None]:
# Collect every CSV directly under the ETF data directory, in sorted path order.
csv_dir = Path("../data/etfs")
csvs = list(csv_dir.glob("*.csv"))
csvs.sort()

In [None]:
# For each ETF CSV, total the per-row (column 4 - column 1) differences and
# remember which file has the largest total.
benefits = []
# Start below every real total so the best file is still identified when all
# totals are <= 0. With a starting value of 0, maximum_idx would stay at -1 in
# that case and csvs[maximum_idx] would silently point at the last file.
# If csvs is empty the values remain (-inf, -1).
maximum = -np.inf
maximum_idx = -1
for idx, csv in enumerate(csvs):
    df = pd.read_csv(csv)
    data = df.to_numpy()
    benefit = (data[:, 4] - data[:, 1]).sum()
    benefits.append(benefit)
    # Strict comparison keeps the first file among equal totals.
    if benefit > maximum:
        maximum = benefit
        maximum_idx = idx

In [None]:
# Best total, its position in csvs, and the corresponding CSV path.
maximum, maximum_idx, csvs[maximum_idx]

In [None]:
# Show the per-file totals in ascending order (benefits itself is left unsorted).
sorted(benefits)

In [None]:
# Middle element of the sorted totals (the upper median for an even count).
# benefits is in file order, so it has to be sorted here; indexing it directly
# would return an arbitrary file's total.
sorted(benefits)[len(benefits) // 2]

In [None]:
# Sorted per-file totals as a curve, with a horizontal reference line at zero.
plt.plot(sorted(benefits))
# plt.ylim([-10000, 10000])
plt.axhline(0)

## 前日高値（安値）より高い（安い）場合のみ買う（売る）

In [None]:
# Reload the 1458 ETF CSV and expose it as a positional array for the cells below.
nikkei_path = Path("../data/etfs/1458.csv")
nikkei_df = pd.read_csv(nikkei_path)
data = nikkei_df.to_numpy()
# Preview the first rows.
nikkei_df.head()

In [None]:
# Reference scales for judging profits: the mean of column 4 and the mean
# absolute difference between column 4 and column 1.
mean_end = data[:, 4].mean()
mean_change = np.mean(np.abs(data[:, 4] - data[:, 1]))
print(f"mean change = {mean_change}, mean end = {mean_end}")

In [None]:
# Strategy check over rows [start, end): whenever a row's column 1 is below the
# previous row's column 3, record (column 1 - column 4) as that row's profit.
# The running total lives in total_profit rather than `sum`, so the builtin
# sum() used by other cells is not shadowed.
total_profit = 0
count = 0
arr = []        # profit of each selected row
accum = [0]     # running total; the leading 0 is dropped when plotting
indices = []    # selected row positions relative to start
start = 500
end = data.shape[0] - 500
for i in range(start, end):
    # Removing this condition gives the "trade on every row" baseline.
    if data[i, 1] < data[i - 1, 3]:
        profit = data[i, 1] - data[i, 4]
        total_profit += profit
        count += 1
        accum.append(accum[-1] + profit)
        arr.append(profit)
        indices.append(i - start)

# Report NaN instead of dividing by zero when no row qualified (for example
# when the file has too few rows for the [500, -500) window).
mean_profit = total_profit / count if count else float("nan")
print(f"sum = {total_profit}, count = {count}, mean = {mean_profit:.2f}, percent = {mean_profit / mean_end * 100:.2f}")

# Cumulative profit at the selected rows, overlaid on column 4 for the same window.
plt.plot(indices, accum[1:])
plt.grid()
plt.plot(data[start:end, 4])

In [None]:
def boxplot(start: np.ndarray, high: np.ndarray, low: np.ndarray, end: np.ndarray, box_width=0.5):
    """Draw a candlestick-style chart on a new figure.

    Each position gets a thin vertical line from ``low`` to ``high`` and a
    thick vertical line between ``start`` and ``end``. Positions where
    ``start < end`` are drawn in red, all others in blue.

    Args:
        start: Per-position start values.
        high: Per-position upper extremes.
        low: Per-position lower extremes.
        end: Per-position end values.
        box_width: Width of the thick line, in x-axis data units.
    """
    fig, ax = plt.subplots()
    y_top = high.max() * 1.05
    y_bottom = low.min() * 0.95

    ax.set_xlim([0 - box_width, len(start)])
    ax.set_ylim([y_bottom, y_top])

    def _empty_bucket():
        # x positions plus the y-extents of the thick body and the thin wick.
        return {"x": [], "body_lo": [], "body_hi": [], "wick_lo": [], "wick_hi": []}

    rising = _empty_bucket()
    falling = _empty_bucket()
    for pos, (first, top, bottom, last) in enumerate(zip(start, high, low, end)):
        if first < last:
            bucket, body_lo, body_hi = rising, first, last
        else:
            bucket, body_lo, body_hi = falling, last, first
        bucket["x"].append(pos)
        bucket["body_lo"].append(body_lo)
        bucket["body_hi"].append(body_hi)
        bucket["wick_lo"].append(bottom)
        bucket["wick_hi"].append(top)

    # Line widths are given in points, so measure box_width in display pixels
    # through the data transform and scale by points-per-pixel.
    points_per_pixel = 72.0 / fig.dpi
    x0_pix, _ = ax.transData.transform((0.0, 1.0))
    x1_pix, _ = ax.transData.transform((box_width, 1.0))
    body_linewidth = (x1_pix - x0_pix) * points_per_pixel
    wick_linewidth = min(body_linewidth * 0.3, 1)

    for bucket, colour in ((rising, "red"), (falling, "blue")):
        ax.vlines(bucket["x"], bucket["wick_lo"], bucket["wick_hi"], color=colour, linewidth=wick_linewidth)
        ax.vlines(bucket["x"], bucket["body_lo"], bucket["body_hi"], color=colour, linewidth=body_linewidth)
    ax.grid()

In [None]:
# Chart the first max_idx rows, passing columns 1-4 as start, high, low, end.
max_idx = 100
boxplot(data[:max_idx, 1], data[:max_idx, 2], data[:max_idx, 3], data[:max_idx, 4])