In [769]:
import tushare as ts
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [770]:
pro = ts.pro_api()


In [771]:
def SMA(df,  short_term_days, long_term_days):
    # short term
    SMA_short = pd.DataFrame()
    SMA_short['close'] = df['close'].rolling(window=short_term_days).mean()
    SMA_short

    # long term
    SMA_long = pd.DataFrame()
    SMA_long['close'] = df['close'].rolling(window=long_term_days).mean()
    SMA_long

    data = pd.DataFrame()
    data['close'] = df['close']
    data['SMA_short'] = SMA_short['close']
    data['SMA_long'] = SMA_long['close']
    data['trade_date'] = pd.to_datetime(df['trade_date']).dt.strftime('%Y-%m-%d')
    data['date'] = pd.to_datetime(df['trade_date'])

    data.set_index(['date'], inplace=True)

    return data


In [772]:
# Double SMA Crossing Strategy
def buy_sell(data):
    sig_price_buy = []
    sig_price_sell = []
    flag = -1

    for i in range(len(data)):
        if data['SMA_short'][i] > data['SMA_long'][i]:
            if flag != 1:
                sig_price_buy.append(data['close'][i])
                sig_price_sell.append(np.nan)
                flag = 1
            else:
                sig_price_buy.append(np.nan)
                sig_price_sell.append(np.nan)
        elif data['SMA_short'][i] < data['SMA_long'][i]:
            if flag != 0:
                sig_price_buy.append(np.nan)
                sig_price_sell.append(data['close'][i])
                flag = 0
            else:
                sig_price_buy.append(np.nan)
                sig_price_sell.append(np.nan)
        else:
            sig_price_buy.append(np.nan)
            sig_price_sell.append(np.nan)
    return (sig_price_buy, sig_price_sell)


In [773]:
# date and buy_sell
def dates(data):
    buy_dates = data.loc[data['buy_signal_price'] > 0]
    sell_dates = data.loc[data['sell_signal_price'] > 0]
    # buy_dates.drop(columns=['sell_signal_price',
    #                         'SMA_short', 'SMA_long'], inplace=True)
    # sell_dates.drop(columns=['buy_signal_price',
    #                          'SMA_short', 'SMA_long'], inplace=True)

    dates = pd.concat([buy_dates, sell_dates], sort=True)
    dates.sort_values(by='trade_date', inplace=True, ascending=False)
    dates.drop(
        columns=['close'], inplace=True)

    return dates


In [774]:
# drawing
def graph(data, code, short_term_days, long_term_days):
    ab = plt.figure(figsize=[40, 12])

    plt.plot(data['close'], label='PMAC', alpha=0.35)
    plt.plot(data['SMA_short'], label='SMA_' + str(short_term_days), alpha=0.7)
    plt.plot(data['SMA_long'], label='SMA_' + str(long_term_days), alpha=0.7)
    plt.scatter(data.index, data['buy_signal_price'],
                label='Buy', marker='^', color='green')
    plt.scatter(data.index, data['sell_signal_price'],
                label='Sell', marker='v', color='red')

    plt.title('{} close price hsitory buy and cell signals'.format(code))
    plt.xlabel('{} - {}'.format(data['trade_date'].iloc[-1],  data['trade_date'].iloc[0]))
    plt.ylabel('Close price RMB')
    plt.legend()
    plt.show()


In [775]:
def earning_percent(buy_sell_dates):
    latest_sell_index = pd.Series(
        buy_sell_dates['sell_signal_price']).first_valid_index()
    if latest_sell_index is None:
        return print('no signal of sell latest')
    latest_sell_price = buy_sell_dates.loc[latest_sell_index,
                                           'sell_signal_price']

    buy_sell_dates_before_latest_sell = buy_sell_dates[latest_sell_index:]
    the_buy_index_before_the_latest_sell = pd.Series(
        buy_sell_dates_before_latest_sell['buy_signal_price']).first_valid_index()
    if the_buy_index_before_the_latest_sell is None:
        return print('no signal of buy before the latest cell')

    the_buy_price_before_the_latest_sell = buy_sell_dates.loc[the_buy_index_before_the_latest_sell,
                                                              'buy_signal_price']

    percent = (latest_sell_price - the_buy_price_before_the_latest_sell) / \
        the_buy_price_before_the_latest_sell * 100

    return_data = [the_buy_index_before_the_latest_sell,
                   the_buy_price_before_the_latest_sell,latest_sell_index, latest_sell_price,  str(percent) + '%']
    print(return_data)

    return return_data


In [776]:
def graphChart(stock_data, code, short_term_days, long_term_days):
    data = SMA(stock_data, short_term_days, long_term_days)
    buy_sell_data = buy_sell(data)
    data['buy_signal_price'] = buy_sell_data[0]
    data['sell_signal_price'] = buy_sell_data[1]

    # graph(data, code, short_term_days, long_term_days)

    buy_sell_dates = dates(data)
    print(short_term_days, long_term_days)
    # print(but_sell_dates)
    return earning_percent(buy_sell_dates)


In [777]:
# https://tushare.pro/document/2?doc_id=25
# https://tushare.pro/document/2?doc_id=27

# get Constituents of SSE
data = pro.stock_basic(exchange='SSE', list_status='L',
                       fields='ts_code,symbol,name,area,industry,list_date')

start_date = '201211001'
end_date = '20221001'

for i in range(20):
    ts_code = data['ts_code'][i]
    name = data['name'][i]
    print(str(i + 1) + name + ts_code)

    df = pro.daily(ts_code=ts_code,
                   start_date=start_date, end_date=end_date).drop(columns=[
                       'low', 'high', 'pre_close', 'ts_code', 'change', 'vol', 'pct_chg', 'amount'])
    graphChart(df, ts_code, 5, 15)
    graphChart(df, ts_code, 5, 30)
    graphChart(df, ts_code, 15, 30)
    graphChart(df, ts_code, 15, 60)
    graphChart(df, ts_code, 30, 60)
    graphChart(df, ts_code, 30, 100)
    graphChart(df, ts_code, 60, 100)
    graphChart(df, ts_code, 60, 180)


1浦发银行600000.SH
5 15
[Timestamp('2022-08-17 00:00:00'), 7.28, Timestamp('2022-08-30 00:00:00'), 7.19, '-1.2362637362637343%']
5 30
[Timestamp('2022-07-28 00:00:00'), 7.33, Timestamp('2022-08-10 00:00:00'), 7.09, '-3.274215552523877%']
15 30
[Timestamp('2022-07-22 00:00:00'), 7.35, Timestamp('2022-08-15 00:00:00'), 7.21, '-1.9047619047619004%']
15 60
[Timestamp('2022-03-02 00:00:00'), 8.42, Timestamp('2022-03-21 00:00:00'), 7.76, '-7.838479809976248%']
30 60
[Timestamp('2021-11-12 00:00:00'), 8.65, Timestamp('2021-11-26 00:00:00'), 8.56, '-1.040462427745663%']
30 100
[Timestamp('2020-09-03 00:00:00'), 10.12, Timestamp('2021-01-19 00:00:00'), 10.16, '0.3952569169960566%']
60 100
[Timestamp('2020-08-17 00:00:00'), 10.84, Timestamp('2020-12-29 00:00:00'), 9.53, '-12.08487084870849%']
60 180
[Timestamp('2020-07-22 00:00:00'), 11.62, Timestamp('2020-12-04 00:00:00'), 10.17, '-12.47848537005163%']
2白云机场600004.SH
5 15
[Timestamp('2022-08-25 00:00:00'), 13.63, Timestamp('2022-09-09 00:00:00'), 1

In [778]:
# # Model: Logistic regression analysis

# def set_pre_stock_data(stock_data, days_ago, end_date):
#   length = len(stock_data)
#   df = pro.trade_cal(exchange='', start_date='20020830', end_date='20220830')

#   stock_data["pre_open"] = pre_stock_data["open"]
#   stock_data["pre_high"] = pre_stock_data["high"]
#   stock_data["pre_low"] = pre_stock_data["low"]
#   stock_data["pre_vol"] = pre_stock_data["vol"]
#   stock_data["pre_amount"] = pre_stock_data["amount"]


# pre_stock_data = pro.daily(
#     ts_code='600000.SH', start_date=start_date, end_date=end_date)
# print(pre_stock_data.head())

# # 予測変数:is_up
# stock_data.groupby("change").size()
# stock_data["is_up"] = stock_data["change"].map(lambda x: 1 if x > 0 else 0)
# stock_data.groupby("is_up").size()


# from sklearn.linear_model import LogisticRegression
# from sklearn.model_selection import train_test_split

# # 説明変数
# X = stock_data[["pre_open", "pre_high", "pre_low", "pre_close", "pre_vol", "pre_amount"]]

# Y = stock_data["is_up"]
# X_train, X_test, y_train, y_test = train_test_split(X, Y, random_state=0)

# # TODO:random forest / SVM
# model = LogisticRegression()
# clf = model.fit(X_train, y_train)

# print("train result:", clf.score(X_train, y_train))
# print("test result:", clf.score(X_test, y_test))

# from sklearn.preprocessing import StandardScaler

# sc = StandardScaler()
# sc.fit(X_train)
# X_train_std = sc.transform(X_train)
# X_test_std = sc.transform(X_test)

# clf = model.fit(X_train_std, y_train)
# print("train:", clf.score(X_train_std, y_train))
# print("test:", clf.score(X_test_std, y_test))

# # 偏回帰係数
# print(clf.coef_)
# # 0.5303701 is pre_high

# # Calculate the exponential of all elements in the input array.
# print(np.exp(clf.coef_))
