In [3]:
import os
import sys
import requests

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import RocCurveDisplay
from sklearn.metrics import accuracy_score, classification_report

from config import ACCOUNT_NUMBER, ACCOUNT_PASSWORD, CONSUMER_ID, REDIRECT_URI


In [4]:
import tushare as ts

def grab_price_data(code='sh', start='2010-01-01', end='2020-07-17',
                    path='../data/price_data.csv'):
    """Download daily bars via tushare and save the OHLCV columns to a CSV file.

    Parameters
    ----------
    code : str
        Security code passed to ``ts.get_k_data`` (default ``'sh'``).
    start, end : str
        Date range passed to ``ts.get_k_data``, formatted ``YYYY-MM-DD``.
    path : str
        Destination CSV file; an existing file is overwritten.

    Returns
    -------
    None
        The data is only written to ``path``.
    """
    df = ts.get_k_data(code, autype='qfq', start=start, end=end)

    # Use the trading date as the row index so it becomes the first CSV column.
    df.index = pd.to_datetime(df.date)
    df = df[['open', 'high', 'low', 'close', 'volume']]

    # index_label=False leaves the index name out of the header row.
    df.to_csv(path, index_label=False)


grab_price_data()


本接口即将停止更新，请尽快使用Pro版接口：https://tushare.pro/document/2


In [4]:
# Read the bars that grab_price_data() saved to disk.
price_data = pd.read_csv("../data/price_data.csv")

# Preview the first five rows.
price_data.head(5)


Unnamed: 0,open,high,low,close,volume
2010-01-04,3289.75,3295.28,3243.32,3243.76,109447927.0
2010-01-05,3254.468,3290.51,3221.46,3282.179,126115066.0
2010-01-06,3277.517,3295.87,3253.04,3254.215,123651384.0
2010-01-07,3253.991,3268.82,3176.71,3192.776,128652827.0
2010-01-08,3177.259,3198.92,3149.02,3195.997,98377147.0


In [8]:
# Change in price: difference between each close and the previous row's close.
# The first row has no predecessor, so its value is NaN.
close_delta = price_data['close'].diff(periods=1)
price_data['change_in_price'] = close_delta
del close_delta  # keep the notebook namespace free of scratch names

price_data['change_in_price']




2010-01-04        NaN
2010-01-05     38.419
2010-01-06    -27.964
2010-01-07    -61.439
2010-01-08      3.221
               ...   
2020-07-13     59.970
2020-07-14    -28.670
2020-07-15    -53.320
2020-07-16   -151.200
2020-07-17      4.030
Name: change_in_price, Length: 2561, dtype: float64

In [25]:
# Calculate the 14-day Relative Strength Index (RSI).
n = 14

# diff() leaves the first row as NaN; treat it as "no change".
# The first label is looked up from the index rather than hard-coded: assigning
# through .loc with a label that is not present would silently append a new row.
price_data.loc[price_data.index[0], 'change_in_price'] = 0

up_df, down_df = price_data[['change_in_price']].copy(), price_data[['change_in_price']].copy()

# For up days, a change below 0 is set to 0.
# (One assignment target only: a chained `df.loc['change_in_price'] = ... = 0`
# would also append a stray row labelled 'change_in_price'.)
up_df.loc[up_df['change_in_price'] < 0, 'change_in_price'] = 0

# For down days, a change above 0 is set to 0.
down_df.loc[down_df['change_in_price'] > 0, 'change_in_price'] = 0

# The down moves are needed as absolute magnitudes.
down_df['change_in_price'] = down_df['change_in_price'].abs()

# Exponentially weighted means of the up and down moves.
ema_up = up_df['change_in_price'].ewm(span=n).mean()
ema_down = down_df['change_in_price'].ewm(span=n).mean()

# Relative strength: ratio of the average up move to the average down move.
# The first row is 0 / 0, which is why its RSI is NaN.
relative_strength = ema_up / ema_down

# Relative strength index.
relative_strength_index = 100.0 - (100.0 / (1.0 + relative_strength))

# Add the info to the data frame.
price_data['down_days'] = down_df['change_in_price']
price_data['up_days'] = up_df['change_in_price']
price_data['RSI'] = relative_strength_index

price_data.head(30)

Unnamed: 0,open,high,low,close,volume,change_in_price,down_days,up_days,RSI
2010-01-04,3289.75,3295.28,3243.32,3243.76,109447927.0,0.0,0.0,0.0,
2010-01-05,3254.468,3290.51,3221.46,3282.179,126115066.0,38.419,0.0,38.419,100.0
2010-01-06,3277.517,3295.87,3253.04,3254.215,123651384.0,-27.964,27.964,0.0,54.35229
2010-01-07,3253.991,3268.82,3176.71,3192.776,128652827.0,-61.439,61.439,0.0,25.195655
2010-01-08,3177.259,3198.92,3149.02,3195.997,98377147.0,3.221,0.0,3.221,27.546759
2010-01-11,3301.611,3306.75,3197.33,3212.75,136327216.0,16.753,0.0,16.753,39.044393
2010-01-12,3205.705,3275.2,3180.09,3273.966,140655241.0,61.216,0.0,61.216,63.479277
2010-01-13,3204.976,3232.82,3165.49,3172.658,158344278.0,-101.308,101.308,0.0,35.956196
2010-01-14,3183.381,3219.02,3165.6,3215.55,135954504.0,42.892,0.0,42.892,47.150273
2010-01-15,3217.19,3241.82,3197.22,3224.152,120541135.0,8.602,0.0,8.602,49.204768


In [5]:
# Stochastic oscillator (%K) over a 14-row window.
n = 14

# Rolling extremes over the trailing n rows: lowest low and highest high.
# The data holds a single price series, so no grouping is involved.
low_14 = price_data['low'].rolling(window=n).min()
high_14 = price_data['high'].rolling(window=n).max()

# Position of the close inside the [low_14, high_14] range, as a percentage.
k_percent = ((price_data['close'] - low_14) / (high_14 - low_14)) * 100

# Store the rolling extremes and the oscillator on the data frame.
price_data['low_14'] = low_14
price_data['high_14'] = high_14
price_data['k_percent'] = k_percent

# Show the first 30 rows.
price_data.head(30)

Unnamed: 0,open,high,low,close,volume,low_14,high_14,k_percent
2010-01-04,3289.75,3295.28,3243.32,3243.76,109447927.0,,,
2010-01-05,3254.468,3290.51,3221.46,3282.179,126115066.0,,,
2010-01-06,3277.517,3295.87,3253.04,3254.215,123651384.0,,,
2010-01-07,3253.991,3268.82,3176.71,3192.776,128652827.0,,,
2010-01-08,3177.259,3198.92,3149.02,3195.997,98377147.0,,,
2010-01-11,3301.611,3306.75,3197.33,3212.75,136327216.0,,,
2010-01-12,3205.705,3275.2,3180.09,3273.966,140655241.0,,,
2010-01-13,3204.976,3232.82,3165.49,3172.658,158344278.0,,,
2010-01-14,3183.381,3219.02,3165.6,3215.55,135954504.0,,,
2010-01-15,3217.19,3241.82,3197.22,3224.152,120541135.0,,,


In [7]:
# Williams %R: uses the market's swing range to gauge overbought / oversold conditions.
n = 14

# Rolling extremes over the trailing n rows: lowest low and highest high.
low_14 = price_data['low'].rolling(window=n).min()
high_14 = price_data['high'].rolling(window=n).max()

# Distance of the close below the rolling high, relative to the rolling range,
# scaled by -100.
r_percent = ((high_14 - price_data['close']) / (high_14 - low_14)) * -100

# Store the indicator on the data frame.
price_data['r_percent'] = r_percent

# Show the first 30 rows.
price_data.head(30)




Unnamed: 0,open,high,low,close,volume,low_14,high_14,k_percent,r_percent
2010-01-04,3289.75,3295.28,3243.32,3243.76,109447927.0,,,,
2010-01-05,3254.468,3290.51,3221.46,3282.179,126115066.0,,,,
2010-01-06,3277.517,3295.87,3253.04,3254.215,123651384.0,,,,
2010-01-07,3253.991,3268.82,3176.71,3192.776,128652827.0,,,,
2010-01-08,3177.259,3198.92,3149.02,3195.997,98377147.0,,,,
2010-01-11,3301.611,3306.75,3197.33,3212.75,136327216.0,,,,
2010-01-12,3205.705,3275.2,3180.09,3273.966,140655241.0,,,,
2010-01-13,3204.976,3232.82,3165.49,3172.658,158344278.0,,,,
2010-01-14,3183.381,3219.02,3165.6,3215.55,135954504.0,,,,
2010-01-15,3217.19,3241.82,3197.22,3224.152,120541135.0,,,,


In [8]:
# MACD
# MACD        = EMA12 - EMA26 : quantifies changes in the price trend.
# signal line = EMA(MACD)     : measures the average rate of change of the price.
ema_26 = price_data['close'].ewm(span=26).mean()
ema_12 = price_data['close'].ewm(span=12).mean()
macd = ema_12 - ema_26

# Signal line: 9-span EMA of the MACD series.
ema_9_macd = macd.ewm(span=9).mean()

# Store both series on the data frame.
price_data['MACD'] = macd
price_data['MACD_EMA'] = ema_9_macd

# Show the first 30 rows.
price_data.head(30)

Unnamed: 0,open,high,low,close,volume,low_14,high_14,k_percent,r_percent,MACD,MACD_EMA
2010-01-04,3289.75,3295.28,3243.32,3243.76,109447927.0,,,,,0.0,0.0
2010-01-05,3254.468,3290.51,3221.46,3282.179,126115066.0,,,,,0.861965,0.478869
2010-01-06,3277.517,3295.87,3253.04,3254.215,123651384.0,,,,,0.231152,0.377346
2010-01-07,3253.991,3268.82,3176.71,3192.776,128652827.0,,,,,-2.27965,-0.522721
2010-01-08,3177.259,3198.92,3149.02,3195.997,98377147.0,,,,,-3.46782,-1.398821
2010-01-11,3301.611,3306.75,3197.33,3212.75,136327216.0,,,,,-3.400519,-1.941392
2010-01-12,3205.705,3275.2,3180.09,3273.966,140655241.0,,,,,-0.525597,-1.583092
2010-01-13,3204.976,3232.82,3165.49,3172.658,158344278.0,,,,,-3.407613,-2.021559
2010-01-14,3183.381,3219.02,3165.6,3215.55,135954504.0,,,,,-3.223477,-2.299208
2010-01-15,3217.19,3241.82,3197.22,3224.152,120541135.0,,,,,-2.602875,-2.367247


In [16]:
# Price Rate of Change over a 9-row look-back.
n = 9

# Fractional change of the close relative to the close n rows earlier;
# the first n rows have no reference value and stay NaN.
price_data['Price_Rate_Of_Change'] = price_data['close'].pct_change(periods=n)

# Show the first 30 rows.
price_data.head(30)

Unnamed: 0,open,high,low,close,volume,low_14,high_14,k_percent,r_percent,MACD,MACD_EMA,Price_Rate_Of_Change
2010-01-04,3289.75,3295.28,3243.32,3243.76,109447927.0,,,,,0.0,0.0,
2010-01-05,3254.468,3290.51,3221.46,3282.179,126115066.0,,,,,0.861965,0.478869,
2010-01-06,3277.517,3295.87,3253.04,3254.215,123651384.0,,,,,0.231152,0.377346,
2010-01-07,3253.991,3268.82,3176.71,3192.776,128652827.0,,,,,-2.27965,-0.522721,
2010-01-08,3177.259,3198.92,3149.02,3195.997,98377147.0,,,,,-3.46782,-1.398821,
2010-01-11,3301.611,3306.75,3197.33,3212.75,136327216.0,,,,,-3.400519,-1.941392,
2010-01-12,3205.705,3275.2,3180.09,3273.966,140655241.0,,,,,-0.525597,-1.583092,
2010-01-13,3204.976,3232.82,3165.49,3172.658,158344278.0,,,,,-3.407613,-2.021559,
2010-01-14,3183.381,3219.02,3165.6,3215.55,135954504.0,,,,,-3.223477,-2.299208,
2010-01-15,3217.19,3241.82,3197.22,3224.152,120541135.0,,,,,-2.602875,-2.367247,-0.006045


In [15]:
#OBV
def obv(group):
    """Compute the On Balance Volume (OBV) for a frame of price bars.

    The running total starts at 0. For each row, the row's volume is added
    when the close rose versus the previous row, subtracted when it fell,
    and the total is carried over unchanged otherwise (including the first
    row, whose change is NaN).

    Parameters
    ----------
    group : pandas.DataFrame
        Must provide 'close' and 'volume' columns.

    Returns
    -------
    pandas.Series
        Running OBV values, indexed like ``group``.
    """
    price_moves = group['close'].diff()

    running_total = 0
    totals = []

    for move, traded in zip(price_moves, group['volume']):
        # NaN compares False both ways, so the total is simply carried forward.
        if move > 0:
            running_total = running_total + traded
        elif move < 0:
            running_total = running_total - traded

        totals.append(running_total)

    return pd.Series(totals, index=group.index)




2010-01-04    0.000000e+00
2010-01-05    1.261151e+08
2010-01-06    2.463682e+06
2010-01-07   -1.261891e+08
2010-01-08   -2.781200e+07
                  ...     
2020-07-13    5.100935e+10
2020-07-14    5.046613e+10
2020-07-15    4.997410e+10
2020-07-16    4.948349e+10
2020-07-17    4.984314e+10
Length: 2561, dtype: float64