In [1]:
import pandas as pd
import numpy as np
from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt

In [2]:
moutai = pd.read_csv("Kweichow Moutai.csv", index_col=0)
# drop rows where data is not available
moutai = moutai.dropna()
moutai

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2001-08-27,6.867947,7.518720,6.537585,7.074920,5.446078,204166311.0
2001-08-28,6.963473,7.363490,6.887848,7.335628,5.646763,65145307.0
2001-08-29,7.359509,7.363490,7.184378,7.240101,5.573229,26758394.0
2001-08-30,7.220200,7.464986,7.164476,7.383391,5.683531,24125560.0
2001-08-31,7.393342,7.486878,7.323687,7.365480,5.669743,11673333.0
...,...,...,...,...,...,...
2021-10-18,1888.000000,1888.000000,1787.000000,1800.000000,1800.000000,6965202.0
2021-10-19,1800.000000,1838.800049,1799.880005,1812.180054,1812.180054,4031456.0
2021-10-20,1833.000000,1849.800049,1806.010010,1841.000000,1841.000000,2866339.0
2021-10-21,1841.099976,1860.000000,1829.000000,1844.000000,1844.000000,2301870.0


Exponential Smoothing of Adjusted Close

In [3]:
# statsmodels automatically find an optimized alpha value for us
smoothed_price = SimpleExpSmoothing(moutai["Adj Close"], initialization_method="estimated").fit()
# the fitted alpha level
smoothed_price.model.params["smoothing_level"]



0.9865347157869929

In [4]:
moutai["smoothed_price"] = smoothed_price.fittedvalues
moutai

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,smoothed_price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2001-08-27,6.867947,7.518720,6.537585,7.074920,5.446078,204166311.0,5.448881
2001-08-28,6.963473,7.363490,6.887848,7.335628,5.646763,65145307.0,5.446116
2001-08-29,7.359509,7.363490,7.184378,7.240101,5.573229,26758394.0,5.644061
2001-08-30,7.220200,7.464986,7.164476,7.383391,5.683531,24125560.0,5.574183
2001-08-31,7.393342,7.486878,7.323687,7.365480,5.669743,11673333.0,5.682059
...,...,...,...,...,...,...,...
2021-10-18,1888.000000,1888.000000,1787.000000,1800.000000,1800.000000,6965202.0,1916.892867
2021-10-19,1800.000000,1838.800049,1799.880005,1812.180054,1812.180054,4031456.0,1801.573996
2021-10-20,1833.000000,1849.800049,1806.010010,1841.000000,1841.000000,2866339.0,1812.037240
2021-10-21,1841.099976,1860.000000,1829.000000,1844.000000,1844.000000,2301870.0,1840.610008


Feature Extraction

1 Relative Strength Index (RSI)
$$
RSI = 100 - \frac{100}{1+RS}\\
RS = \frac{\text{average gain over past 14 days}}{\text{average loss over past 14 days}}
$$
code reference: https://www.roelpeters.be/many-ways-to-calculate-the-rsi-in-python-pandas/

In [5]:
# 1-day price difference
close_delta = moutai["smoothed_price"].diff()
close_delta

Date
2001-08-27           NaN
2001-08-28     -0.002765
2001-08-29      0.197945
2001-08-30     -0.069878
2001-08-31      0.107876
                 ...    
2021-10-18      5.649064
2021-10-19   -115.318871
2021-10-20     10.463245
2021-10-21     28.572768
2021-10-22      3.344345
Name: smoothed_price, Length: 4819, dtype: float64

In [6]:
# make 2 series: one for higher closes and one for lower closes
up = close_delta.clip(lower=0)
down = -1 * close_delta.clip(upper=0) # abslute values

In [7]:
# average gain and loss over 14-days moving time window
ma_up = up.rolling(window=14).mean()
ma_down = down.rolling(window=14).mean()

In [8]:
# RSI
rs = ma_up / ma_down
rsi = 100 - (100 / (1 + rs))
moutai["RSI"] = rsi
moutai

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,smoothed_price,RSI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2001-08-27,6.867947,7.518720,6.537585,7.074920,5.446078,204166311.0,5.448881,
2001-08-28,6.963473,7.363490,6.887848,7.335628,5.646763,65145307.0,5.446116,
2001-08-29,7.359509,7.363490,7.184378,7.240101,5.573229,26758394.0,5.644061,
2001-08-30,7.220200,7.464986,7.164476,7.383391,5.683531,24125560.0,5.574183,
2001-08-31,7.393342,7.486878,7.323687,7.365480,5.669743,11673333.0,5.682059,
...,...,...,...,...,...,...,...,...
2021-10-18,1888.000000,1888.000000,1787.000000,1800.000000,1800.000000,6965202.0,1916.892867,79.012443
2021-10-19,1800.000000,1838.800049,1799.880005,1812.180054,1812.180054,4031456.0,1801.573996,60.589377
2021-10-20,1833.000000,1849.800049,1806.010010,1841.000000,1841.000000,2866339.0,1812.037240,66.956226
2021-10-21,1841.099976,1860.000000,1829.000000,1844.000000,1844.000000,2301870.0,1840.610008,69.122215


2 Stochastic Oscillator (%K)
$$
\%K = 100 * \frac{C-L14}{H14-L14}
$$
C = Current Closing Price  
L14 = Lowest Low over the past 14 days  
H14 = Highest High over the past 14 days

In [9]:
# highest high over the 14-days moving window
hhigh = moutai["smoothed_price"].rolling(window=14).max()
# lowest low over the 14-days moving window
llow = moutai["smoothed_price"].rolling(window=14).min()
# stochastic oscillator
so = 100 * (moutai["smoothed_price"] - llow) / (hhigh - llow)
moutai["%K"] = so
moutai

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,smoothed_price,RSI,%K
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2001-08-27,6.867947,7.518720,6.537585,7.074920,5.446078,204166311.0,5.448881,,
2001-08-28,6.963473,7.363490,6.887848,7.335628,5.646763,65145307.0,5.446116,,
2001-08-29,7.359509,7.363490,7.184378,7.240101,5.573229,26758394.0,5.644061,,
2001-08-30,7.220200,7.464986,7.164476,7.383391,5.683531,24125560.0,5.574183,,
2001-08-31,7.393342,7.486878,7.323687,7.365480,5.669743,11673333.0,5.682059,,
...,...,...,...,...,...,...,...,...,...
2021-10-18,1888.000000,1888.000000,1787.000000,1800.000000,1800.000000,6965202.0,1916.892867,79.012443,95.846732
2021-10-19,1800.000000,1838.800049,1799.880005,1812.180054,1812.180054,4031456.0,1801.573996,60.589377,56.630783
2021-10-20,1833.000000,1849.800049,1806.010010,1841.000000,1841.000000,2866339.0,1812.037240,66.956226,60.188970
2021-10-21,1841.099976,1860.000000,1829.000000,1844.000000,1844.000000,2301870.0,1840.610008,69.122215,62.485774


3 Williams %R
$$
\%R = \frac{H14-C}{H14-L14} * (-100)
$$

In [10]:
wr = (-100) * (hhigh - moutai["smoothed_price"]) / (hhigh - llow)
moutai["%R"] = wr
moutai

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,smoothed_price,RSI,%K,%R
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2001-08-27,6.867947,7.518720,6.537585,7.074920,5.446078,204166311.0,5.448881,,,
2001-08-28,6.963473,7.363490,6.887848,7.335628,5.646763,65145307.0,5.446116,,,
2001-08-29,7.359509,7.363490,7.184378,7.240101,5.573229,26758394.0,5.644061,,,
2001-08-30,7.220200,7.464986,7.164476,7.383391,5.683531,24125560.0,5.574183,,,
2001-08-31,7.393342,7.486878,7.323687,7.365480,5.669743,11673333.0,5.682059,,,
...,...,...,...,...,...,...,...,...,...,...
2021-10-18,1888.000000,1888.000000,1787.000000,1800.000000,1800.000000,6965202.0,1916.892867,79.012443,95.846732,-4.153268
2021-10-19,1800.000000,1838.800049,1799.880005,1812.180054,1812.180054,4031456.0,1801.573996,60.589377,56.630783,-43.369217
2021-10-20,1833.000000,1849.800049,1806.010010,1841.000000,1841.000000,2866339.0,1812.037240,66.956226,60.188970,-39.811030
2021-10-21,1841.099976,1860.000000,1829.000000,1844.000000,1844.000000,2301870.0,1840.610008,69.122215,62.485774,-37.514226


4 Moving Average Convergence Divergence (MACD)
$$
MACD = EMA_{12}(C) - EMA_{26}(C)\\
Signal Line = EMA_9(MACD)
$$
EMA_n = n-day Exponential Moving Average  
code reference: https://towardsdatascience.com/implementing-macd-in-python-cc9b2280126a

In [11]:
# 12-day exponential moving average
ema12 = moutai["smoothed_price"].ewm(span=12, adjust=False).mean()
# 26-day exponential moving average
ema26 = moutai["smoothed_price"].ewm(span=26, adjust=False).mean()
# MACD
macd = ema12 - ema26
# signal line
sigline = macd.ewm(span=9, adjust=False).mean()

moutai["MACD"] = macd
moutai["Signal Line"] = sigline
moutai

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,smoothed_price,RSI,%K,%R,MACD,Signal Line
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2001-08-27,6.867947,7.518720,6.537585,7.074920,5.446078,204166311.0,5.448881,,,,0.000000,0.000000
2001-08-28,6.963473,7.363490,6.887848,7.335628,5.646763,65145307.0,5.446116,,,,-0.000221,-0.000044
2001-08-29,7.359509,7.363490,7.184378,7.240101,5.573229,26758394.0,5.644061,,,,0.015400,0.003045
2001-08-30,7.220200,7.464986,7.164476,7.383391,5.683531,24125560.0,5.574183,,,,0.021888,0.006813
2001-08-31,7.393342,7.486878,7.323687,7.365480,5.669743,11673333.0,5.682059,,,,0.035327,0.012516
...,...,...,...,...,...,...,...,...,...,...,...,...
2021-10-18,1888.000000,1888.000000,1787.000000,1800.000000,1800.000000,6965202.0,1916.892867,79.012443,95.846732,-4.153268,63.176283,41.669072
2021-10-19,1800.000000,1838.800049,1799.880005,1812.180054,1812.180054,4031456.0,1801.573996,60.589377,56.630783,-43.369217,55.541386,44.443535
2021-10-20,1833.000000,1849.800049,1806.010010,1841.000000,1841.000000,2866339.0,1812.037240,66.956226,60.188970,-39.811030,49.761353,45.507099
2021-10-21,1841.099976,1860.000000,1829.000000,1844.000000,1844.000000,2301870.0,1840.610008,69.122215,62.485774,-37.514226,46.945065,45.794692


5 Price Rate of Change
$$
PROC(t) = \frac{C(t) - C(t-n)}{C(t-n)}
$$
The paper did not specify n. For now, pick n to be 14.

In [12]:
numerator = moutai["smoothed_price"].diff(14)
denominator = moutai["smoothed_price"].shift(14)
moutai["PROC"] = numerator / denominator
moutai

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,smoothed_price,RSI,%K,%R,MACD,Signal Line,PROC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2001-08-27,6.867947,7.518720,6.537585,7.074920,5.446078,204166311.0,5.448881,,,,0.000000,0.000000,
2001-08-28,6.963473,7.363490,6.887848,7.335628,5.646763,65145307.0,5.446116,,,,-0.000221,-0.000044,
2001-08-29,7.359509,7.363490,7.184378,7.240101,5.573229,26758394.0,5.644061,,,,0.015400,0.003045,
2001-08-30,7.220200,7.464986,7.164476,7.383391,5.683531,24125560.0,5.574183,,,,0.021888,0.006813,
2001-08-31,7.393342,7.486878,7.323687,7.365480,5.669743,11673333.0,5.682059,,,,0.035327,0.012516,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-10-18,1888.000000,1888.000000,1787.000000,1800.000000,1800.000000,6965202.0,1916.892867,79.012443,95.846732,-4.153268,63.176283,41.669072,0.170424
2021-10-19,1800.000000,1838.800049,1799.880005,1812.180054,1812.180054,4031456.0,1801.573996,60.589377,56.630783,-43.369217,55.541386,44.443535,0.068961
2021-10-20,1833.000000,1849.800049,1806.010010,1841.000000,1841.000000,2866339.0,1812.037240,66.956226,60.188970,-39.811030,49.761353,45.507099,0.106026
2021-10-21,1841.099976,1860.000000,1829.000000,1844.000000,1844.000000,2301870.0,1840.610008,69.122215,62.485774,-37.514226,46.945065,45.794692,0.125724


6 On Balance Volume  
see the paper  
code reference: https://stackoverflow.com/questions/52671594/calculating-stockss-on-balance-volume-obv-in-python

In [13]:
obv = (np.sign(moutai["smoothed_price"].diff()) * moutai["Volume"]).fillna(0).cumsum()
moutai["OBV"] = obv
moutai

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,smoothed_price,RSI,%K,%R,MACD,Signal Line,PROC,OBV
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2001-08-27,6.867947,7.518720,6.537585,7.074920,5.446078,204166311.0,5.448881,,,,0.000000,0.000000,,0.0
2001-08-28,6.963473,7.363490,6.887848,7.335628,5.646763,65145307.0,5.446116,,,,-0.000221,-0.000044,,-65145307.0
2001-08-29,7.359509,7.363490,7.184378,7.240101,5.573229,26758394.0,5.644061,,,,0.015400,0.003045,,-38386913.0
2001-08-30,7.220200,7.464986,7.164476,7.383391,5.683531,24125560.0,5.574183,,,,0.021888,0.006813,,-62512473.0
2001-08-31,7.393342,7.486878,7.323687,7.365480,5.669743,11673333.0,5.682059,,,,0.035327,0.012516,,-50839140.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-10-18,1888.000000,1888.000000,1787.000000,1800.000000,1800.000000,6965202.0,1916.892867,79.012443,95.846732,-4.153268,63.176283,41.669072,0.170424,628285757.0
2021-10-19,1800.000000,1838.800049,1799.880005,1812.180054,1812.180054,4031456.0,1801.573996,60.589377,56.630783,-43.369217,55.541386,44.443535,0.068961,624254301.0
2021-10-20,1833.000000,1849.800049,1806.010010,1841.000000,1841.000000,2866339.0,1812.037240,66.956226,60.188970,-39.811030,49.761353,45.507099,0.106026,627120640.0
2021-10-21,1841.099976,1860.000000,1829.000000,1844.000000,1844.000000,2301870.0,1840.610008,69.122215,62.485774,-37.514226,46.945065,45.794692,0.125724,629422510.0


Add label

In [14]:
# interested in 30-day price movement direction
label = np.sign(moutai["smoothed_price"].diff(30)).shift(-30)
moutai["Label"] = label
moutai

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,smoothed_price,RSI,%K,%R,MACD,Signal Line,PROC,OBV,Label
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2001-08-27,6.867947,7.518720,6.537585,7.074920,5.446078,204166311.0,5.448881,,,,0.000000,0.000000,,0.0,1.0
2001-08-28,6.963473,7.363490,6.887848,7.335628,5.646763,65145307.0,5.446116,,,,-0.000221,-0.000044,,-65145307.0,-1.0
2001-08-29,7.359509,7.363490,7.184378,7.240101,5.573229,26758394.0,5.644061,,,,0.015400,0.003045,,-38386913.0,-1.0
2001-08-30,7.220200,7.464986,7.164476,7.383391,5.683531,24125560.0,5.574183,,,,0.021888,0.006813,,-62512473.0,-1.0
2001-08-31,7.393342,7.486878,7.323687,7.365480,5.669743,11673333.0,5.682059,,,,0.035327,0.012516,,-50839140.0,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-10-18,1888.000000,1888.000000,1787.000000,1800.000000,1800.000000,6965202.0,1916.892867,79.012443,95.846732,-4.153268,63.176283,41.669072,0.170424,628285757.0,
2021-10-19,1800.000000,1838.800049,1799.880005,1812.180054,1812.180054,4031456.0,1801.573996,60.589377,56.630783,-43.369217,55.541386,44.443535,0.068961,624254301.0,
2021-10-20,1833.000000,1849.800049,1806.010010,1841.000000,1841.000000,2866339.0,1812.037240,66.956226,60.188970,-39.811030,49.761353,45.507099,0.106026,627120640.0,
2021-10-21,1841.099976,1860.000000,1829.000000,1844.000000,1844.000000,2301870.0,1840.610008,69.122215,62.485774,-37.514226,46.945065,45.794692,0.125724,629422510.0,


In [15]:
# dropna
moutai = moutai.dropna()
moutai

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,smoothed_price,RSI,%K,%R,MACD,Signal Line,PROC,OBV,Label
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2001-09-14,7.166466,7.263983,7.124674,7.224180,5.560975,3855436.0,5.498359,52.453651,17.916430,-82.083570,0.022033,0.037614,0.009080,-123595084.0,-1.0
2001-09-17,7.180397,7.222190,7.064970,7.132634,5.490505,4514257.0,5.560132,55.341492,34.130613,-65.869387,0.019902,0.034072,0.020935,-119080827.0,-1.0
2001-09-18,7.140595,7.273934,7.134624,7.214230,5.553315,5413338.0,5.491443,41.864782,8.651853,-91.348147,0.012526,0.029763,-0.027041,-124494165.0,-1.0
2001-09-19,7.224180,7.347568,7.176417,7.347568,5.655955,4346710.0,5.552482,48.832246,31.293020,-68.706980,0.011474,0.026105,-0.003893,-120147455.0,-1.0
2001-09-20,7.343588,7.373440,7.263983,7.285874,5.608464,5614375.0,5.654562,48.511069,69.157243,-30.842757,0.018662,0.024616,-0.004839,-114533080.0,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-08-26,1664.989990,1664.989990,1594.719971,1595.000000,1595.000000,5402037.0,1664.473541,46.750191,46.460032,-53.539968,-73.373943,-77.728874,-0.020518,602902665.0,1.0
2021-08-27,1589.000000,1625.000000,1584.099976,1596.030029,1596.030029,3609545.0,1595.935481,43.462614,18.891555,-81.108445,-72.410283,-76.665156,-0.045140,599293120.0,1.0
2021-08-30,1605.000000,1613.000000,1545.949951,1586.000000,1586.000000,5158770.0,1596.028756,41.219172,18.929073,-81.070927,-70.822649,-75.496655,-0.057555,604451890.0,1.0
2021-08-31,1589.800049,1616.349976,1555.099976,1558.000000,1558.000000,4367709.0,1586.135040,27.058074,18.383962,-81.616038,-69.560924,-74.309508,-0.117627,600084181.0,1.0


In [16]:
moutai.to_csv("moutai_cleaned.csv")

Apply the same thing to Wuliangye

In [17]:
wuliangye = pd.read_csv("Wuliangye.csv", index_col=0)
# drop rows where data is not available
wuliangye = wuliangye.dropna()

In [18]:
# statsmodels automatically find an optimized alpha value for us
smoothed_price = SimpleExpSmoothing(wuliangye["Adj Close"], initialization_method="estimated").fit()
# the fitted alpha level
smoothed_price.model.params["smoothing_level"]



0.9999999850988388

In [19]:
wuliangye["smoothed_price"] = smoothed_price.fittedvalues

In [20]:
# RSI
# 1-day price difference
close_delta = wuliangye["smoothed_price"].diff()
# make 2 series: one for higher closes and one for lower closes
up = close_delta.clip(lower=0)
down = -1 * close_delta.clip(upper=0) # abslute values
# average gain and loss over 14-days moving time window
ma_up = up.rolling(window=14).mean()
ma_down = down.rolling(window=14).mean()
# RSI
rs = ma_up / ma_down
rsi = 100 - (100 / (1 + rs))
wuliangye["RSI"] = rsi
wuliangye

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,smoothed_price,RSI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1998-04-27,2.672735,5.117431,2.672735,4.809487,3.590158,380522005.0,3.590172,
1998-04-28,4.848093,5.072541,4.812181,4.865151,3.631710,75360487.0,3.590158,
1998-04-29,4.935179,4.936076,4.749335,4.785247,3.572063,37795153.0,3.631710,
1998-04-30,4.785247,4.866048,4.776269,4.816670,3.595520,26572724.0,3.572063,
1998-05-04,4.946850,5.298786,4.937872,5.298786,3.955407,54811776.0,3.595520,
...,...,...,...,...,...,...,...,...
2021-10-18,224.949997,224.949997,212.500000,215.320007,215.320007,52228655.0,234.320007,75.624831
2021-10-19,213.000000,220.880005,213.000000,218.970001,218.970001,27151971.0,215.320007,57.695412
2021-10-20,220.500000,222.199997,216.880005,219.940002,219.940002,17171231.0,218.970001,65.175763
2021-10-21,221.000000,222.949997,219.000000,220.990005,220.990005,15781385.0,219.940002,65.377203


In [21]:
# %K
# highest high over the 14-days moving window
hhigh = wuliangye["smoothed_price"].rolling(window=14).max()
# lowest low over the 14-days moving window
llow = wuliangye["smoothed_price"].rolling(window=14).min()
# stochastic oscillator
so = 100 * (wuliangye["smoothed_price"] - llow) / (hhigh - llow)
wuliangye["%K"] = so
wuliangye

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,smoothed_price,RSI,%K
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1998-04-27,2.672735,5.117431,2.672735,4.809487,3.590158,380522005.0,3.590172,,
1998-04-28,4.848093,5.072541,4.812181,4.865151,3.631710,75360487.0,3.590158,,
1998-04-29,4.935179,4.936076,4.749335,4.785247,3.572063,37795153.0,3.631710,,
1998-04-30,4.785247,4.866048,4.776269,4.816670,3.595520,26572724.0,3.572063,,
1998-05-04,4.946850,5.298786,4.937872,5.298786,3.955407,54811776.0,3.595520,,
...,...,...,...,...,...,...,...,...,...
2021-10-18,224.949997,224.949997,212.500000,215.320007,215.320007,52228655.0,234.320007,75.624831,87.670953
2021-10-19,213.000000,220.880005,213.000000,218.970001,218.970001,27151971.0,215.320007,57.695412,46.429370
2021-10-20,220.500000,222.199997,216.880005,219.940002,219.940002,17171231.0,218.970001,65.175763,53.861347
2021-10-21,221.000000,222.949997,219.000000,220.990005,220.990005,15781385.0,219.940002,65.377203,49.189463


In [22]:
# Williams %R
wr = (-100) * (hhigh - wuliangye["smoothed_price"]) / (hhigh - llow)
wuliangye["%R"] = wr
wuliangye

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,smoothed_price,RSI,%K,%R
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1998-04-27,2.672735,5.117431,2.672735,4.809487,3.590158,380522005.0,3.590172,,,
1998-04-28,4.848093,5.072541,4.812181,4.865151,3.631710,75360487.0,3.590158,,,
1998-04-29,4.935179,4.936076,4.749335,4.785247,3.572063,37795153.0,3.631710,,,
1998-04-30,4.785247,4.866048,4.776269,4.816670,3.595520,26572724.0,3.572063,,,
1998-05-04,4.946850,5.298786,4.937872,5.298786,3.955407,54811776.0,3.595520,,,
...,...,...,...,...,...,...,...,...,...,...
2021-10-18,224.949997,224.949997,212.500000,215.320007,215.320007,52228655.0,234.320007,75.624831,87.670953,-12.329047
2021-10-19,213.000000,220.880005,213.000000,218.970001,218.970001,27151971.0,215.320007,57.695412,46.429370,-53.570630
2021-10-20,220.500000,222.199997,216.880005,219.940002,219.940002,17171231.0,218.970001,65.175763,53.861347,-46.138653
2021-10-21,221.000000,222.949997,219.000000,220.990005,220.990005,15781385.0,219.940002,65.377203,49.189463,-50.810537


In [23]:
# MACD
# 12-day exponential moving average
ema12 = wuliangye["smoothed_price"].ewm(span=12, adjust=False).mean()
# 26-day exponential moving average
ema26 = wuliangye["smoothed_price"].ewm(span=26, adjust=False).mean()
# MACD
macd = ema12 - ema26
# signal line
sigline = macd.ewm(span=9, adjust=False).mean()

wuliangye["MACD"] = macd
wuliangye["Signal Line"] = sigline
wuliangye

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,smoothed_price,RSI,%K,%R,MACD,Signal Line
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1998-04-27,2.672735,5.117431,2.672735,4.809487,3.590158,380522005.0,3.590172,,,,0.000000,0.000000e+00
1998-04-28,4.848093,5.072541,4.812181,4.865151,3.631710,75360487.0,3.590158,,,,-0.000001,-2.225253e-07
1998-04-29,4.935179,4.936076,4.749335,4.785247,3.572063,37795153.0,3.631710,,,,0.003313,6.623655e-04
1998-04-30,4.785247,4.866048,4.776269,4.816670,3.595520,26572724.0,3.572063,,,,0.001113,7.525138e-04
1998-05-04,4.946850,5.298786,4.937872,5.298786,3.955407,54811776.0,3.595520,,,,0.001248,8.516697e-04
...,...,...,...,...,...,...,...,...,...,...,...,...
2021-10-18,224.949997,224.949997,212.500000,215.320007,215.320007,52228655.0,234.320007,75.624831,87.670953,-12.329047,5.118613,8.730034e-01
2021-10-19,213.000000,220.880005,213.000000,218.970001,218.970001,27151971.0,215.320007,57.695412,46.429370,-53.570630,3.935414,1.485486e+00
2021-10-20,220.500000,222.199997,216.880005,219.940002,219.940002,17171231.0,218.970001,65.175763,53.861347,-46.138653,3.254726,1.839334e+00
2021-10-21,221.000000,222.949997,219.000000,220.990005,220.990005,15781385.0,219.940002,65.377203,49.189463,-50.810537,2.761712,2.023809e+00


In [24]:
# PROC
numerator = wuliangye["smoothed_price"].diff(14)
denominator = wuliangye["smoothed_price"].shift(14)
wuliangye["PROC"] = numerator / denominator
wuliangye

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,smoothed_price,RSI,%K,%R,MACD,Signal Line,PROC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1998-04-27,2.672735,5.117431,2.672735,4.809487,3.590158,380522005.0,3.590172,,,,0.000000,0.000000e+00,
1998-04-28,4.848093,5.072541,4.812181,4.865151,3.631710,75360487.0,3.590158,,,,-0.000001,-2.225253e-07,
1998-04-29,4.935179,4.936076,4.749335,4.785247,3.572063,37795153.0,3.631710,,,,0.003313,6.623655e-04,
1998-04-30,4.785247,4.866048,4.776269,4.816670,3.595520,26572724.0,3.572063,,,,0.001113,7.525138e-04,
1998-05-04,4.946850,5.298786,4.937872,5.298786,3.955407,54811776.0,3.595520,,,,0.001248,8.516697e-04,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-10-18,224.949997,224.949997,212.500000,215.320007,215.320007,52228655.0,234.320007,75.624831,87.670953,-12.329047,5.118613,8.730034e-01,0.189442
2021-10-19,213.000000,220.880005,213.000000,218.970001,218.970001,27151971.0,215.320007,57.695412,46.429370,-53.570630,3.935414,1.485486e+00,0.066205
2021-10-20,220.500000,222.199997,216.880005,219.940002,219.940002,17171231.0,218.970001,65.175763,53.861347,-46.138653,3.254726,1.839334e+00,0.129119
2021-10-21,221.000000,222.949997,219.000000,220.990005,220.990005,15781385.0,219.940002,65.377203,49.189463,-50.810537,2.761712,2.023809e+00,0.131262


In [25]:
#obv
obv = (np.sign(wuliangye["smoothed_price"].diff()) * wuliangye["Volume"]).fillna(0).cumsum()
wuliangye["OBV"] = obv
wuliangye

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,smoothed_price,RSI,%K,%R,MACD,Signal Line,PROC,OBV
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1998-04-27,2.672735,5.117431,2.672735,4.809487,3.590158,380522005.0,3.590172,,,,0.000000,0.000000e+00,,0.000000e+00
1998-04-28,4.848093,5.072541,4.812181,4.865151,3.631710,75360487.0,3.590158,,,,-0.000001,-2.225253e-07,,-7.536049e+07
1998-04-29,4.935179,4.936076,4.749335,4.785247,3.572063,37795153.0,3.631710,,,,0.003313,6.623655e-04,,-3.756533e+07
1998-04-30,4.785247,4.866048,4.776269,4.816670,3.595520,26572724.0,3.572063,,,,0.001113,7.525138e-04,,-6.413806e+07
1998-05-04,4.946850,5.298786,4.937872,5.298786,3.955407,54811776.0,3.595520,,,,0.001248,8.516697e-04,,-9.326282e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-10-18,224.949997,224.949997,212.500000,215.320007,215.320007,52228655.0,234.320007,75.624831,87.670953,-12.329047,5.118613,8.730034e-01,0.189442,1.283716e+10
2021-10-19,213.000000,220.880005,213.000000,218.970001,218.970001,27151971.0,215.320007,57.695412,46.429370,-53.570630,3.935414,1.485486e+00,0.066205,1.281001e+10
2021-10-20,220.500000,222.199997,216.880005,219.940002,219.940002,17171231.0,218.970001,65.175763,53.861347,-46.138653,3.254726,1.839334e+00,0.129119,1.282718e+10
2021-10-21,221.000000,222.949997,219.000000,220.990005,220.990005,15781385.0,219.940002,65.377203,49.189463,-50.810537,2.761712,2.023809e+00,0.131262,1.284296e+10


In [26]:
# interested in 30-day price movement direction
label = np.sign(wuliangye["smoothed_price"].diff(30)).shift(-30)
wuliangye["Label"] = label
wuliangye = wuliangye.dropna()
wuliangye

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,smoothed_price,RSI,%K,%R,MACD,Signal Line,PROC,OBV,Label
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1998-05-18,5.835667,5.835667,5.613014,5.665086,4.228840,8195534.0,4.284466,74.712935,77.478143,-22.521857,0.192351,0.161890,0.193387,2.490974e+08,-1.0
1998-05-19,5.611218,5.745888,5.521439,5.595956,4.177237,12222511.0,4.228840,71.867729,71.428478,-28.571522,0.183741,0.166260,0.177898,2.368749e+08,-1.0
1998-05-20,5.566329,5.593262,5.463082,5.466673,4.080731,10266708.0,4.177237,68.550532,65.816339,-34.183661,0.170784,0.167165,0.150212,2.266082e+08,-1.0
1998-05-21,5.413703,5.674064,5.413703,5.567226,4.155791,8600805.0,4.080731,66.874154,54.151080,-45.848920,0.150989,0.163929,0.142402,2.180074e+08,-1.0
1998-05-22,5.575307,5.656108,5.530417,5.610321,4.187959,9204784.0,4.155791,67.970732,37.374964,-62.625036,0.139746,0.159093,0.155825,2.272122e+08,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-08-26,217.369995,217.369995,206.000000,207.169998,207.169998,33228274.0,217.500000,44.254309,26.738233,-73.261767,-12.818047,-11.915239,-0.042567,1.272652e+10,1.0
2021-08-27,207.160004,210.000000,204.500000,205.679993,205.679993,27190967.0,207.169998,39.909978,2.636503,-97.363497,-13.159159,-12.164023,-0.083319,1.269932e+10,1.0
2021-08-30,208.500000,209.990005,199.460007,207.000000,207.000000,34732935.0,205.679993,36.600950,0.000000,-100.000000,-13.395311,-12.410280,-0.105778,1.266459e+10,1.0
2021-08-31,206.500000,208.000000,199.399994,201.690002,201.690002,31855168.0,207.000000,21.387615,3.257667,-96.742333,-13.322377,-12.592700,-0.168341,1.269645e+10,1.0


In [27]:
wuliangye.to_csv("wulaingye_cleaned.csv")