In [None]:
%pip install arch==5.0.1
%pip install arviz==0.11.2
%pip install basemap==1.2.1
%pip install copulae==0.7.5
%pip install copulas==0.5.1
%pip install costcla==0.6
%pip install ctgan==0.4.3
%pip install decorator==4.4.2
%pip install gap==0.4.6
%pip install gap-stat==2.0.1
%pip install graphviz==0.17
%pip install hmmlearn==0.2.6
%pip install keras==2.6.0
%pip install missingno==0.5.0
%pip install mpl-toolkits.clifford==0.0.3
%pip install numpy==1.21.2
%pip install numpy-financial==1.0.0
%pip install pandas==1.1.4
%pip install pandas-datareader==0.10.0
%pip install plotly==5.2.1
%pip install pmdarima==1.8.2
%pip install portfoliolab==0.3.0
%pip install py4j==0.10.9
%pip install pyensae==1.3.884
%pip install pymc3==3.11.4
%pip install pyportfolioopt==1.4.2
%pip install python-dateutil==2.8.0
%pip install Quandl==3.6.1
%pip install quantecon==0.5.1
%pip install scikit-image==0.18.1
%pip install scikit-learn==1.0.1
%pip install scipy==1.6.0
%pip install sklearn-som==1.1.0
%pip install table-evaluator==1.2.2.post1
%pip install tables==3.6.1
%pip install tensorboard==2.6.0
%pip install tensorflow==2.6.0
%pip install xgboost==1.4.2
%pip install yellowbrick==1.3.post1
%pip install yfinance==0.1.63

## Oil Price Graph

In [None]:
import quandl
import matplotlib.pyplot as plt
import warnings
# Silence library warnings so they do not clutter the rendered notebook.
warnings.filterwarnings('ignore')
# matplotlib 3.6 renamed the bundled 'seaborn' style sheet to 'seaborn-v0_8';
# use whichever of the two names this installation actually provides.
plt.style.use('seaborn' if 'seaborn' in plt.style.available
              else 'seaborn-v0_8')
# Render and save every figure at 300 dpi.
plt.rcParams.update({'figure.dpi': 300, 'savefig.dpi': 300})

In [None]:
import os

# The Quandl token is a credential and must not be committed with the
# notebook: export QUANDL_API_KEY before starting the kernel. A missing
# variable raises KeyError here instead of failing later inside the request.
oil = quandl.get("NSE/OIL", authtoken=os.environ["QUANDL_API_KEY"],
                 start_date="1980-01-01",
                 end_date="2020-01-01")

In [None]:
# Line chart of the downloaded series' closing values.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(oil.Close)
ax.set_ylabel('$')
ax.set_xlabel('Date')
plt.show()

## Trend

In [None]:
import yfinance as yf
import numpy as np
import pandas as pd
import datetime
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose

In [None]:
# Download ^GSPC at monthly intervals for the 2015-01-01 .. 2021-01-01 window
# and keep only the closing values.
ticker = '^GSPC'
start = datetime.datetime(2015, 1, 1)
end = datetime.datetime(2021, 1, 1)
sp_download = yf.download(ticker, start=start, end=end, interval='1mo')
SP_prices = sp_download.Close

In [None]:
# Decompose the series with a 12-observation seasonal period and draw the
# components returned by statsmodels.
sp_decomposition = seasonal_decompose(SP_prices, period=12)
sp_decomposition.plot()
plt.show()

In [None]:
# Raw closing levels, before any differencing.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(SP_prices)
ax.set_title('S&P-500 Prices')
ax.set_ylabel('$')
ax.set_xlabel('Date')
plt.show()

In [None]:
# Autocorrelation of the price levels for the first 30 lags.
sp_acf_fig = sm.graphics.tsa.plot_acf(SP_prices, lags=30)
sp_acf_fig.axes[0].set_xlabel('Number of Lags')
plt.show()

In [None]:
# Partial autocorrelation of the price levels for the first 30 lags.
sp_pacf_fig = sm.graphics.tsa.plot_pacf(SP_prices, lags=30)
sp_pacf_fig.axes[0].set_xlabel('Number of Lags')
plt.show()

## Seasonality

In [None]:
from fredapi import Fred
import statsmodels.api as sm

In [None]:
import os

# The FRED key is a credential and must not be committed with the notebook:
# export FRED_API_KEY before starting the kernel. A missing variable raises
# KeyError here instead of failing later inside the request.
fred = Fred(api_key=os.environ["FRED_API_KEY"])

In [None]:
# Fetch FRED series CAPUTLG2211A2S for 2010-01-01 .. 2020-12-31 and preview
# its first twelve observations.
energy_series_id = "CAPUTLG2211A2S"
energy = fred.get_series(
    energy_series_id,
    observation_start="2010-01-01",
    observation_end="2020-12-31",
)
energy.head(12)

In [None]:
# Capacity utilization is a rate, not a dollar amount, so the y-axis is
# labelled as a percentage rather than '$'.
plt.plot(energy)
plt.title('Energy Capacity Utilization')
plt.ylabel('Percent of Capacity')
plt.xlabel('Date')
plt.show()

In [None]:
# Autocorrelation of the energy series for the first 30 lags.
energy_acf_fig = sm.graphics.tsa.plot_acf(energy, lags=30)
energy_acf_fig.axes[0].set_xlabel('Number of Lags')
plt.show()

## Stationarity

In [None]:
# Keep only the first two entries of adfuller's result tuple, which the
# message below reports as the test statistic and the p-value.
stat_test = adfuller(SP_prices)[:2]
adf_message = "The test statistic and p-value of ADF test are {}"
print(adf_message.format(stat_test))

In [None]:
# First difference: each observation minus the one before it (the first
# entry becomes NaN).
diff_SP_price = SP_prices.diff(periods=1)

In [None]:
# The differenced series, for visual comparison with the raw levels.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(diff_SP_price)
ax.set_title('Differenced S&P-500 Price')
ax.set_ylabel('$')
ax.set_xlabel('Date')
plt.show()

In [None]:
# Drop the leading NaN produced by differencing before computing the ACF.
diff_sp_clean = diff_SP_price.dropna()
diff_acf_fig = sm.graphics.tsa.plot_acf(diff_sp_clean, lags=30)
diff_acf_fig.axes[0].set_xlabel('Number of Lags')
plt.show()

In [None]:
# Repeat the ADF test on the differenced series (NaN removed) and report the
# first two entries of the result.
stat_test2 = adfuller(diff_SP_price.dropna())[:2]
adf_diff_message = ("The test statistic and p-value of ADF test after "
                    "differencing are {}")
print(adf_diff_message.format(stat_test2))

In [None]:
# Mean of the series within each 'Q' resampling bucket; used below as the
# per-quarter divisor.
seasonal_index = energy.resample(rule='Q').mean()

In [None]:
# Collect the observations year by year and, within each year, month by
# month, then flatten the pieces into one array.
dates = energy.index.year.unique()
deseasonalized = [
    energy[str(year)][energy[str(year)].index.month == month]
    for year in dates
    for month in range(1, 13)
]
concat_deseasonalized = np.concatenate(deseasonalized)

In [None]:
# Divide each consecutive block of three observations by the matching entry
# of seasonal_index (zip stops at the shorter of the two sequences).
quarter_offsets = range(0, len(energy), 3)
quarterly_ratios = [
    concat_deseasonalized[offset:offset + 3] / quarter_mean
    for offset, quarter_mean in zip(quarter_offsets, seasonal_index)
]
concat_deseason_energy = np.concatenate(quarterly_ratios)
# Re-attach the original index and name the single column.
deseason_energy = pd.DataFrame(concat_deseason_energy,
                               index=energy.index,
                               columns=['Deaseasonalized Energy'])
deseason_energy.head()

In [None]:
# Autocorrelation of the deseasonalized series for the first 10 lags.
deseason_acf_fig = sm.graphics.tsa.plot_acf(deseason_energy, lags=10)
deseason_acf_fig.axes[0].set_xlabel('Number of Lags')
plt.show()

In [None]:
# Partial autocorrelation of the deseasonalized series for the first 10 lags.
deseason_pacf_fig = sm.graphics.tsa.plot_pacf(deseason_energy, lags=10)
deseason_pacf_fig.axes[0].set_xlabel('Number of Lags')
plt.show()

## White Noise

In [None]:
# Simulate 1000 draws from a normal distribution with mean `mu` and standard
# deviation `std`.
mu = 0
std = 1
# Draw from a locally seeded generator so Restart & Run All reproduces the
# same series without touching NumPy's global random state.
rng = np.random.default_rng(42)
WN = rng.normal(mu, std, 1000)

plt.plot(WN)
plt.xlabel('Number of Simulations')
plt.show()

## Moving Average

In [None]:
# Download daily data for both tickers over 2019-01-01 .. 2021-01-01 and keep
# only the closing values.
ticker = ['AAPL', 'MSFT']
start = datetime.datetime(2019, 1, 1)
end = datetime.datetime(2021, 1, 1)
stock_download = yf.download(ticker, start=start, end=end, interval='1d')
stock_prices = stock_download.Close

In [None]:
# Discard every row in which at least one value is missing.
stock_prices = stock_prices.dropna(axis=0, how='any')

In [None]:
# Run the ADF test on each ticker's price column and report the first two
# entries of the result.
adf_stock_message = "The ADF test statistic and p-value of {} are {}"
for symbol in ticker:
    stat_test = adfuller(stock_prices[symbol])[:2]
    print(adf_stock_message.format(symbol, stat_test))

In [None]:
# First-difference the prices, then drop the rows left with missing values
# (differencing always leaves the first row empty).
stock_price_changes = stock_prices.diff()
diff_stock_prices = stock_price_changes.dropna()

In [None]:
# Chronological split: the first 95% of the differenced observations train
# the models, the remainder is held out.
aapl_changes = diff_stock_prices['AAPL']
msft_changes = diff_stock_prices['MSFT']
split = int(len(aapl_changes.values) * 0.95)
diff_train_aapl, diff_test_aapl = (aapl_changes.iloc[:split],
                                   aapl_changes.iloc[split:])
diff_train_msft, diff_test_msft = (msft_changes.iloc[:split],
                                   msft_changes.iloc[split:])

In [None]:
# Write each split to a CSV file in the working directory.
diff_splits_by_file = (
    ('diff_train_aapl.csv', diff_train_aapl),
    ('diff_test_aapl.csv', diff_test_aapl),
    ('diff_train_msft.csv', diff_train_msft),
    ('diff_test_msft.csv', diff_test_msft),
)
for csv_name, split_series in diff_splits_by_file:
    split_series.to_csv(csv_name)

In [None]:
# ACF of both training series, one panel per ticker.
fig, ax = plt.subplots(2, 1, figsize=(10, 6))
sm.graphics.tsa.plot_acf(diff_train_aapl,lags=30,
                         ax=ax[0], title='ACF - Apple')
sm.graphics.tsa.plot_acf(diff_train_msft,lags=30,
                         ax=ax[1], title='ACF - Microsoft')
# Lay the figure out only after the panels and their titles exist; calling
# tight_layout() on the empty axes cannot account for them.
plt.tight_layout()
plt.show()

In [None]:
# Rolling means of the Apple training series over a short and a long window.
aapl_short_window, aapl_long_window = 9, 22
short_moving_average_appl = diff_train_aapl.rolling(
    window=aapl_short_window).mean()
long_moving_average_appl = diff_train_aapl.rolling(
    window=aapl_long_window).mean()

In [None]:
# Apple training series with its short and long rolling means, restricted to
# the start..end window. The line styles and the x-axis label mirror the
# Microsoft figure so the two charts can be read side by side.
fig, ax = plt.subplots(figsize=(10, 6))
apple_lines = (
    (diff_train_aapl, 'Stock Price', '--'),
    (short_moving_average_appl, 'Short MA', 'solid'),
    (long_moving_average_appl, 'Long MA', '-.'),
)
for line_series, line_label, line_style in apple_lines:
    windowed = line_series.loc[start:end]
    ax.plot(windowed.index, windowed, label=line_label, linestyle=line_style)
ax.legend(loc='best')
ax.set_ylabel('Differenced Price')
ax.set_xlabel('Date')
ax.set_title('Stock Prediction-Apple')
plt.show()

In [None]:
# Rolling means of the Microsoft training series over a short and a long
# window.
msft_short_window, msft_long_window = 2, 22
short_moving_average_msft = diff_train_msft.rolling(
    window=msft_short_window).mean()
long_moving_average_msft = diff_train_msft.rolling(
    window=msft_long_window).mean()

In [None]:
# Microsoft training series with its short and long rolling means,
# restricted to the start..end window.
fig, ax = plt.subplots(figsize=(10, 6))
microsoft_lines = (
    (diff_train_msft, 'Stock Price', '--'),
    (short_moving_average_msft, 'Short MA', 'solid'),
    (long_moving_average_msft, 'Long MA', '-.'),
)
for line_series, line_label, line_style in microsoft_lines:
    windowed = line_series.loc[start:end]
    ax.plot(windowed.index, windowed, label=line_label, linestyle=line_style)
ax.legend(loc='best')
ax.set_ylabel('Differenced Price')
ax.set_xlabel('Date')
ax.set_title('Stock Prediction-Microsoft')
plt.show()

## Autoregressive Model

In [None]:
# Partial autocorrelation of the Apple training series for 30 lags.
aapl_pacf_fig = sm.graphics.tsa.plot_pacf(diff_train_aapl, lags=30)
aapl_pacf_axes = aapl_pacf_fig.axes[0]
aapl_pacf_axes.set_title('PACF of Apple')
aapl_pacf_axes.set_xlabel('Number of Lags')
plt.show()

In [None]:
# Partial autocorrelation of the Microsoft training series for 30 lags.
msft_pacf_fig = sm.graphics.tsa.plot_pacf(diff_train_msft, lags=30)
msft_pacf_axes = msft_pacf_fig.axes[0]
msft_pacf_axes.set_title('PACF of Microsoft')
msft_pacf_axes.set_xlabel('Number of Lags')
plt.show()

In [None]:
from statsmodels.tsa.ar_model import AutoReg
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Fit an autoregression with 29 lags on the raw array of Apple training
# values (the date index is deliberately not passed to the model).
aapl_train_values = diff_train_aapl.values
ar_aapl = AutoReg(aapl_train_values, lags=29)
ar_fitted_aapl = ar_aapl.fit()

In [None]:
# Predict the positions immediately after the training sample, one per
# held-out observation.
aapl_first_step = len(diff_train_aapl)
aapl_last_step = aapl_first_step + len(diff_test_aapl) - 1
ar_predictions_aapl = ar_fitted_aapl.predict(start=aapl_first_step,
                                             end=aapl_last_step,
                                             dynamic=False)

In [None]:
# Print each prediction next to the held-out value it corresponds to.
# The actual values are paired positionally through `.values`: indexing the
# date-indexed Series with a bare integer (`diff_test_aapl[i]`) only works
# through pandas' deprecated positional fallback.
for predicted, actual in zip(ar_predictions_aapl, diff_test_aapl.values):
    print('==' * 25)
    print('predicted values:{:.4f} & actual values:{:.4f}'\
          .format(predicted, actual))

In [None]:
# Wrap the predictions in a DataFrame labelled with the held-out dates so
# they can be plotted against the actual values.
aapl_prediction_frame = pd.DataFrame(ar_predictions_aapl)
aapl_prediction_frame.index = diff_test_aapl.index
ar_predictions_aapl = aapl_prediction_frame

In [None]:
# Fit an autoregression with 26 lags on the raw array of Microsoft training
# values.
msft_train_values = diff_train_msft.values
ar_msft = AutoReg(msft_train_values, lags=26)
ar_fitted_msft = ar_msft.fit()

In [None]:
# Predict the positions immediately after the training sample, one per
# held-out observation.
msft_first_step = len(diff_train_msft)
msft_last_step = msft_first_step + len(diff_test_msft) - 1
ar_predictions_msft = ar_fitted_msft.predict(start=msft_first_step,
                                             end=msft_last_step,
                                             dynamic=False)

In [None]:
# Wrap the predictions in a DataFrame labelled with the held-out dates.
msft_prediction_frame = pd.DataFrame(ar_predictions_msft)
msft_prediction_frame.index = diff_test_msft.index
ar_predictions_msft = msft_prediction_frame

In [None]:
# Held-out differenced prices against the AR predictions, one panel per
# ticker.
fig, ax = plt.subplots(2,1, figsize=(18, 15))

ax[0].plot(diff_test_aapl, label='Actual Stock Price', linestyle='--')
ax[0].plot(ar_predictions_aapl, linestyle='solid', label="Prediction")
ax[0].set_title('Predicted Stock Price-Apple')
ax[0].legend(loc='best')
ax[1].plot(diff_test_msft, label='Actual Stock Price', linestyle='--')
ax[1].plot(ar_predictions_msft, linestyle='solid', label="Prediction")
ax[1].set_title('Predicted Stock Price-Microsoft')
ax[1].legend(loc='best')
# Use a separate loop variable: `for ax in ax.flat` rebinds `ax` to the last
# panel, so the cell leaves `ax` as a single Axes instead of the array.
for panel in ax.flat:
    panel.set(xlabel='Date', ylabel='Differenced Price')
plt.show()

## ARIMA Model

In [None]:
# NOTE(review): statsmodels.tsa.arima_model is the legacy ARIMA
# implementation; it appears to be deprecated/removed in newer statsmodels
# releases in favour of statsmodels.tsa.arima.model.ARIMA — confirm against
# the installed version. The cells below plot these forecasts against the
# differenced test data, so any port must check what scale predict() returns.
from statsmodels.tsa.arima_model import ARIMA

In [None]:
# Chronological split of the price levels: the first 95% train the ARIMA
# models, the remainder is held out.
aapl_prices = stock_prices['AAPL']
msft_prices = stock_prices['MSFT']
split = int(len(aapl_prices.values) * 0.95)
train_aapl, test_aapl = aapl_prices.iloc[:split], aapl_prices.iloc[split:]
train_msft, test_msft = msft_prices.iloc[:split], msft_prices.iloc[split:]

In [None]:
# Initial ARIMA(p, d, q) specification for Apple, fitted on the training
# prices.
aapl_initial_order = (9, 1, 9)
arima_aapl = ARIMA(train_aapl, order=aapl_initial_order)
arima_fit_aapl = arima_aapl.fit()

In [None]:
# Initial ARIMA(p, d, q) specification for Microsoft, fitted on the training
# prices.
msft_initial_order = (6, 1, 6)
arima_msft = ARIMA(train_msft, order=msft_initial_order)
arima_fit_msft = arima_msft.fit()

In [None]:
# Predict the positions immediately after each training sample, one per
# held-out observation.
aapl_forecast_start = len(train_aapl)
aapl_forecast_end = aapl_forecast_start + len(test_aapl) - 1
arima_predict_aapl = arima_fit_aapl.predict(start=aapl_forecast_start,
                                            end=aapl_forecast_end,
                                            dynamic=False)
msft_forecast_start = len(train_msft)
msft_forecast_end = msft_forecast_start + len(test_msft) - 1
arima_predict_msft = arima_fit_msft.predict(start=msft_forecast_start,
                                            end=msft_forecast_end,
                                            dynamic=False)

In [None]:
# Label the forecasts with the dates they were requested for. They cover
# positions len(train_*) .. len(train_*) + len(test_*) - 1 of the price
# series, i.e. exactly the rows of test_*. The diff_test_* index comes from a
# separate split of the (one row shorter) differenced data, so its length is
# not guaranteed to match and assigning it can raise a length-mismatch error.
arima_predict_aapl = pd.DataFrame(arima_predict_aapl)
arima_predict_aapl.index = test_aapl.index
arima_predict_msft = pd.DataFrame(arima_predict_msft)
arima_predict_msft.index = test_msft.index

In [None]:
# Held-out differenced prices against the ARIMA predictions, one panel per
# ticker.
fig, ax = plt.subplots(2, 1, figsize=(18, 15))

ax[0].plot(diff_test_aapl, label='Actual Stock Price', linestyle='--')
ax[0].plot(arima_predict_aapl, linestyle='solid', label="Prediction")
ax[0].set_title('Predicted Stock Price-Apple')
ax[0].legend(loc='best')
ax[1].plot(diff_test_msft, label='Actual Stock Price', linestyle='--')
ax[1].plot(arima_predict_msft, linestyle='solid', label="Prediction")
ax[1].set_title('Predicted Stock Price-Microsoft')
ax[1].legend(loc='best')
# Use a separate loop variable: `for ax in ax.flat` rebinds `ax` to the last
# panel, so the cell leaves `ax` as a single Axes instead of the array.
for panel in ax.flat:
    panel.set(xlabel='Date', ylabel='Differenced Price')
plt.show()


In [None]:
import itertools

In [None]:
# Grid-search ARIMA orders for Apple on the training prices and report the
# order with the lowest AIC.
p = q = range(0, 9)
d = range(0, 3)
pdq = list(itertools.product(p, d, q))
arima_results_aapl = []
# Orders that produced a usable AIC, appended in step with
# arima_results_aapl. Looking the best score up in `pdq` by its position in
# the results list is wrong as soon as one fit fails, because failed orders
# are skipped and the two lists no longer line up.
arima_params_aapl = []
for param_set in pdq:
    try:
        arima_aapl = ARIMA(train_aapl, order=param_set)
        arima_fitted_aapl = arima_aapl.fit()
        aic = arima_fitted_aapl.aic
    except Exception:
        # Some orders cannot be estimated; skip them, but do not swallow
        # KeyboardInterrupt/SystemExit as a bare `except:` would.
        continue
    # A NaN score cannot be ranked, so leave it out of the comparison.
    if pd.notnull(aic):
        arima_results_aapl.append(aic)
        arima_params_aapl.append(param_set)
best_aic_aapl = min(arima_results_aapl)
best_order_aapl = arima_params_aapl[arima_results_aapl.index(best_aic_aapl)]
print('**'*25)
print('The Lowest AIC score is {:.4f} and the corresponding parameters are {}'
      .format(best_aic_aapl, best_order_aapl))

In [None]:
# Refit Apple on the training prices with the chosen ARIMA order.
aapl_selected_order = (4, 1, 4)
arima_aapl = ARIMA(train_aapl, order=aapl_selected_order)
arima_fit_aapl = arima_aapl.fit()

In [None]:
# Grid-search ARIMA orders for Microsoft and report the order with the
# lowest AIC. The search runs on train_msft, as the Apple search does on
# train_aapl: fitting on the full stock_prices['MSFT'] would let the held-out
# observations influence model selection.
p = q = range(0, 6)
d = range(0, 3)
pdq = list(itertools.product(p, d, q))
arima_results_msft = []
# Orders that produced a usable AIC, appended in step with
# arima_results_msft. Looking the best score up in `pdq` by its position in
# the results list is wrong as soon as one fit fails, because failed orders
# are skipped and the two lists no longer line up.
arima_params_msft = []
for param_set in pdq:
    try:
        arima_msft = ARIMA(train_msft, order=param_set)
        arima_fitted_msft = arima_msft.fit()
        aic = arima_fitted_msft.aic
    except Exception:
        # Some orders cannot be estimated; skip them, but do not swallow
        # KeyboardInterrupt/SystemExit as a bare `except:` would.
        continue
    # A NaN score cannot be ranked, so leave it out of the comparison.
    if pd.notnull(aic):
        arima_results_msft.append(aic)
        arima_params_msft.append(param_set)
best_aic_msft = min(arima_results_msft)
best_order_msft = arima_params_msft[arima_results_msft.index(best_aic_msft)]
print('**' * 25)
print('The lowest AIC score is {:.4f} and parameters are {}'
      .format(best_aic_msft, best_order_msft))

In [None]:
# Refit Microsoft with the chosen ARIMA order on the training prices only.
# Fitting on the full stock_prices['MSFT'] series would make the later
# "test" predictions in-sample, since the model would already have seen the
# held-out observations.
arima_msft = ARIMA(train_msft, order=(4, 2 ,4))
arima_fit_msft= arima_msft.fit()

In [None]:
# Predict the positions immediately after each training sample, one per
# held-out observation, using the refitted models.
aapl_forecast_start = len(train_aapl)
aapl_forecast_end = aapl_forecast_start + len(test_aapl) - 1
arima_predict_aapl = arima_fit_aapl.predict(start=aapl_forecast_start,
                                            end=aapl_forecast_end,
                                            dynamic=False)
msft_forecast_start = len(train_msft)
msft_forecast_end = msft_forecast_start + len(test_msft) - 1
arima_predict_msft = arima_fit_msft.predict(start=msft_forecast_start,
                                            end=msft_forecast_end,
                                            dynamic=False)

In [None]:
# Label the forecasts with the dates they were requested for. They cover
# positions len(train_*) .. len(train_*) + len(test_*) - 1 of the price
# series, i.e. exactly the rows of test_*. The diff_test_* index comes from a
# separate split of the (one row shorter) differenced data, so its length is
# not guaranteed to match and assigning it can raise a length-mismatch error.
arima_predict_aapl = pd.DataFrame(arima_predict_aapl)
arima_predict_aapl.index = test_aapl.index
arima_predict_msft = pd.DataFrame(arima_predict_msft)
arima_predict_msft.index = test_msft.index

In [None]:
# Held-out differenced prices against the predictions of the refitted ARIMA
# models, one panel per ticker.
fig, ax = plt.subplots(2, 1, figsize=(18, 15))

ax[0].plot(diff_test_aapl, label='Actual Stock Price', linestyle='--')
ax[0].plot(arima_predict_aapl, linestyle='solid', label="Prediction")
ax[0].set_title('Predicted Stock Price-Apple')
ax[0].legend(loc='best')
ax[1].plot(diff_test_msft, label='Actual Stock Price', linestyle='--')
ax[1].plot(arima_predict_msft, linestyle='solid', label="Prediction")
ax[1].set_title('Predicted Stock Price-Microsoft')
ax[1].legend(loc='best')
# Use a separate loop variable: `for ax in ax.flat` rebinds `ax` to the last
# panel, so the cell leaves `ax` as a single Axes instead of the array.
for panel in ax.flat:
    panel.set(xlabel='Date', ylabel='Differenced Price')
plt.show()