In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import date
import seaborn as sns
import random

import matplotlib.pyplot as plt

from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.mixture import GaussianMixture


from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.stats.diagnostic import acorr_ljungbox

import scipy.stats as stats
from scipy.stats import probplot, laplace, norm, t


import statsmodels.api as sm
from statsmodels.nonparametric.kde import KDEUnivariate
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima_process import ArmaProcess

import pymc as pm
import pytensor.tensor as pt
import arviz as az

import tensorflow as tf
from tensorflow import keras


#from tensorflow.keras.utils import plot_model


######################################
#from pmdarima import auto_arima
#from diptest import diptest

In [None]:
class StockData:
    """Download price history for a ticker and report its growth rates.

    Attributes are plain values supplied by the caller:
        ticker: symbol passed to ``yf.download``.
        start / end: date bounds passed to ``yf.download``.
        column: name of the price column returned by ``fetch_data``.
        df: the cleaned full price table, populated by ``fetch_data``
            (``None`` until then).
    """

    # Average calendar-year length used to annualise returns.
    _DAYS_PER_YEAR = 365.25

    def __init__(self, ticker, start, end, column):
        self.ticker = ticker
        self.start = start
        self.end = end
        self.df = None
        self.column = column

    def fetch_data(self):
        """Download, clean and plot the price history, then chart its CAGR.

        Returns:
            A single-column DataFrame holding ``self.column``, sorted by
            index with rows containing missing values removed.

        Raises:
            ValueError: if the download returns no rows.
            KeyError: if ``self.column`` is not among the downloaded columns.
        """
        # NOTE: an offline alternative (pd.read_excel on a saved export,
        # followed by set_index('Price')) used to be sketched here as a
        # disabled string literal; only the yfinance path is live.
        frame = yf.download(self.ticker, start=self.start, end=self.end)
        if frame is None or frame.empty:
            raise ValueError(
                f"No data returned for {self.ticker!r} "
                f"between {self.start} and {self.end}."
            )

        # yfinance may return two-level (field, ticker) columns; keep only the
        # field level so columns can be addressed by plain names below.
        # This must happen before plotting and before selecting self.column.
        frame.columns = frame.columns.get_level_values(0)

        print(frame.head())
        print(frame.tail())

        self.df = frame.sort_index().dropna()
        self.df.info()

        self._plot_prices()

        new_df = self.df[[self.column]].copy()
        new_df.info()

        self.CAGR(new_df)
        return new_df

    def _plot_prices(self):
        """Plot every price column, with volume on a secondary axis if present."""
        prices = self.df.drop(columns=["Volume"], errors="ignore")
        ax = prices.plot(figsize=(12, 6), title=f"{self.ticker} Stock Prices")
        ax.set_ylabel("Stock Price")

        if "Volume" in self.df.columns:
            ax2 = ax.twinx()
            ax2.set_ylabel("Volume")
            ax2.plot(self.df.index, self.df["Volume"], color="gray", alpha=0.5,
                     linestyle="dashed", label="Volume")
            ax2.legend(loc="upper left")
        plt.show()

    @staticmethod
    def _compute_cagr(df):
        """Return ``(yearly, cumulative)`` CAGR dicts keyed by calendar year.

        Only the first column of ``df`` is used. ``yearly[y]`` annualises the
        move from the first to the last observation inside year ``y``;
        ``cumulative[y]`` annualises the move from the very first observation
        to the last observation of year ``y``. A year is omitted from a dict
        when the corresponding span is zero days long.

        Raises:
            ValueError: if the index is not a DatetimeIndex or ``df`` is empty.
        """
        if not isinstance(df.index, pd.DatetimeIndex):
            raise ValueError("DataFrame index must be a DatetimeIndex.")
        if df.empty:
            raise ValueError("DataFrame must contain data to compute CAGR.")

        series = df.sort_index().iloc[:, 0]
        base_date = series.index[0]
        base_value = series.iloc[0]
        per_year = StockData._DAYS_PER_YEAR

        yearly = {}
        cumulative = {}
        # One pass per calendar year; groups come back in ascending year order
        # and each group keeps the sorted row order.
        for year, chunk in series.groupby(series.index.year):
            year = int(year)
            first_date, last_date = chunk.index[0], chunk.index[-1]
            last_value = chunk.iloc[-1]

            span = (last_date - first_date).days
            if span > 0:
                yearly[year] = (last_value / chunk.iloc[0]) ** (per_year / span) - 1

            # The last row of this year is also the last row of all data up
            # to and including this year.
            total_span = (last_date - base_date).days
            if total_span > 0:
                cumulative[year] = (last_value / base_value) ** (per_year / total_span) - 1

        return yearly, cumulative

    def CAGR(self, df):
        """Plot yearly and cumulative CAGR for the first column of ``df``.

        Args:
            df: DataFrame indexed by a DatetimeIndex; only its first column
                is used.

        Raises:
            ValueError: if the index is not a DatetimeIndex or ``df`` is empty.
        """
        # Validation and arithmetic happen before any figure is created.
        yearly_cagr, cumulative_cagr = self._compute_cagr(df)

        plt.figure(figsize=(12, 6))

        # Yearly CAGR line
        plt.plot(list(yearly_cagr), [v * 100 for v in yearly_cagr.values()],
                 marker='o', label='Yearly CAGR')

        # Cumulative CAGR line
        plt.plot(list(cumulative_cagr), [v * 100 for v in cumulative_cagr.values()],
                 marker='s', linestyle='--', label='Cumulative CAGR')

        # Annotate cumulative CAGR values slightly above each marker
        for year, val in cumulative_cagr.items():
            plt.text(year, val * 100 + 0.5, f"{val * 100:.2f}%",
                     ha='center', fontsize=8, color='blue')

        plt.axhline(0, color='black', linewidth=0.5)
        plt.title(f"Yearly vs Cumulative CAGR of {self.ticker} {df.columns[0]} Prices")
        plt.xlabel("Year")
        plt.ylabel("CAGR (%)")
        plt.grid(True)
        plt.legend()
        plt.tight_layout()
        plt.show()