In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the processed data set and keep only the rows whose Period is '2000s',
# renumbering the surviving rows from zero.
df = pd.read_csv('../data/processed/processed_data.csv')
df_2000 = df.loc[df['Period'].eq('2000s')].reset_index(drop=True)

In [None]:
# Preview the first five rows of the 2000s subset.
df_2000.head(n=5)

In [None]:
# Show the column labels of the 2000s subset.
df_2000.columns

In [3]:
# Per-index standard deviation of the 2000s rows, scaled by sqrt(12) to annualize it.
df_2000.loc[:, ['U.S. 30 Day TBill TR', 'Russell 2000 TR', 'S&P 500 TR',
                'LB LT Gvt/Credit TR', 'MSCI EAFE TR']].std().mul(np.sqrt(12))

U.S. 30 Day TBill TR     0.489856
Russell 2000 TR         21.068986
S&P 500 TR              15.925935
LB LT Gvt/Credit TR     14.993268
MSCI EAFE TR            19.403445
dtype: float64

In [5]:
# Collapse the 2000s rows to one row per calendar year: the year is the text
# before the first "-" in Date, and every index column is summed within a year.
yearly_df = (
    df_2000
    .assign(Year=lambda frame: frame["Date"].map(lambda date_text: date_text.split("-")[0]))
    .drop(columns=["Period", "Date"])
    .groupby("Year")
    .sum()
    .reset_index()
)
# Standard deviation of the yearly totals, per index.
yearly_df.drop(columns="Year").std()

U.S. 30 Day TBill TR     1.731645
Russell 2000 TR         23.524448
S&P 500 TR              21.650257
LB LT Gvt/Credit TR      8.954625
MSCI EAFE TR            24.801192
dtype: float64

In [7]:
from scipy.optimize import curve_fit
# Ten paired observations: y[k] goes with x[k].
y = [
    5.5, 8.2, 11.0, 13.9, 15.2,
    17.1, 19.7, 21.3, 23.2, 24.8,
]
x = [
    5.948933, 4.347477, 4.366947, 6.146769, 7.231461,
    8.962194, 11.480154, 13.076906, 15.000884, 16.637241,
]

In [31]:
import numpy as np
import pandas as pd
import altair as alt

# Ten (x, y) observations. x is not monotonic, and `sort=None` below suggests
# the points are meant to be joined in row order (assumption - confirm).
data = pd.DataFrame({
    'x': np.array([5.948933, 4.347477, 4.366947, 6.146769, 7.231461, 8.962194, 11.480154, 13.076906, 15.000884, 16.637241]),
    'y': np.array([5.5, 8.2, 11.0, 13.9, 15.2, 17.1, 19.7, 21.3, 23.2, 24.8])
})

# Line chart with point markers and a smooth curve.
# - A line mark joins its points in x order by default; `sort=None` on the x
#   channel does not change that. Mapping the row position to the `order`
#   channel makes the path follow the data order instead of zigzagging.
# - `interpolate='cardinal'` draws a smooth curve through the points, as the
#   title states; without it the segments are straight.
# `reset_index()` only feeds the chart; `data` itself is left unchanged.
line = alt.Chart(data.reset_index()).mark_line(point=True, interpolate='cardinal').encode(
    x=alt.X('x', sort=None),
    y='y',
    order=alt.Order('index:Q'),
).properties(
    title='Line Chart with Connected Dots and Smooth Curve'
)

# Display the plot
line


In [None]:
from scipy.stats import gmean
def calculate_geometric_mean(dataframe):
    """Return the geometric mean of every numeric column of ``dataframe``.

    Non-numeric columns are ignored. The result is a Series indexed by the
    names of the numeric columns.
    """
    return dataframe.select_dtypes(include="number").apply(gmean, axis=0)

In [None]:
def geo_mean(iterable):
    """Return the geometric mean of ``iterable`` minus one.

    Intended for growth factors (``1 + return``): the result is the constant
    per-period return that compounds to the same total growth.

    Parameters:
    - iterable: a non-empty sequence/array of numbers.

    Returns:
    - ``prod(values) ** (1 / count) - 1`` as a float.

    Raises:
    - ValueError: if ``iterable`` is empty.
    """
    # Work in floating point so integer input cannot silently overflow in prod().
    a = np.asarray(iterable, dtype=float)
    if a.size == 0:
        raise ValueError("geo_mean requires at least one value.")
    # a.size (not len(a)) matches the number of factors prod() multiplies,
    # including for multi-dimensional input.
    return a.prod() ** (1.0 / a.size) - 1

In [None]:
# Russell 2000 yearly values converted from percent to growth factors (1 + value/100).
iterable = (yearly_df["Russell 2000 TR"] / 100 + 1).tolist()

In [None]:
# Display the `iterable` list for inspection.
iterable

In [None]:
# Express the geometric-mean result as a percentage.
geo_mean(iterable) * 100

In [None]:
import pandas as pd

def calculate_geometric_returns(dataframe):
    """
    Compute the compound annual growth rate (CAGR), in percent, of every column.

    For each column the first and last values are treated as the starting and
    ending levels, and the number of periods is the row count minus one.

    Parameters:
    - dataframe: A Pandas DataFrame with one column per asset.

    Returns:
    - A Pandas Series named "Geometric Returns", indexed by column name.

    Raises:
    - ValueError: if a column has fewer than two data points.
    """

    def _cagr_percent(label):
        # One column's growth rate between its first and last observation.
        values = dataframe[label]
        n_obs = len(values)
        if n_obs < 2:
            raise ValueError(f"Column '{label}' does not have enough data points to calculate returns.")
        growth = values.iloc[-1] / values.iloc[0]
        return ((growth ** (1 / (n_obs - 1))) - 1) * 100

    return pd.Series(
        {label: _cagr_percent(label) for label in dataframe.columns},
        name="Geometric Returns",
    )

# calculate_geometric_returns compares a final level with an initial level, so
# it must be fed a level series, not the yearly percentage returns themselves.
# Build a cumulative growth index from the yearly values (treated as percent),
# prepend the starting level 1.0 so every year is counted, then compute the CAGR.
(
    yearly_df.drop(columns="Year")
    .div(100)
    .add(1)
    .cumprod()
    .pipe(
        lambda growth: pd.concat(
            [pd.DataFrame(1.0, index=[0], columns=growth.columns), growth],
            ignore_index=True,
        )
    )
    .pipe(calculate_geometric_returns)
)

In [None]:
# Names of the numeric columns of the yearly table.
list(yearly_df.select_dtypes(include="number").columns)

In [None]:
# Per-index mean of the 2000s rows, multiplied by 12 to annualize it.
df_2000.loc[:, ['U.S. 30 Day TBill TR', 'Russell 2000 TR', 'S&P 500 TR',
                'LB LT Gvt/Credit TR', 'MSCI EAFE TR']].mean().mul(12)

In [None]:
# Mean of the yearly totals, per index.
yearly_df.drop(columns="Year").mean()

In [None]:
# Standard deviation of the yearly totals, per index.
yearly_df.drop(columns="Year").std()

In [None]:
# Geometric mean yearly return, in percent, for each index.
# The yearly values are treated as percentage returns, which may be zero or
# negative; taking the log of the raw value would give NaN/-inf and is not a
# geometric mean return. The log is taken of the growth factor 1 + value/100
# (log1p), averaged, and converted back to a percentage return (expm1).
for i in ['U.S. 30 Day TBill TR',
 'Russell 2000 TR',
 'S&P 500 TR',
 'LB LT Gvt/Credit TR',
 'MSCI EAFE TR']:

    print(np.expm1(np.log1p(yearly_df[i] / 100).mean()) * 100)

In [None]:
# Geometric mean yearly return in percent: the geometric mean is taken over the
# growth factors 1 + value/100 (raw percentage returns can be zero or negative,
# for which a geometric mean is undefined) and then converted back to percent.
(calculate_geometric_mean(yearly_df.drop(columns="Year").div(100).add(1)) - 1) * 100

In [None]:
# One row per index: mean/sd of the 2000s rows next to mean/sd of the yearly
# totals, joined on the index name.
pd.merge(
    (
        df_2000.drop(columns="Period")
        .melt(id_vars=["Date"], var_name="Indexes", value_name="Value")
        .sort_values(by=["Date", "Indexes"])
        .groupby("Indexes")
        .agg(
            Monthly_average_return=("Value", "mean"),
            Monthly_sd=("Value", "std"),
        )
        .reset_index()
    ),
    (
        yearly_df
        .melt(id_vars=["Year"], var_name="Indexes", value_name="Value")
        .sort_values(by=["Year", "Indexes"])
        .groupby("Indexes")
        .agg(
            Annualized_return=("Value", "mean"),
            Annualized_sd=("Value", "std"),
        )
        .reset_index()
    ),
    on="Indexes",
)

In [None]:
# Covariance matrix of the yearly totals, leaving out the T-bill column.
yearly_df.drop(columns=["Year", "U.S. 30 Day TBill TR"]).cov()

In [None]:
# Mean and standard deviation of each index over the 2000s rows.
(
    df_2000.drop(columns="Period")
    .melt(id_vars=["Date"], var_name="Indexes", value_name="Value")
    .sort_values(by=["Date", "Indexes"])
    .groupby("Indexes")
    .agg(
        Monthly_average_return=("Value", "mean"),
        Monthly_sd=("Value", "std"),
    )
    .reset_index()
)

In [None]:
def generate_monthly_n_annual_stats_df(data, time_range):
    """Build per-index monthly and annualized statistics for one period and display them.

    Parameters:
    - data: DataFrame with "Period" and "Date" columns; every other column is
      melted into an (Indexes, Value) pair.
    - time_range: value compared against the "Period" column to select rows.

    The visible body returns nothing; it passes the styled table to `st.dataframe`.

    NOTE(review): `st` and `highlight_max_by_column` are not defined anywhere in
    the visible notebook - presumably Streamlit and a styling helper from the
    app this was copied from; confirm they are in scope before calling.
    """
    monthly_annual_stats = (
        pd.melt(
            # Keep only the rows of the requested period, then drop the label column.
            data.query("Period==@time_range").drop("Period", axis=1),
            id_vars=["Date"],
            var_name="Indexes",
            value_name="Value",
        )
        .sort_values(by=["Date", "Indexes"])
        .groupby("Indexes")
        .agg(
            Monthly_average_return=("Value", "mean"),
            Monthly_sd=("Value", "std"),
        )
        .reset_index()
        # Annualize: mean scaled by 12, standard deviation by sqrt(12).
        .assign(Annualized_return=lambda x: x["Monthly_average_return"] * 12)
        .assign(Annualized_sd=lambda x: x["Monthly_sd"] * 12**0.5)
    )

    # Render the table with the highlight_max_by_column style applied.
    st.dataframe(
        monthly_annual_stats.style.apply(highlight_max_by_column),
        use_container_width=True,
        hide_index=True,
    )