In [38]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from dateutil.relativedelta import relativedelta

In [62]:
# Daily price CSVs live under ../data/daily, resolved against the working directory.
base_dir = os.path.join("..", "data", "daily")

# Asset label -> path components of its CSV below base_dir.
_asset_csv_parts = {
    "gold": ("commodities", "gold.csv"),
    "btc": ("crypto", "btc.csv"),
    "sp500": ("equity indices", "US", "sp500_TR.csv"),
    "MOEX": ("equity indices", "RU", "MCFTR.csv"),
}

# Asset label -> full CSV path, in the same order as the table above.
asset_paths_dict = {
    label: os.path.join(base_dir, *parts)
    for label, parts in _asset_csv_parts.items()
}

In [84]:
# Resampling rules: daily, Friday-anchored weekly, month-, quarter- and year-end.
timeframes = ['D', 'W-FRI', 'ME', 'QE', 'YE']
# asset name -> {resampling rule -> frame with 'returns' and 'log_returns' columns}
asset_dfs = dict()

for asset_name, csv_path in asset_paths_dict.items():
    df = pd.read_csv(csv_path, parse_dates=['Date'], index_col='Date')
    # Quick sanity line per asset: covered date range and raw shape.
    print(df.index.min(), df.index.max(), f'{asset_name:5}', df.shape)

    asset_dfs[asset_name] = {}
    for freq in timeframes:
        df_resampled = (
            df.resample(freq)
            .last()
            # Periods without a closing price carry no information; drop them
            # before differencing so returns span consecutive observed closes.
            .dropna(subset=['Close'])
            .assign(
                returns=lambda frame: frame['Close'].pct_change(),
                log_returns=lambda frame: np.log(frame['Close']).diff(),
            )
            # The first period has no predecessor and therefore no return.
            .dropna(subset=['returns'])
        )
        asset_dfs[asset_name][freq] = df_resampled
    

1971-01-04 00:00:00 2025-10-24 00:00:00 gold  (13968, 4)
2010-07-19 00:00:00 2025-10-24 00:00:00 btc   (3952, 4)
1988-01-04 00:00:00 2025-10-24 00:00:00 sp500 (9526, 5)
2003-02-26 00:00:00 2025-10-17 00:00:00 MOEX  (8270, 2)


In [91]:
# Row count of every resampled frame, with a dashed rule ahead of each asset.
for asset_label, frames_by_freq in asset_dfs.items():
    print('-' * 50)
    for freq_label, frame in frames_by_freq.items():
        print(f'{asset_label:6}', f'{freq_label:6}', len(frame))

--------------------------------------------------
gold   D      13967
gold   W-FRI  2856
gold   ME     657
gold   QE     219
gold   YE     54
--------------------------------------------------
btc    D      3951
btc    W-FRI  795
btc    ME     183
btc    QE     61
btc    YE     15
--------------------------------------------------
sp500  D      9525
sp500  W-FRI  1972
sp500  ME     453
sp500  QE     151
sp500  YE     37
--------------------------------------------------
MOEX   D      8269
MOEX   W-FRI  1181
MOEX   ME     272
MOEX   QE     91
MOEX   YE     22


In [86]:
# Spot-check: BTC frame resampled to year-end, including the derived
# 'returns' and 'log_returns' columns.
asset_dfs['btc']['YE']

Unnamed: 0_level_0,Open,High,Low,Close,returns,log_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2011-12-31,4.166,4.3,4.0633,4.248,13.16,2.650421
2012-12-31,13.45,13.561,13.366,13.51,2.18032,1.156982
2013-12-31,761.27,762.66,737.84,753.52,54.774981,4.021325
2014-12-31,310.11,316.91,309.32,316.89,-0.579454,-0.866201
2015-12-31,427.095,433.032,417.693,430.496,0.358503,0.306383
2016-12-31,966.557,972.101,929.023,960.523,1.231201,0.80254
2017-12-31,14065.0,15083.0,13986.0,14368.0,13.958517,2.705281
2018-12-31,3866.98,3888.02,3696.86,3728.56,-0.740496,-1.348981
2019-12-31,7267.44,7309.94,7132.67,7194.24,0.929496,0.657259
2020-12-31,28732.0,29301.0,27976.0,29170.0,3.054633,1.39986


In [None]:
# Calendar-based annualisation constants.
calendar_days_per_year = 365.25  # average year length with leap days spread evenly
weeks_per_year = calendar_days_per_year / 7  # about 52.1786 calendar weeks per year

def calc_geom_annual_return(df: pd.DataFrame) -> float:
    """Return the geometric (compound) annual return between the first and last close.

    The total growth factor ``last_close / first_close`` is annualised by
    raising it to ``1 / years``, where ``years`` is the span between the first
    and last index entries.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with a ``'Close'`` column and a datetime-like index (the index
        values are passed to ``relativedelta``).

    Returns
    -------
    float
        Annualised compound return as a fraction (``0.10`` means 10% a year).

    Raises
    ------
    ValueError
        If the first and last index entries are the same date, so the span is
        zero years and the annualised return is undefined.
    """
    closes = df['Close']
    first_close, last_close = float(closes.iloc[0]), float(closes.iloc[-1])

    first_date, last_date = closes.index[0], closes.index[-1]
    span = relativedelta(last_date, first_date)
    # Fractional approximation: whole years + months as twelfths + leftover
    # days as a share of an average calendar year.
    years_float = span.years + span.months / 12 + span.days / calendar_days_per_year

    if years_float == 0:
        # Previously surfaced as a bare ZeroDivisionError from the exponent.
        raise ValueError(
            "cannot annualise a return over a zero-length period "
            f"(first and last dates are both {first_date})"
        )

    growth_factor = last_close / first_close
    return growth_factor ** (1 / years_float) - 1.0

def calc_simple_annual_return(df: pd.DataFrame) -> float:
    """Return the simple (non-compounded) annual return between the first and last close.

    The total return ``(last_close - first_close) / first_close`` is divided
    evenly by the number of years between the first and last index entries.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with a ``'Close'`` column and a datetime-like index (the index
        values are passed to ``relativedelta``).

    Returns
    -------
    float
        Average yearly return as a fraction (``0.10`` means 10% a year).

    Raises
    ------
    ValueError
        If the first and last index entries are the same date, so the span is
        zero years and the annualised return is undefined.
    """
    closes = df['Close']
    first_close, last_close = float(closes.iloc[0]), float(closes.iloc[-1])

    first_date, last_date = closes.index[0], closes.index[-1]
    span = relativedelta(last_date, first_date)
    # Fractional approximation: whole years + months as twelfths + leftover
    # days as a share of an average calendar year.
    years_float = span.years + span.months / 12 + span.days / calendar_days_per_year

    if years_float == 0:
        # Previously surfaced as a bare ZeroDivisionError from the division.
        raise ValueError(
            "cannot annualise a return over a zero-length period "
            f"(first and last dates are both {first_date})"
        )

    total_return = (last_close - first_close) / first_close
    return total_return / years_float

def calc_annual_stddev(df: pd.DataFrame) -> tuple[float, float]:
    """Return annualised volatility estimated from Friday-anchored weekly closes.

    The ``'Close'`` column is resampled to weekly frequency (last value of each
    week ending Friday). The sample standard deviation of weekly returns is
    then scaled by ``sqrt(weeks_per_year)``.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with a ``'Close'`` column and a datetime-like index (required by
        ``resample``).

    Returns
    -------
    tuple[float, float]
        ``(annualised_stddev_simple, annualised_stddev_log)``: volatility of
        simple weekly returns and of weekly log returns, in that order.
    """
    # Weeks with no observation come out of resample as NaN. Drop them up front
    # so the simple and log series are built from the same consecutive closes
    # and pct_change never has to fall back on forward-filling.
    weekly_prices = df['Close'].resample('W-FRI').last().dropna()

    annualisation_factor = weeks_per_year ** 0.5

    weekly_returns = weekly_prices.pct_change().dropna()
    annualized_stddev_simple = weekly_returns.std() * annualisation_factor

    # np.log replaces the pd.np alias, which no longer exists in pandas >= 2.0.
    weekly_log_returns = np.log(weekly_prices).diff().dropna()
    annualized_stddev_log = weekly_log_returns.std() * annualisation_factor

    return annualized_stddev_simple, annualized_stddev_log
    