In [1]:
from __future__ import annotations

%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path


from wufam.data.prepare_data import read_kf_data
from wufam.dataset import Dataset

PATH = Path("../data/kf_data")
START = "1970-01-01"
END = "2024-12-31"
DATASET = Dataset.BM_6_M
FACTORS_DATASET = Dataset.FACTORS_M
WEIGHTING = "value_weighted"
FACTOR_ANNUALIZE = 12

## 1.1

In [3]:
portfolios_total_r, portfolios_xs_r, factors_df, rf = read_kf_data(
    portfolios_filename=PATH / DATASET,
    factors_filename=PATH / FACTORS_DATASET,
    start_date=START,
    end_date=END,
    weighting=WEIGHTING,
)

In [4]:
assert (
    portfolios_total_r.shape[0]
    == portfolios_xs_r.shape[0]
    == factors_df.shape[0]
    == rf.shape[0]
)

## 1.4

In [5]:
from wufam.ap.uncond_factor_model import UncondFactorModel

capm = UncondFactorModel()
capm.fit(
    test_assets_xs_r=portfolios_xs_r,
    factors_df=factors_df[["Mkt-RF"]],
)

In [6]:
capm.grs_stat, capm.p_value

(np.float64(6.629569676484499), np.float64(8.082971012380241e-07))

Try monthly

In [7]:
ff_model = UncondFactorModel()
ff_model.fit(
    test_assets_xs_r=portfolios_xs_r,
    factors_df=factors_df,
)

In [8]:
ff_model.grs_stat, ff_model.p_value

(np.float64(5.26895705119617), np.float64(2.5726415393510304e-05))

## 1.5

In [9]:
split_date = portfolios_xs_r.index[len(portfolios_xs_r) // 2]

In [10]:
capm.fit(
    test_assets_xs_r=portfolios_xs_r.loc[:split_date],
    factors_df=factors_df[["Mkt-RF"]].loc[:split_date],
)

In [11]:
capm.rmse_score(
    test_assets_xs_r=portfolios_xs_r.loc[split_date:],
    factors=factors_df[["Mkt-RF"]].loc[split_date:],
)

np.float64(0.0014909569646982166)

In [12]:
capm.r2_score(
    test_assets_xs_r=portfolios_xs_r.loc[split_date:],
    factors=factors_df[["Mkt-RF"]].loc[split_date:],
)

np.float64(-1.1595217020999948)

In [13]:
capm.r2_gls_score(
    test_assets_xs_r=portfolios_xs_r.loc[split_date:],
    factors=factors_df[["Mkt-RF"]].loc[split_date:],
)

np.float64(-0.3658270839465645)

In [14]:
ff_model.fit(
    test_assets_xs_r=portfolios_xs_r.loc[:split_date],
    factors_df=factors_df.loc[:split_date],
)

In [15]:
ff_model.rmse_score(
    test_assets_xs_r=portfolios_xs_r.loc[split_date:],
    factors=factors_df.loc[split_date:],
)

np.float64(0.0010301585243249142)

In [16]:
ff_model.r2_score(
    test_assets_xs_r=portfolios_xs_r.loc[split_date:],
    factors=factors_df.loc[split_date:],
)

np.float64(-0.030944955328074064)

In [17]:
ff_model.r2_gls_score(
    test_assets_xs_r=portfolios_xs_r.loc[split_date:],
    factors=factors_df.loc[split_date:],
)

np.float64(-0.24602739512673666)

In [18]:
from sklearn.decomposition import PCA

pca = PCA(n_components=min(portfolios_total_r.shape))
factors = pca.fit_transform(portfolios_total_r)

  X_transformed = X @ self.components_.T
  X_transformed = X @ self.components_.T
  X_transformed = X @ self.components_.T


In [19]:
factors

array([[-0.16545614,  0.03856496, -0.05397383,  0.01202235, -0.01643063,
        -0.01066984],
       [ 0.10033405, -0.05800805, -0.00394648, -0.00129398,  0.01443853,
        -0.01672804],
       [-0.05364976, -0.04060644, -0.01849345, -0.01230053,  0.00362214,
        -0.00156738],
       ...,
       [-0.03689875, -0.00539322, -0.00091627,  0.02728451,  0.00548326,
        -0.00345135],
       [ 0.22285962,  0.01781967, -0.01443788,  0.02419892,  0.00562038,
         0.00053987],
       [-0.18058023,  0.01840886,  0.03756224,  0.01129008, -0.03080177,
        -0.01339861]], shape=(660, 6))

In [20]:
import numpy as np

signs = np.sign(factors.mean(axis=0))
factors = signs * factors

factors_means = factors.mean(axis=0)
factors_means

array([3.15404268e-18, 3.31174482e-19, 3.25917744e-19, 1.47188659e-19,
       6.51835488e-19, 5.70356052e-19])

In [21]:
np.argsort(factors_means)[::-1]

array([0, 4, 5, 1, 2, 3])

In [22]:
ranked_factors = factors[:, np.argsort(factors_means)[::-1]]
ranked_factors

array([[ 0.16545614,  0.01643063,  0.01066984,  0.03856496,  0.05397383,
        -0.01202235],
       [-0.10033405, -0.01443853,  0.01672804, -0.05800805,  0.00394648,
         0.00129398],
       [ 0.05364976, -0.00362214,  0.00156738, -0.04060644,  0.01849345,
         0.01230053],
       ...,
       [ 0.03689875, -0.00548326,  0.00345135, -0.00539322,  0.00091627,
        -0.02728451],
       [-0.22285962, -0.00562038, -0.00053987,  0.01781967,  0.01443788,
        -0.02419892],
       [ 0.18058023,  0.03080177,  0.01339861,  0.01840886, -0.03756224,
        -0.01129008]], shape=(660, 6))