
# SAC Market-Neutral Training Notebook

**Описание:** этот ноутбук реализует стратегию *market-neutral* на данных DOW30 в 1‑минутном таймфрейме.
Агент: **SAC (Soft Actor-Critic)**. Окружение — модифицированное FinRL-style `TradingEnv` с комиссией, слиппеджем,
штрафом за net exposure и reward в виде Sharpe‑proxy (rolling). Также реализован walk-forward (rolling) pipeline.

**Что делает ноутбук:**
1. Загружает данные (путь указывается в `DATA_PATH`).
2. Делает базовые фичи (returns, MA, z-score, realized vol, VIX/turbulence если есть).
3. Создаёт векторизуемое окружение `MarketNeutralEnv` (continuous actions: target weights per asset).
4. Тренирует SAC в walk-forward режиме и сохраняет модели и метрики.

**Примечания:** установите необходимые зависимости (`stable-baselines3`, `gym`, `pandas`, `numpy`, `matplotlib`, `ta` и т.д.).
Если используешь собственные датасеты/файлы/окружения — подправь пути и части кода. 


In [9]:
!pip install alpaca_trade_api
!pip install stable_baselines3
!pip install gymnasium
!pip install ta
!pip install finrl
!pip install stockstats
!pip install wrds



In [10]:

# Imports and basic config
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import dill
import json
from datetime import timedelta

# RL libraries
try:
    from stable_baselines3 import SAC
    from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
except Exception as e:
    print("stable-baselines3 not found. Please install it: pip install stable-baselines3[extra]")
    
import gym
from gym import spaces


In [11]:

DATA_PATH = 'prepared_csv/AAPL_prepared.csv'
OUTPUT_DIR = 'results/cas'
os.makedirs(OUTPUT_DIR, exist_ok=True)

ASSETS = None   # if None, infer from dataframe columns
TRAIN_WINDOW_MONTHS = 12
VAL_MONTHS = 3
TEST_MONTHS = 3

# Env / trading params
COMMISSION_RATE = 0.0005
SLIPPAGE_COEF = 0.0001
EXPOSURE_PENALTY = 0.02
TURNOVER_PENALTY = 0.001
SHARPE_WINDOW = 250  # in timesteps

# SAC hyperparams (simple default)
SAC_PARAMS = dict(
    verbose=1,
    learning_rate=3e-4,
    buffer_size=200_000,
    batch_size=256,
    tau=0.005,
    gamma=0.995,
    train_freq=1,
)
TOTAL_TIMESTEPS = 1_000_000  # per fold (adjust to compute resources)


In [12]:

# Data loading and basic preprocessing
def load_data(path):
    if not os.path.exists(path):
        raise FileNotFoundError(f"Data file not found at {path}. Please place CSV with 1-min DOW30 data there.")
    df = pd.read_csv(path, parse_dates=['date'], infer_datetime_format=True)
    df = df.sort_values('date').reset_index(drop=True)
    return df

# Example expected CSV format:
# date, ticker, open, high, low, close, volume, vix, turbulence
# Use multi-asset long format (row per asset per timestamp).
df = load_data(DATA_PATH)
print("Loaded data rows:", len(df))
# Detect asset list
if ASSETS is None:
    ASSETS = df['tic'].unique().tolist() if 'tic' in df.columns else []
    print("Inferred assets:", ASSETS[:10], " (total", len(ASSETS), ")")


  df = pd.read_csv(path, parse_dates=['date'], infer_datetime_format=True)


ValueError: Missing column provided to 'parse_dates': 'date'

In [None]:

# Feature engineering (vectorized)
def add_features(df):
    # Assumes df has columns: date, tic, close, volume (optionally vix, turbulence)
    out = df.copy()
    out['return_1'] = out.groupby('tic')['close'].pct_change().fillna(0)
    out['return_5'] = out.groupby('tic')['close'].pct_change(5).fillna(0)
    out['ma_5'] = out.groupby('tic')['close'].transform(lambda x: x.rolling(5, min_periods=1).mean())
    out['ma_20'] = out.groupby('tic')['close'].transform(lambda x: x.rolling(20, min_periods=1).mean())
    out['ma_diff_5_20'] = out['ma_5'] - out['ma_20']
    out['zscore_20'] = out.groupby('tic')['close'].transform(lambda x: (x - x.rolling(20, min_periods=1).mean()) / (x.rolling(20, min_periods=1).std() + 1e-9))
    out['vol_20'] = out.groupby('tic')['return_1'].transform(lambda x: x.rolling(20, min_periods=1).std())
    # Optional: fill VIX/turbulence forward if present
    if 'vix' in out.columns:
        out['vix'] = out['vix'].fillna(method='ffill').fillna(0)
    if 'turbulence' in out.columns:
        out['turbulence'] = out['turbulence'].fillna(0)
    return out

df = add_features(df)
print("Features added. Columns:", df.columns.tolist())


In [None]:

# Market-neutral environment (continuous target weights per asset)
class MarketNeutralEnv(gym.Env):
    metadata = {'render.modes': ['human']}
    def __init__(self, df, assets, initial_cash=1_000_000, max_trade_amount=1.0,
                 commission_rate=COMMISSION_RATE, slippage_coef=SLIPPAGE_COEF,
                 exposure_penalty=EXPOSURE_PENALTY, turnover_penalty=TURNOVER_PENALTY,
                 sharpe_window=SHARPE_WINDOW):
        super(MarketNeutralEnv, self).__init__()
        self.df = df.copy()
        self.assets = assets
        self.n_assets = len(assets)
        self.initial_cash = initial_cash
        self.max_trade_amount = max_trade_amount
        self.commission_rate = commission_rate
        self.slippage_coef = slippage_coef
        self.exposure_penalty = exposure_penalty
        self.turnover_penalty = turnover_penalty
        self.sharpe_window = sharpe_window

        # observation: for each asset: [close, ma_diff_5_20, zscore_20, vol_20], plus global vix, turbulence (if present)
        obs_len = 4 * self.n_assets + 2
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(obs_len,), dtype=np.float32)
        # continuous actions: target weight per asset in [-1,1]
        self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(self.n_assets,), dtype=np.float32)

        # internal state reset
        self.reset()

    def _get_row(self, idx):
        # returns a dataframe slice at given time index (rows for all assets)
        return self.df.iloc[idx*self.n_assets:(idx+1)*self.n_assets]

    def reset(self):
        # align data length to full steps
        # compute number of time steps (assumes full panel: each timestamp has rows for all assets)
        timestamps = self.df['date'].unique()
        self.timestamps = np.sort(timestamps)
        self.steps_total = len(self.timestamps)
        self.current_step = 0
        self.portfolio_value = self.initial_cash
        self.current_weights = np.zeros(self.n_assets, dtype=np.float32)
        self.position_value = np.zeros(self.n_assets, dtype=np.float32)
        self.returns_window = []
        return self._get_obs()

    def _get_obs(self):
        # build observation vector for current_step
        t = self.timestamps[self.current_step]
        rows = self.df[self.df['date'] == t].set_index('tic').reindex(self.assets)
        # if missing values, fill with zeros
        closes = rows['close'].fillna(method='ffill').fillna(0).values
        ma_diff = rows['ma_diff_5_20'].fillna(0).values
        zscore = rows['zscore_20'].fillna(0).values
        vol = rows['vol_20'].fillna(0).values
        # global
        vix = rows['vix'].iloc[0] if 'vix' in rows.columns else 0.0
        turb = rows['turbulence'].iloc[0] if 'turbulence' in rows.columns else 0.0
        obs = np.concatenate([closes, ma_diff, zscore, vol, np.array([vix, turb])])
        return obs.astype(np.float32)

    def step(self, action):
        # action: target weights in [-1,1] for each asset
        action = np.clip(action, -1.0, 1.0)
        prev_portfolio_value = self.portfolio_value
        # price vector at current step
        t = self.timestamps[self.current_step]
        rows = self.df[self.df['date'] == t].set_index('tic').reindex(self.assets)
        prices = rows['close'].fillna(method='ffill').fillna(0).values

        # compute trade amounts (absolute USD)
        target_values = action * self.portfolio_value  # target dollar allocation per asset (could scale differently)
        current_values = self.current_weights * self.portfolio_value
        trade_values = target_values - current_values
        trade_sizes = np.abs(trade_values)

        # execution cost: commission + slippage
        commission = self.commission_rate * trade_sizes.sum()
        # approximate slippage using volatility (use vol column)
        vols = rows['vol_20'].fillna(0).values
        slippage = (self.slippage_coef * trade_sizes * (vols + 1e-6)).sum()

        execution_cost = commission + slippage

        # PnL: change in price from current to next step (we assume immediate rebalancing at current price,
        # then price move to next close will realize PnL)
        # we need next prices; if at last step, no price change
        if self.current_step + 1 < self.steps_total:
            t_next = self.timestamps[self.current_step + 1]
            rows_next = self.df[self.df['date'] == t_next].set_index('tic').reindex(self.assets)
            prices_next = rows_next['close'].fillna(method='ffill').fillna(0).values
            returns = (prices_next - prices) / (prices + 1e-9)
        else:
            returns = np.zeros_like(prices)

        # update holdings to target
        self.current_weights = target_values / (self.portfolio_value + 1e-9)
        # compute pnl from returns on target weights
        pnl = (self.current_weights * returns).sum() * self.portfolio_value

        # update portfolio value
        self.portfolio_value = self.portfolio_value + pnl - execution_cost

        # compute reward: raw return minus exposure_penalty and turnover penalty
        ret = (self.portfolio_value - prev_portfolio_value) / (prev_portfolio_value + 1e-9)
        net_exposure = np.abs(self.current_weights).sum()
        exposure_penalty = self.exposure_penalty * net_exposure
        turnover = trade_sizes.sum() / (self.portfolio_value + 1e-9)
        turnover_pen = self.turnover_penalty * turnover

        # rolling sharpe proxy
        self.returns_window.append(ret)
        if len(self.returns_window) > self.sharpe_window:
            self.returns_window.pop(0)
        if len(self.returns_window) >= max(5, int(self.sharpe_window/10)):
            mean_r = np.mean(self.returns_window)
            std_r = np.std(self.returns_window) + 1e-9
            sharpe_proxy = mean_r / std_r
            reward = sharpe_proxy - exposure_penalty - turnover_pen
        else:
            reward = ret - exposure_penalty - turnover_pen

        self.current_step += 1
        done = self.current_step >= self.steps_total - 1
        info = {'pnl': pnl, 'execution_cost': execution_cost, 'net_exposure': net_exposure, 'portfolio_value': self.portfolio_value}

        obs = self._get_obs() if not done else np.zeros(self.observation_space.shape, dtype=np.float32)
        return obs, float(reward), bool(done), info

    def render(self, mode='human'):
        print(f"Step {self.current_step} PV={self.portfolio_value:.2f} weights_mean={self.current_weights.mean():.4f}")


In [None]:

# Walk-forward training & evaluation utilities
def make_env_from_df(df_slice, assets):
    # df_slice must contain rows for consecutive timestamps for all assets
    return MarketNeutralEnv(df_slice, assets)

def evaluate_model_on_df(model, df_test, assets):
    env = MarketNeutralEnv(df_test, assets)
    obs = env.reset()
    done=False
    history = []
    while not done:
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, done, info = env.step(action)
        history.append(info)
    # Collect metrics
    pv = history[-1]['portfolio_value'] if history else env.portfolio_value
    returns = [h.get('pnl',0)/ (env.initial_cash+1e-9) for h in history]
    cum_return = pv / env.initial_cash - 1.0
    # Rough annualized approx (requires timestamps frequency; skipped here) - return cum_return for now
    return {'cumulative_return': cum_return, 'history': history}

def walk_forward_train(df, assets, train_months=TRAIN_WINDOW_MONTHS, val_months=VAL_MONTHS, test_months=TEST_MONTHS):
    results = []
    timestamps = np.sort(df['date'].unique())
    start = timestamps[0]
    end = timestamps[-1]
    cur_train_start = pd.to_datetime(start)
    while True:
        train_end = cur_train_start + pd.DateOffset(months=train_months)
        val_end = train_end + pd.DateOffset(months=val_months)
        test_end = val_end + pd.DateOffset(months=test_months)
        if test_end > pd.to_datetime(end):
            break
        df_train = df[(df['date'] >= cur_train_start) & (df['date'] < train_end)].reset_index(drop=True)
        df_val = df[(df['date'] >= train_end) & (df['date'] < val_end)].reset_index(drop=True)
        df_test = df[(df['date'] >= val_end) & (df['date'] < test_end)].reset_index(drop=True)
        print(f"WF window: train {cur_train_start.date()} -> {train_end.date()}, test {val_end.date()} -> {test_end.date()}")
        env_train = DummyVecEnv([lambda: MarketNeutralEnv(df_train, assets)])
        # optional normalization
        try:
            env_train = VecNormalize(env_train, norm_obs=True, norm_reward=False, clip_obs=10.0)
        except Exception as e:
            print("VecNormalize not available or failed:", e)
        model = SAC('MlpPolicy', env_train, **SAC_PARAMS)
        model.learn(total_timesteps=TOTAL_TIMESTEPS)
        # save model
        model_path = os.path.join(OUTPUT_DIR, f"sac_model_{cur_train_start.date()}_{test_end.date()}.zip")
        model.save(model_path)
        # evaluate
        perf = evaluate_model_on_df(model, df_test, assets)
        perf['model_path'] = model_path
        results.append(perf)
        # roll forward by test_months
        cur_train_start += pd.DateOffset(months=test_months)
    # save results
    with open(os.path.join(OUTPUT_DIR, 'walk_forward_results.json'), 'w') as f:
        json.dump(results, f, default=str)
    return results


In [None]:

# Run the walk-forward training (this may take a long time; adjust TOTAL_TIMESTEPS)
# Ensure assets are a complete list covering df timestamps
if len(ASSETS)==0:
    ASSETS = df['tic'].unique().tolist()
results = walk_forward_train(df, ASSETS)
print("Walk-forward finished. Results:", results)



# Final notes and next steps

- Если хочешь ускорить тестирование — уменьш TOTAL_TIMESTEPS и/или сократи train/test окна.
- Подумай об увеличении разнообразия данных (несколько лет) или data augmentation (noise/bootstrap) если переживаешь о смещении.
- После получения моделей — проведи более строгую оценку: annualized metrics, sharpe, drawdown, bootstrap p-values.

**Файлы, созданные этим ноутбуком:** модели `.zip` в `OUTPUT_DIR` и `walk_forward_results.json`.
