--------------
SET UP
--------------


In [16]:
# import necessary packages
import alpaca_trade_api as tradeapi
import pandas as pd
import numpy as np
from typing import Optional, Sequence, Union
import pandas as pd
import yfinance as yf
from datetime import date
from dateutil.relativedelta import relativedelta
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
from sklearn.linear_model import ElasticNetCV
from sklearn.datasets import make_regression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso, Ridge
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
import logging

In [17]:
# Alpaca set up

# We don't have to hard code these in, but we can for now
ALPACA_API_KEY = "PKSRU8TI6JTHJG0KF87R"
ALPACA_SECRET_KEY = "goumjgb1Ua9JZ5jpgF9iWiM07GJcCiXnwftYq8My" 
ALPACA_BASE_URL = "https://paper-api.alpaca.markets"  


api = tradeapi.REST(
    ALPACA_API_KEY,
    ALPACA_SECRET_KEY,
    ALPACA_BASE_URL,
    api_version='v2'
)

# Test connection
account = api.get_account()
print(f"Account Status: {account.status}")
print(f"Buying Power: ${account.buying_power}")

Account Status: ACTIVE
Buying Power: $200000


----------------
CREATE FUNCTIONS
----------------


In [18]:

# Alternate way to get data by using alpaca instead (since we're just using daily data now)
def get_live_data(symbol, lookback_days=2520):  # ~10 years
    end = pd.Timestamp.now()
    start = end - pd.Timedelta(days=lookback_days)
    
    # Get historical data from Alpaca
    bars = api.get_bars(
        symbol,
        tradeapi.TimeFrame.Day,
        start=start.isoformat(),
        end=end.isoformat()
    ).df
    
    return bars['close']


In [19]:

# FUNCTIONS FOR CREATING A BUNCH OF TECHNICAL INDICATORS

# ---------- helpers ----------
def _safe_div(a, b):
    out = np.divide(a, b, out=np.full_like(a, np.nan, dtype="float64"), where=(b != 0) & np.isfinite(b))
    return out

def _ensure_series(close: Union[pd.Series, pd.DataFrame], name="close") -> pd.Series:
    if isinstance(close, pd.DataFrame):
        if "Close" in close.columns:
            close = close["Close"]
        elif close.shape[1] == 1:
            close = close.iloc[:, 0]
        else:
            raise ValueError("If you pass a DataFrame, it must have a 'Close' column or one column only.")
    close = pd.Series(close, name=name).astype("float64")
    close.index = pd.to_datetime(close.index)
    return close.sort_index()

def _mac(a, span):
    # Pandas ewm mean with common trading practice settings
    return pd.Series(a).ewm(span=span, adjust=False, min_periods=span).mean()

def _rsi(close: pd.Series, window: int) -> pd.Series:
    # Close-only RSI (Wilder’s)
    delta = close.diff()
    up = delta.clip(lower=0.0)
    down = -delta.clip(upper=0.0)
    rs = up.ewm(alpha=1/window, adjust=False, min_periods=window).mean() / \
         down.ewm(alpha=1/window, adjust=False, min_periods=window).mean()
    return 100 - (100 / (1 + rs))

def _rolling_percentile_rank(x: np.ndarray) -> float:
    # rank of last point within window (right-sided, ties -> max rank), scaled [0,1]
    arr = np.sort(x)
    r = np.searchsorted(arr, x[-1], side="right")
    return r / len(x)

def _rolling_pos_in_range(close: pd.Series, window: int) -> pd.Series:
    roll_max = close.rolling(window, min_periods=window).max()
    roll_min = close.rolling(window, min_periods=window).min()
    return _safe_div((close - roll_min).to_numpy(), (roll_max - roll_min).to_numpy())

def _macd(close: pd.Series, fast: int, slow: int, signal: int):
    ema_fast = close.ewm(span=fast, adjust=False, min_periods=fast).mean()
    ema_slow = close.ewm(span=slow, adjust=False, min_periods=slow).mean()
    macd = ema_fast - ema_slow
    sig = macd.ewm(span=signal, adjust=False, min_periods=signal).mean()
    hist = macd - sig
    return macd, sig, hist

def _dema(close: pd.Series, span: int) -> pd.Series:
    ema = close.ewm(span=span, adjust=False, min_periods=span).mean()
    ema_ema = ema.ewm(span=span, adjust=False, min_periods=span).mean()
    return 2 * ema - ema_ema

def _tema(close: pd.Series, span: int) -> pd.Series:
    e1 = close.ewm(span=span, adjust=False, min_periods=span).mean()
    e2 = e1.ewm(span=span, adjust=False, min_periods=span).mean()
    e3 = e2.ewm(span=span, adjust=False, min_periods=span).mean()
    return 3 * (e1 - e2) + e3

# ---------- main ----------
def build_tech_indicators(
    close: Union[pd.Series, pd.DataFrame],
    macro: Optional[pd.DataFrame] = None,
    *,
    ma_windows: Sequence[int] = (
        3,5,7,9,10,12,14,15,18,20,21,24,30,34,35,40,45,50,55,60,63,70,75,80,90,100,120,126,150,180,200,210,220,240,250,252
    ),
    roc_periods: Sequence[int] = (
        1,2,3,4,5,7,9,10,12,14,15,20,21,30,35,40,45,50,60,63,90,120,126,150,180,200,252
    ),
    rsi_windows: Sequence[int] = (6,7,9,10,12,14,20,21,28),
    bb_windows: Sequence[int] = (20,50,100),
    bb_k: float = 2.0,
    ewm_vol_windows: Sequence[int] = (10,20,21,30,50,63),
    acf_windows: Sequence[int] = (21,63,126),
    acf_lags: Sequence[int] = (1,2,3,4,5),
    skew_kurt_windows: Sequence[int] = (21,63,126,252),
    dd_windows: Sequence[int] = (21,63,126,252),
    pos_range_windows: Sequence[int] = (10,20,50,100,252),
    dema_tema_windows: Sequence[int] = (10,12,14,20,21,30,35,50,63,100),
    sharpe_windows: Sequence[int] = (21,63,126,252),
    macro_lags: Sequence[int] = (1,5,10),
    macro_windows: Sequence[int] = (5,21,63,126,252),
    dropna: bool = True
) -> pd.DataFrame:
    """
    Build a wide feature matrix of technical indicators from close prices and optional macro series.
    Parameters
    ----------
    close : pd.Series or DataFrame
        Close prices (datetime index). If DataFrame, must include 'Close' or have 1 column.
    macro : pd.DataFrame, optional
        Exogenous series (e.g., VIX). Columns = variables, datetime index. Aligned to 'close'.
    Returns
    -------
    pd.DataFrame
        Time-indexed feature DataFrame.
    """
    close = _ensure_series(close, name="close")
    idx = close.index
    feats = pd.DataFrame(index=idx)

    # Basic transforms
    logp = np.log(close)
    ret1 = logp.diff()                   # log return t/t-1
    feats["ret_1"] = ret1

    # Multi-horizon ROC (simple returns) and log returns
    for p in roc_periods:
        feats[f"roc_{p}"] = close.pct_change(p)
        feats[f"logret_{p}"] = logp.diff(p)

    # Moving averages, EMA, rolling std, z-score, distance-to-MA
    for w in ma_windows:
        sma = close.rolling(w, min_periods=w).mean()
        ema = close.ewm(span=w, adjust=False, min_periods=w).mean()
        std = close.rolling(w, min_periods=w).std()
        feats[f"sma_{w}"] = sma
        feats[f"ema_{w}"] = ema
        feats[f"std_{w}"] = std
        feats[f"zscore_{w}"] = _safe_div((close - sma).to_numpy(), std.to_numpy())
        feats[f"dist_to_sma_{w}"] = _safe_div((close - sma).to_numpy(), sma.to_numpy())

    # RSI
    for w in rsi_windows:
        feats[f"rsi_{w}"] = _rsi(close, w)

    # Bollinger features
    for w in bb_windows:
        mid = close.rolling(w, min_periods=w).mean()
        sd = close.rolling(w, min_periods=w).std()
        upper = mid + bb_k * sd
        lower = mid - bb_k * sd
        feats[f"bb_pctb_{w}"] = _safe_div((close - lower).to_numpy(), (upper - lower).to_numpy())
        feats[f"bb_bw_{w}"] = _safe_div((upper - lower).to_numpy(), mid.to_numpy())

    # MACD variants
    for (f, s, sig) in [(12,26,9), (5,35,5), (8,17,9)]:
        macd, sigl, hist = _macd(close, f, s, sig)
        feats[f"macd_{f}_{s}_{sig}"] = macd
        feats[f"macd_signal_{f}_{s}_{sig}"] = sigl
        feats[f"macd_hist_{f}_{s}_{sig}"] = hist

    # EWM volatility on log returns
    for w in ewm_vol_windows:
        feats[f"ewm_vol_{w}"] = ret1.ewm(span=w, adjust=False, min_periods=w).std()

    # Rolling skew/kurt of log returns
    for w in skew_kurt_windows:
        feats[f"skew_ret_{w}"] = ret1.rolling(w, min_periods=w).skew()
        feats[f"kurt_ret_{w}"] = ret1.rolling(w, min_periods=w).kurt()

    # Rolling autocorr of returns (multiple windows × lags)
    for w in acf_windows:
        for L in acf_lags:
            feats[f"autocorr_ret_w{w}_lag{L}"] = ret1.rolling(w, min_periods=w).corr(ret1.shift(L))

    # Position within rolling range (0..1) and percentile ranks
    for w in pos_range_windows:
        feats[f"pos_in_range_{w}"] = _rolling_pos_in_range(close, w)
        feats[f"pctrank_{w}"] = close.rolling(w, min_periods=w).apply(_rolling_percentile_rank, raw=True)

    # Drawdowns (current & min-in-window)
    cum_max = close.cummax()
    curr_dd = (close / cum_max) - 1.0
    feats["drawdown_curr"] = curr_dd
    for w in dd_windows:
        wmax = close.rolling(w, min_periods=w).max()
        wdd = (close / wmax) - 1.0
        feats[f"drawdown_curr_{w}"] = wdd
        # worst drawdown observed inside each rolling window
        # compute rolling max drawdown via rolling of (close/rolling_max -1) min
        feats[f"drawdown_min_{w}"] = wdd.rolling(w, min_periods=w).min()

    # Time since rolling high/low
    for w in dd_windows:
        # positions within window: 0..w-1, we convert to "age"
        pos_max = close.rolling(w, min_periods=w).apply(np.argmax, raw=True)
        pos_min = close.rolling(w, min_periods=w).apply(np.argmin, raw=True)
        feats[f"days_since_high_{w}"] = (w - 1) - pos_max
        feats[f"days_since_low_{w}"]  = (w - 1) - pos_min

    # DEMA / TEMA
    for w in dema_tema_windows:
        dema = _dema(close, w)
        tema = _tema(close, w)
        feats[f"dema_{w}"] = dema
        feats[f"tema_{w}"] = tema
        feats[f"dist_to_dema_{w}"] = _safe_div((close - dema).to_numpy(), dema.to_numpy())
        feats[f"dist_to_tema_{w}"] = _safe_div((close - tema).to_numpy(), tema.to_numpy())

    # Rolling Sharpe (mean/std of log returns)
    for w in sharpe_windows:
        mu = ret1.rolling(w, min_periods=w).mean()
        sd = ret1.rolling(w, min_periods=w).std()
        feats[f"sharpe_{w}"] = _safe_div((mu * np.sqrt(252)).to_numpy(), sd.to_numpy())

    # ---------- Macro features ----------
    if macro is not None and len(macro.columns) > 0:
        macro = macro.copy()
        macro.index = pd.to_datetime(macro.index)
        macro = macro.sort_index().reindex(idx)  # align to close index

        # Ensure float
        for col in macro.columns:
            macro[col] = pd.to_numeric(macro[col], errors="coerce")

        # Macro returns
        mret = np.log(macro).diff()

        for col in macro.columns:
            # basic levels & lags
            feats[f"{col}_level"] = macro[col]
            for L in macro_lags:
                feats[f"{col}_ret_lag{L}"] = mret[col].shift(L)

            # rolling stats & correlations/betas with the asset's returns
            for w in macro_windows:
                feats[f"{col}_vol_{w}"] = mret[col].rolling(w, min_periods=w).std()
                corr = ret1.rolling(w, min_periods=w).corr(mret[col])
                cov  = ret1.rolling(w, min_periods=w).cov(mret[col])
                varm = mret[col].rolling(w, min_periods=w).var()
                beta = pd.Series(_safe_div(cov.to_numpy(), varm.to_numpy()), index=idx)
                feats[f"corr_{col}_{w}"] = corr
                feats[f"beta_{col}_{w}"] = beta
                feats[f"r2_{col}_{w}"] = corr**2  # simple proxy

            # simple cross terms (contemporaneous and lagged influence)
            for L in (0, 1, 5):
                feats[f"ret_x_{col}_ret_lag{L}"] = ret1 * mret[col].shift(L)

    # Final cleaning
    feats = feats.replace([np.inf, -np.inf], np.nan)
    if dropna:
        feats = feats.dropna()

    return feats


In [7]:

# BACKTESTING FUNCTION
def backtest_1day_leverage(close_prices, signals, leverage=5.0, dates=None, apply_next_day=False):
    """
    close_prices : np.ndarray shape (T,)
    signals      : list or np.ndarray shape (T,) with values in {-1,0,1}
                   +1 = buy, -1 = sell, 0 = flat
    leverage     : float, e.g., 5.0 for 5x long/short
    dates        : optional sequence of datetime-like (len T). If provided, returns a DataFrame.
    apply_next_day : if True, use signal[t] on return[t+1] (no look-ahead for EOD signals)
                     if False, use signal[t] on return[t] (intraday/same-day signal)
    Returns:
        If dates is None: dict of NumPy arrays
        Else            : Pandas DataFrame indexed by dates
    """
    cp = np.asarray(close_prices, dtype=float).reshape(-1)
    sig = np.asarray(signals, dtype=float).reshape(-1)

    if cp.ndim != 1 or sig.ndim != 1:
        raise ValueError("close_prices and signals must be 1-D.")
    if cp.shape[0] != sig.shape[0]:
        raise ValueError(f"Length mismatch: prices={cp.shape[0]}, signals={sig.shape[0]}")

    T = cp.shape[0]

    # daily % change; first element is NaN (no prior close)
    daily_ret = np.empty(T, dtype=float)
    daily_ret[0] = np.nan
    daily_ret[1:] = (cp[1:] - cp[:-1]) / cp[:-1]

    # apply signal with optional next-day shift
    if apply_next_day:
        # use today’s signal for tomorrow’s return
        eff_sig = np.empty(T, dtype=float); eff_sig[:] = 0.0
        eff_sig[1:] = sig[:-1]
    else:
        eff_sig = sig

    # strategy return for each day (NaN day -> 0 return)
    strat_ret = leverage * eff_sig * daily_ret
    strat_ret[~np.isfinite(strat_ret)] = 0.0  # set NaN on day 0 to 0

    # equity curve (start at 1.0)
    equity = np.empty(T, dtype=float)
    equity[0] = 1.0
    # (1 + r_t) cumulative product; handle potential tiny numerical issues
    equity[1:] = np.cumprod(1.0 + strat_ret[1:]) * 1.0

    # Pack results
    if dates is None:
        return {
            "close": cp,
            "signal": sig,
            "daily_ret": daily_ret,
            "strat_ret": strat_ret,
            "equity": equity,
        }
    else:
        idx = pd.to_datetime(pd.Index(dates))
        out = pd.DataFrame({
            "close": cp,
            "signal": sig,
            "daily_ret": daily_ret,
            "strat_ret": strat_ret,
            "equity": equity,
        }, index=idx)
        return out





In [8]:
# FUNCTIONS FOR PLOTTING


def plot_equity_price_signals(close_prices, equity, signals, dates=None, title="Strategy vs Asset"):
    cp = np.asarray(close_prices, dtype=float).reshape(-1)
    eq = np.asarray(equity, dtype=float).reshape(-1)
    sig = np.asarray(signals, dtype=int).reshape(-1)
    if not (len(cp) == len(eq) == len(sig)):
        raise ValueError(f"Length mismatch: price={len(cp)}, equity={len(eq)}, signals={len(sig)}")

    # Normalize price to start at 1.0
    cp_norm = cp / cp[0]

    # Handle dates
    if dates is None:
        x = np.arange(len(cp))
        x_for_scatter = x
        xlabel = "Index"
    else:
        idx = pd.to_datetime(pd.Index(dates))
        x = idx
        x_for_scatter = np.array(idx)
        xlabel = "Date"

    buys  = sig == 1
    sells = sig == -1

    fig, ax = plt.subplots()

    # Plot normalized price
    ax.plot(x, cp_norm, label="Price (normed)", color="tab:blue")
    ax.scatter(x_for_scatter[buys],  cp_norm[buys],  marker="^", s=40, label="Buy", color="green")
    ax.scatter(x_for_scatter[sells], cp_norm[sells], marker="v", s=40, label="Sell", color="red")

    # Plot equity
    ax.plot(x, eq, label="Equity", color="tab:orange")

    ax.set_xlabel(xlabel)
    ax.set_ylabel("Normalized Value (start=1.0)")
    ax.legend(loc="best")

    plt.title(title)
    plt.tight_layout()
    plt.show()



# Convenience wrapper if you pass the backtest output directly
def plot_from_backtest(bt):
    """
    bt: dict returned by backtest_1day_leverage(...) OR a DataFrame with
        columns ['close','equity','signal'] and a DatetimeIndex.
    """
    if isinstance(bt, dict):
        return plot_equity_price_signals(bt["close"], bt["equity"], bt["signal"])
    else:
        return plot_equity_price_signals(
            bt["close"].to_numpy(),
            bt["equity"].to_numpy(),
            bt["signal"].to_numpy(),
            dates=bt.index
        )

In [9]:
# PERFORMANCE METRICS FUNCTIONS

def tangency_weights(returns,dropna=True,scale_cov=1):
    if dropna:
        returns = returns.dropna()

    covmat_full = returns.cov()
    covmat_diag = np.diag(np.diag(covmat_full))
    covmat = scale_cov * covmat_full + (1-scale_cov) * covmat_diag

    weights = np.linalg.solve(covmat,returns.mean())
    weights = weights / weights.sum()

    return pd.DataFrame(weights, index=returns.columns,columns=['tangency weights'])


        

def performanceMetrics(returns,annualization=1, quantile=.05):
    metrics = pd.DataFrame(index=returns.columns)
    metrics['Mean'] = returns.mean() * annualization
    metrics['Vol'] = returns.std() * np.sqrt(annualization)
    metrics['Sharpe'] = (returns.mean() / returns.std()) * np.sqrt(annualization)

    metrics['Min'] = returns.min()
    metrics['Max'] = returns.max()
    return metrics





def display_correlation(df,list_maxmin=True):
    
    corrmat = df.corr()
    #ignore self-correlation
    corrmat[corrmat==1] = None
    sns.heatmap(corrmat)

    if list_maxmin:
        corr_rank = corrmat.unstack().sort_values().dropna()
        pair_max = corr_rank.index[-1]
        pair_min = corr_rank.index[0]

        print(f'MIN Correlation pair is {pair_min}')
        print(f'MAX Correlation pair is {pair_max}')
        
    return

        
        
def maximumDrawdown(returns):
    cum_returns = (1 + returns).cumprod()
    rolling_max = cum_returns.cummax()
    drawdown = (cum_returns - rolling_max) / rolling_max

    max_drawdown = drawdown.min()
    end_date = drawdown.idxmin()
    summary = pd.DataFrame({'Max Drawdown': max_drawdown, 'Bottom': end_date})

    for col in drawdown:
        summary.loc[col,'Peak'] = (rolling_max.loc[:end_date[col],col]).idxmax()
        recovery = (drawdown.loc[end_date[col]:,col])
        try:
            summary.loc[col,'Recover'] = pd.to_datetime(recovery[recovery >= 0].index[0])
        except:
            summary.loc[col,'Recover'] = pd.to_datetime(None)

        summary['Peak'] = pd.to_datetime(summary['Peak'])
        try:
            summary['Duration (to Recover)'] = (summary['Recover'] - summary['Peak'])
        except:
            summary['Duration (to Recover)'] = None
            
        summary = summary[['Max Drawdown','Peak','Bottom','Recover','Duration (to Recover)']]

    return summary    




def tailMetrics(returns, quantile=.05, relative=False, mdd=True):
    metrics = pd.DataFrame(index=returns.columns)
    metrics['Skewness'] = returns.skew()
    metrics['Kurtosis'] = returns.kurtosis()

    VaR = returns.quantile(quantile)
    CVaR = (returns[returns < returns.quantile(quantile)]).mean()

    if relative:
        VaR = (VaR - returns.mean())/returns.std()
        CVaR = (CVaR - returns.mean())/returns.std()

    metrics[f'VaR ({quantile})'] = VaR
    metrics[f'CVaR ({quantile})'] = CVaR

    if mdd:
        mdd_stats = maximumDrawdown(returns)
        metrics = metrics.join(mdd_stats)

        if relative:
            metrics['Max Drawdown'] = (metrics['Max Drawdown'] - returns.mean())/returns.std()

    return metrics






def get_ols_metrics(regressors, targets, annualization=1, ignorenan=True):
    # ensure regressors and targets are pandas dataframes, as expected
    if not isinstance(regressors, pd.DataFrame):
        regressors = regressors.to_frame()
    if not isinstance(targets, pd.DataFrame):
        targets = targets.to_frame()

    # align the targets and regressors on the same dates
    df_aligned = targets.join(regressors, how='inner', lsuffix='y ')
    Y = df_aligned[targets.columns]
    Xset = df_aligned[regressors.columns]

    reg = pd.DataFrame(index=targets.columns)
    for col in Y.columns:
        y = Y[col]
        
        if ignorenan:
            # ensure we use only non-NaN dates
            alldata = Xset.join(y,lsuffix='X')
            mask = alldata.notnull().all(axis=1)
            y = y[mask]
            X = Xset[mask]
        else:
            X = Xset

        model = LinearRegression().fit(X, y)
        reg.loc[col, 'alpha'] = model.intercept_ * annualization
        reg.loc[col, regressors.columns] = model.coef_
        reg.loc[col, 'r-squared'] = model.score(X, y)

        # sklearn does not return the residuals, so we need to build them
        yfit = model.predict(X)
        residuals = y - yfit

        num_roundoff = 1e-12
        
        # Treynor Ratio is only defined for univariate regression
        if Xset.shape[1] == 1:
            if np.abs(model.coef_) < num_roundoff:
                reg.loc[col,'Treynor Ratio'] = None
            else:
                reg.loc[col,'Treynor Ratio'] = (y.mean() / model.coef_) * annualization

        
        # if intercept =0, numerical roundoff will nonetheless show nonzero Info Ratio        
        if np.abs(model.intercept_) < num_roundoff:
            reg.loc[col, 'Info Ratio'] = None
        else:
            reg.loc[col, 'Info Ratio'] = (model.intercept_ / residuals.std()) * np.sqrt(annualization)

    return reg





def penalized_reg_limit_gross(func, X, y, limit=2, penalty=1e-6, fit_intercept=True):
    wts = np.ones(X.shape[1]) * 100
    
    while np.abs(wts).sum()>limit:
        penalty *= 1.1
        model = func(alpha=penalty, fit_intercept=fit_intercept).fit(X,y)
        wts = model.coef_ / model.coef_.sum()
    
    return wts, penalty



def penalized_reg_limit_X(func, X, y, limit=10, penalty=1e-6, fit_intercept=True):
    wts = np.ones(X.shape[1]) * 100
    
    Nx = np.inf
    while Nx>limit:
        penalty *= 1.1
        model = func(alpha=penalty, fit_intercept=fit_intercept).fit(X,y)
        wts = model.coef_ / model.coef_.sum()
    
        if func is Lasso:
            Nx = (np.abs(wts)>1e-4).sum()            
        else:
            Nx = 0
            
    return wts, penalty






import scipy.cluster.hierarchy as sch

def cluster_corr(corr_array, inplace=False):
    """
    Rearranges the correlation matrix, corr_array, so that groups of highly 
    correlated variables are next to eachother 
    
    Parameters
    ----------
    corr_array : pandas.DataFrame or numpy.ndarray
        a NxN correlation matrix 
        
    Returns
    -------
    pandas.DataFrame or numpy.ndarray
        a NxN correlation matrix with the columns and rows rearranged
    """
    pairwise_distances = sch.distance.pdist(corr_array)
    linkage = sch.linkage(pairwise_distances, method='complete')
    cluster_distance_threshold = pairwise_distances.max()/2
    idx_to_cluster_array = sch.fcluster(linkage, cluster_distance_threshold, 
                                        criterion='distance')
    idx = np.argsort(idx_to_cluster_array)
    
    if not inplace:
        corr_array = corr_array.copy()
    
    if isinstance(corr_array, pd.DataFrame):
        return corr_array.iloc[idx, :].T.iloc[idx, :]
    return corr_array[idx, :][:, idx]

------------
CORE TRADING CLASS 
------------

In [20]:
import alpaca_trade_api as tradeapi
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime, timedelta
import time
import logging
from typing import Optional, Dict, List, Union
import warnings
warnings.filterwarnings('ignore')

class CoreTradingSystem:
    """
    Core trading system that integrates:
    - Alpaca API for live trading
    - yfinance for historical data
    - Technical indicators
    - Backtesting capabilities
    - Performance metrics
    """
    
    def __init__(self, 
                 alpaca_api_key: str,
                 alpaca_secret_key: str,
                 alpaca_base_url: str = "https://paper-api.alpaca.markets",
                 default_lookback_days: int = 2520):  # ~10 years
        
        # Initialize Alpaca API
        self.api = tradeapi.REST(
            alpaca_api_key,
            alpaca_secret_key,
            alpaca_base_url,
            api_version='v2'
        )
        
        # Trading parameters
        self.default_lookback = default_lookback_days
        self.positions = {}
        self.signals_history = {}
        self.performance_data = {}
        
        # Set up logging
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)
        
        # Test connection
        try:
            account = self.api.get_account()
            self.logger.info(f"Connected to Alpaca - Account Status: {account.status}")
            self.logger.info(f"Buying Power: ${account.buying_power}")
        except Exception as e:
            self.logger.error(f"Failed to connect to Alpaca: {e}")
            raise
    
    def get_historical_data(self, symbol: str, lookback_days: Optional[int] = None) -> pd.Series:
        """
        Get historical price data using yfinance
        Returns close prices as pandas Series
        """
        if lookback_days is None:
            lookback_days = self.default_lookback
            
        try:
            # Calculate date range
            end_date = datetime.now()
            start_date = end_date - timedelta(days=lookback_days)
            
            # Fetch data using yfinance
            ticker = yf.Ticker(symbol)
            hist_data = ticker.history(start=start_date, end=end_date)
            
            if hist_data.empty:
                raise ValueError(f"No data found for symbol {symbol}")
            
            # Return close prices
            close_prices = hist_data['Close']
            close_prices.name = 'close'
            
            self.logger.info(f"Retrieved {len(close_prices)} days of data for {symbol}")
            return close_prices
            
        except Exception as e:
            self.logger.error(f"Error fetching data for {symbol}: {e}")
            raise
    
    def create_features(self, symbol: str, macro_data: Optional[pd.DataFrame] = None) -> pd.DataFrame:
        """
        Create technical indicators for a given symbol
        Uses your build_tech_indicators function
        """
        try:
            # Get historical data
            close_prices = self.get_historical_data(symbol)
            
            # Create technical indicators using your existing function
            # Note: Using your original function names with asterisks - you'll need to fix these
            features = self.build_tech_indicators(close_prices, macro=macro_data)
            
            self.logger.info(f"Created {len(features.columns)} features for {symbol}")
            return features
            
        except Exception as e:
            self.logger.error(f"Error creating features for {symbol}: {e}")
            raise
    
    def generate_signals(self, features: pd.DataFrame, model=None) -> pd.Series:
        """
        Generate trading signals from features
        Override this method to implement your specific strategy
        
        Returns:
            pd.Series with values in {-1, 0, 1}
            -1 = sell/short, 0 = hold/flat, 1 = buy/long
        """
        # Placeholder strategy - implement your ML model here
        # This is a simple momentum example
        
        if 'ret_1' in features.columns:
            # Simple momentum strategy based on 1-day return
            signals = pd.Series(0, index=features.index)
            
            # Buy if positive momentum, sell if negative
            momentum = features['ret_1'].rolling(5).mean()
            signals[momentum > 0.001] = 1   # Buy signal
            signals[momentum < -0.001] = -1  # Sell signal
            
            return signals
        else:
            # Return neutral signals if no momentum data
            return pd.Series(0, index=features.index)
    
    def backtest_strategy(self, symbol: str, leverage: float = 1.0, 
                         start_date: Optional[str] = None, 
                         end_date: Optional[str] = None) -> Dict:
        """
        Backtest the strategy for a given symbol
        """
        try:
            # Create features
            features = self.create_features(symbol)
            
            # Generate signals
            signals = self.generate_signals(features)
            
            # Filter by date range if specified
            if start_date or end_date:
                mask = pd.Series(True, index=signals.index)
                if start_date:
                    mask = mask & (signals.index >= pd.to_datetime(start_date))
                if end_date:
                    mask = mask & (signals.index <= pd.to_datetime(end_date))
                
                signals = signals[mask]
                # Get corresponding close prices
                close_prices = self.get_historical_data(symbol)
                close_prices = close_prices[mask]
            else:
                close_prices = self.get_historical_data(symbol)
                # Align signals and prices
                common_dates = signals.index.intersection(close_prices.index)
                signals = signals[common_dates]
                close_prices = close_prices[common_dates]
            
            # Run backtest using your existing function
            backtest_results = self.backtest_1day_leverage(
                close_prices=close_prices.values,
                signals=signals.values,
                leverage=leverage,
                dates=signals.index,
                apply_next_day=True  # Avoid look-ahead bias
            )
            
            # Calculate performance metrics
            returns = backtest_results['strat_ret']
            returns_series = pd.Series(returns, index=backtest_results.index)
            
            # Store results
            self.performance_data[symbol] = {
                'backtest': backtest_results,
                'returns': returns_series,
                'signals': signals,
                'leverage': leverage
            }
            
            self.logger.info(f"Backtesting completed for {symbol}")
            return backtest_results
            
        except Exception as e:
            self.logger.error(f"Error backtesting {symbol}: {e}")
            raise
    
    def get_current_position(self, symbol: str) -> float:
        """Get current position size for a symbol"""
        try:
            position = self.api.get_position(symbol)
            return float(position.qty)
        except:
            # No position found
            return 0.0
    
    def place_order(self, symbol: str, qty: float, side: str = 'buy') -> bool:
        """
        Place an order with Alpaca
        
        Args:
            symbol: Stock symbol
            qty: Quantity (positive number)
            side: 'buy' or 'sell'
        """
        try:
            if qty == 0:
                self.logger.info(f"No order placed for {symbol} - zero quantity")
                return True
            
            order = self.api.submit_order(
                symbol=symbol,
                qty=abs(qty),
                side=side,
                type='market',
                time_in_force='day'
            )
            
            self.logger.info(f"Order placed: {side} {qty} shares of {symbol}")
            return True
            
        except Exception as e:
            self.logger.error(f"Error placing order for {symbol}: {e}")
            return False
    
    def execute_signal(self, symbol: str, signal: int, position_size: float = 1000) -> bool:
        """
        Execute a trading signal
        
        Args:
            symbol: Stock symbol
            signal: -1 (sell/short), 0 (hold), 1 (buy/long)
            position_size: Dollar amount to trade
        """
        try:
            # Get current price and position
            current_price = self.get_historical_data(symbol, lookback_days=2).iloc[-1]
            current_position = self.get_current_position(symbol)
            
            # Calculate target position
            if signal == 1:  # Buy signal
                target_shares = position_size / current_price
            elif signal == -1:  # Sell signal
                target_shares = -position_size / current_price
            else:  # Hold signal
                target_shares = current_position
            
            # Calculate shares to trade
            shares_to_trade = target_shares - current_position
            
            if abs(shares_to_trade) < 1:  # Less than 1 share
                return True
            
            # Execute trade
            if shares_to_trade > 0:
                success = self.place_order(symbol, shares_to_trade, 'buy')
            else:
                success = self.place_order(symbol, abs(shares_to_trade), 'sell')
            
            if success:
                self.positions[symbol] = target_shares
                self.logger.info(f"Signal executed: {symbol} signal={signal}, position={target_shares:.2f}")
            
            return success
            
        except Exception as e:
            self.logger.error(f"Error executing signal for {symbol}: {e}")
            return False
    
    def run_strategy(self, symbols: List[str], position_size: float = 1000, 
                    sleep_time: int = 3600) -> None:
        """
        Run the trading strategy continuously
        
        Args:
            symbols: List of symbols to trade
            position_size: Dollar amount per position
            sleep_time: Time to sleep between iterations (seconds)
        """
        self.logger.info(f"Starting strategy for symbols: {symbols}")
        
        while True:
            try:
                # Check if market is open
                clock = self.api.get_clock()
                if not clock.is_open:
                    self.logger.info("Market is closed, waiting...")
                    time.sleep(300)  # Sleep 5 minutes
                    continue
                
                for symbol in symbols:
                    try:
                        # Create features
                        features = self.create_features(symbol)
                        
                        # Generate signal for latest data point
                        signals = self.generate_signals(features)
                        latest_signal = signals.iloc[-1] if len(signals) > 0 else 0
                        
                        # Execute signal
                        self.execute_signal(symbol, latest_signal, position_size)
                        
                        # Store signal
                        if symbol not in self.signals_history:
                            self.signals_history[symbol] = []
                        self.signals_history[symbol].append({
                            'timestamp': datetime.now(),
                            'signal': latest_signal
                        })
                        
                    except Exception as e:
                        self.logger.error(f"Error processing {symbol}: {e}")
                        continue
                
                # Sleep until next iteration
                self.logger.info(f"Strategy iteration complete, sleeping {sleep_time} seconds")
                time.sleep(sleep_time)
                
            except KeyboardInterrupt:
                self.logger.info("Strategy interrupted by user")
                break
            except Exception as e:
                self.logger.error(f"Unexpected error in strategy loop: {e}")
                time.sleep(60)  # Sleep 1 minute on error
    
    def get_performance_summary(self, symbol: str) -> Dict:
        """Get performance summary for a symbol"""
        if symbol not in self.performance_data:
            self.logger.warning(f"No performance data found for {symbol}")
            return {}
        
        data = self.performance_data[symbol]
        returns = data['returns']
        
        # Calculate basic metrics
        total_return = (data['backtest']['equity'].iloc[-1] - 1) * 100
        sharpe_ratio = (returns.mean() / returns.std()) * np.sqrt(252) if returns.std() > 0 else 0
        max_drawdown = ((data['backtest']['equity'].cummax() - data['backtest']['equity']) / data['backtest']['equity'].cummax()).max() * 100
        
        return {
            'symbol': symbol,
            'total_return_pct': total_return,
            'sharpe_ratio': sharpe_ratio,
            'max_drawdown_pct': max_drawdown,
            'num_trades': (data['signals'] != 0).sum(),
            'win_rate': (returns[returns > 0]).count() / len(returns) * 100 if len(returns) > 0 else 0
        }
    
    # Include your existing functions (with original names)
    # You'll need to copy these from your previous code parts
    
    def build_tech_indicators(self,
        close: Union[pd.Series, pd.DataFrame],
        macro: Optional[pd.DataFrame] = None,
        *,
        ma_windows: Sequence[int] = (
            3,5,7,9,10,12,14,15,18,20,21,24,30,34,35,40,45,50,55,60,63,70,75,80,90,100,120,126,150,180,200,210,220,240,250,252
        ),
        roc_periods: Sequence[int] = (
            1,2,3,4,5,7,9,10,12,14,15,20,21,30,35,40,45,50,60,63,90,120,126,150,180,200,252
        ),
        rsi_windows: Sequence[int] = (6,7,9,10,12,14,20,21,28),
        bb_windows: Sequence[int] = (20,50,100),
        bb_k: float = 2.0,
        ewm_vol_windows: Sequence[int] = (10,20,21,30,50,63),
        acf_windows: Sequence[int] = (21,63,126),
        acf_lags: Sequence[int] = (1,2,3,4,5),
        skew_kurt_windows: Sequence[int] = (21,63,126,252),
        dd_windows: Sequence[int] = (21,63,126,252),
        pos_range_windows: Sequence[int] = (10,20,50,100,252),
        dema_tema_windows: Sequence[int] = (10,12,14,20,21,30,35,50,63,100),
        sharpe_windows: Sequence[int] = (21,63,126,252),
        macro_lags: Sequence[int] = (1,5,10),
        macro_windows: Sequence[int] = (5,21,63,126,252),
        dropna: bool = True
    ) -> pd.DataFrame:
        """
        Build a wide feature matrix of technical indicators from close prices and optional macro series.
        Parameters
        ----------
        close : pd.Series or DataFrame
            Close prices (datetime index). If DataFrame, must include 'Close' or have 1 column.
        macro : pd.DataFrame, optional
            Exogenous series (e.g., VIX). Columns = variables, datetime index. Aligned to 'close'.
        Returns
        -------
        pd.DataFrame
            Time-indexed feature DataFrame.
        """
        close = _ensure_series(close, name="close")
        idx = close.index
        feats = pd.DataFrame(index=idx)

        # Basic transforms
        logp = np.log(close)
        ret1 = logp.diff()                   # log return t/t-1
        feats["ret_1"] = ret1

        # Multi-horizon ROC (simple returns) and log returns
        for p in roc_periods:
            feats[f"roc_{p}"] = close.pct_change(p)
            feats[f"logret_{p}"] = logp.diff(p)

        # Moving averages, EMA, rolling std, z-score, distance-to-MA
        for w in ma_windows:
            sma = close.rolling(w, min_periods=w).mean()
            ema = close.ewm(span=w, adjust=False, min_periods=w).mean()
            std = close.rolling(w, min_periods=w).std()
            feats[f"sma_{w}"] = sma
            feats[f"ema_{w}"] = ema
            feats[f"std_{w}"] = std
            feats[f"zscore_{w}"] = _safe_div((close - sma).to_numpy(), std.to_numpy())
            feats[f"dist_to_sma_{w}"] = _safe_div((close - sma).to_numpy(), sma.to_numpy())

        # RSI
        for w in rsi_windows:
            feats[f"rsi_{w}"] = _rsi(close, w)

        # Bollinger features
        for w in bb_windows:
            mid = close.rolling(w, min_periods=w).mean()
            sd = close.rolling(w, min_periods=w).std()
            upper = mid + bb_k * sd
            lower = mid - bb_k * sd
            feats[f"bb_pctb_{w}"] = _safe_div((close - lower).to_numpy(), (upper - lower).to_numpy())
            feats[f"bb_bw_{w}"] = _safe_div((upper - lower).to_numpy(), mid.to_numpy())

        # MACD variants
        for (f, s, sig) in [(12,26,9), (5,35,5), (8,17,9)]:
            macd, sigl, hist = _macd(close, f, s, sig)
            feats[f"macd_{f}_{s}_{sig}"] = macd
            feats[f"macd_signal_{f}_{s}_{sig}"] = sigl
            feats[f"macd_hist_{f}_{s}_{sig}"] = hist

        # EWM volatility on log returns
        for w in ewm_vol_windows:
            feats[f"ewm_vol_{w}"] = ret1.ewm(span=w, adjust=False, min_periods=w).std()

        # Rolling skew/kurt of log returns
        for w in skew_kurt_windows:
            feats[f"skew_ret_{w}"] = ret1.rolling(w, min_periods=w).skew()
            feats[f"kurt_ret_{w}"] = ret1.rolling(w, min_periods=w).kurt()

        # Rolling autocorr of returns (multiple windows × lags)
        for w in acf_windows:
            for L in acf_lags:
                feats[f"autocorr_ret_w{w}_lag{L}"] = ret1.rolling(w, min_periods=w).corr(ret1.shift(L))

        # Position within rolling range (0..1) and percentile ranks
        for w in pos_range_windows:
            feats[f"pos_in_range_{w}"] = _rolling_pos_in_range(close, w)
            feats[f"pctrank_{w}"] = close.rolling(w, min_periods=w).apply(_rolling_percentile_rank, raw=True)

        # Drawdowns (current & min-in-window)
        cum_max = close.cummax()
        curr_dd = (close / cum_max) - 1.0
        feats["drawdown_curr"] = curr_dd
        for w in dd_windows:
            wmax = close.rolling(w, min_periods=w).max()
            wdd = (close / wmax) - 1.0
            feats[f"drawdown_curr_{w}"] = wdd
            # worst drawdown observed inside each rolling window
            # compute rolling max drawdown via rolling of (close/rolling_max -1) min
            feats[f"drawdown_min_{w}"] = wdd.rolling(w, min_periods=w).min()

        # Time since rolling high/low
        for w in dd_windows:
            # positions within window: 0..w-1, we convert to "age"
            pos_max = close.rolling(w, min_periods=w).apply(np.argmax, raw=True)
            pos_min = close.rolling(w, min_periods=w).apply(np.argmin, raw=True)
            feats[f"days_since_high_{w}"] = (w - 1) - pos_max
            feats[f"days_since_low_{w}"]  = (w - 1) - pos_min

        # DEMA / TEMA
        for w in dema_tema_windows:
            dema = _dema(close, w)
            tema = _tema(close, w)
            feats[f"dema_{w}"] = dema
            feats[f"tema_{w}"] = tema
            feats[f"dist_to_dema_{w}"] = _safe_div((close - dema).to_numpy(), dema.to_numpy())
            feats[f"dist_to_tema_{w}"] = _safe_div((close - tema).to_numpy(), tema.to_numpy())

        # Rolling Sharpe (mean/std of log returns)
        for w in sharpe_windows:
            mu = ret1.rolling(w, min_periods=w).mean()
            sd = ret1.rolling(w, min_periods=w).std()
            feats[f"sharpe_{w}"] = _safe_div((mu * np.sqrt(252)).to_numpy(), sd.to_numpy())

        # ---------- Macro features ----------
        if macro is not None and len(macro.columns) > 0:
            macro = macro.copy()
            macro.index = pd.to_datetime(macro.index)
            macro = macro.sort_index().reindex(idx)  # align to close index

            # Ensure float
            for col in macro.columns:
                macro[col] = pd.to_numeric(macro[col], errors="coerce")

            # Macro returns
            mret = np.log(macro).diff()

            for col in macro.columns:
                # basic levels & lags
                feats[f"{col}_level"] = macro[col]
                for L in macro_lags:
                    feats[f"{col}_ret_lag{L}"] = mret[col].shift(L)

                # rolling stats & correlations/betas with the asset's returns
                for w in macro_windows:
                    feats[f"{col}_vol_{w}"] = mret[col].rolling(w, min_periods=w).std()
                    corr = ret1.rolling(w, min_periods=w).corr(mret[col])
                    cov  = ret1.rolling(w, min_periods=w).cov(mret[col])
                    varm = mret[col].rolling(w, min_periods=w).var()
                    beta = pd.Series(_safe_div(cov.to_numpy(), varm.to_numpy()), index=idx)
                    feats[f"corr_{col}_{w}"] = corr
                    feats[f"beta_{col}_{w}"] = beta
                    feats[f"r2_{col}_{w}"] = corr**2  # simple proxy

                # simple cross terms (contemporaneous and lagged influence)
                for L in (0, 1, 5):
                    feats[f"ret_x_{col}_ret_lag{L}"] = ret1 * mret[col].shift(L)

        # Final cleaning
        feats = feats.replace([np.inf, -np.inf], np.nan)
        if dropna:
            feats = feats.dropna()

        return feats

    
    def backtest_1day_leverage(self, close_prices, signals, leverage=5.0, dates=None, apply_next_day=False):
        """
        close_prices : np.ndarray shape (T,)
        signals      : list or np.ndarray shape (T,) with values in {-1,0,1}
                    +1 = buy, -1 = sell, 0 = flat
        leverage     : float, e.g., 5.0 for 5x long/short
        dates        : optional sequence of datetime-like (len T). If provided, returns a DataFrame.
        apply_next_day : if True, use signal[t] on return[t+1] (no look-ahead for EOD signals)
                        if False, use signal[t] on return[t] (intraday/same-day signal)
        Returns:
            If dates is None: dict of NumPy arrays
            Else            : Pandas DataFrame indexed by dates
        """
        cp = np.asarray(close_prices, dtype=float).reshape(-1)
        sig = np.asarray(signals, dtype=float).reshape(-1)

        if cp.ndim != 1 or sig.ndim != 1:
            raise ValueError("close_prices and signals must be 1-D.")
        if cp.shape[0] != sig.shape[0]:
            raise ValueError(f"Length mismatch: prices={cp.shape[0]}, signals={sig.shape[0]}")

        T = cp.shape[0]

        # daily % change; first element is NaN (no prior close)
        daily_ret = np.empty(T, dtype=float)
        daily_ret[0] = np.nan
        daily_ret[1:] = (cp[1:] - cp[:-1]) / cp[:-1]

        # apply signal with optional next-day shift
        if apply_next_day:
            # use today’s signal for tomorrow’s return
            eff_sig = np.empty(T, dtype=float); eff_sig[:] = 0.0
            eff_sig[1:] = sig[:-1]
        else:
            eff_sig = sig

        # strategy return for each day (NaN day -> 0 return)
        strat_ret = leverage * eff_sig * daily_ret
        strat_ret[~np.isfinite(strat_ret)] = 0.0  # set NaN on day 0 to 0

        # equity curve (start at 1.0)
        equity = np.empty(T, dtype=float)
        equity[0] = 1.0
        # (1 + r_t) cumulative product; handle potential tiny numerical issues
        equity[1:] = np.cumprod(1.0 + strat_ret[1:]) * 1.0

        # Pack results
        if dates is None:
            return {
                "close": cp,
                "signal": sig,
                "daily_ret": daily_ret,
                "strat_ret": strat_ret,
                "equity": equity,
            }
        else:
            idx = pd.to_datetime(pd.Index(dates))
            out = pd.DataFrame({
                "close": cp,
                "signal": sig,
                "daily_ret": daily_ret,
                "strat_ret": strat_ret,
                "equity": equity,
            }, index=idx)
            return out



In [27]:
def test_trading_system():
    """Test the basic functionality of the trading system"""
    
    print("=" * 60)
    print("TESTING CORE TRADING SYSTEM")
    print("=" * 60)
    
    # Step 1: Test API Connection
    print("\n1. Testing Alpaca Connection...")
    try:
        # You'll need to set these environment variables or replace with your keys
        API_KEY = "PKSRU8TI6JTHJG0KF87R"  # Your paper trading key
        SECRET_KEY = "goumjgb1Ua9JZ5jpgF9iWiM07GJcCiXnwftYq8My"  # Your paper trading secret
        
        trader = CoreTradingSystem(
            alpaca_api_key=API_KEY,
            alpaca_secret_key=SECRET_KEY,
            alpaca_base_url="https://paper-api.alpaca.markets"
        )
        print("✓ Alpaca connection successful!")
        
        # Get account info
        account = trader.api.get_account()
        print(f"✓ Account Status: {account.status}")
        print(f"✓ Buying Power: ${float(account.buying_power):,.2f}")
        
    except Exception as e:
        print(f"✗ Alpaca connection failed: {e}")
        return False
    
    # Step 2: Test Data Retrieval
    print("\n2. Testing Historical Data Retrieval...")
    try:
        test_symbol = "AAPL"
        close_prices = trader.get_historical_data(test_symbol, lookback_days=3650)
        
        print(f"✓ Retrieved {len(close_prices)} days of data for {test_symbol}")
        print(f"✓ Date range: {close_prices.index[0].date()} to {close_prices.index[-1].date()}")
        print(f"✓ Latest price: ${close_prices.iloc[-1]:.2f}")
        print(f"✓ Data type: {type(close_prices)}")
        
    except Exception as e:
        print(f"✗ Data retrieval failed: {e}")
        return False
    
    # Step 3: Test Basic Feature Creation (simplified)
    print("\n3. Testing Basic Feature Creation...")
    try:
        # Simple features without your complex function
        features = pd.DataFrame(index=close_prices.index)
        
        # Basic returns
        features['ret_1'] = close_prices.pct_change()
        features['ret_5'] = close_prices.pct_change(5)
        
        # Simple moving averages
        features['sma_20'] = close_prices.rolling(20).mean()
        features['sma_50'] = close_prices.rolling(50).mean()
        
        # RSI (simplified)
        delta = close_prices.diff()
        gain = (delta.where(delta > 0, 0)).rolling(14).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(14).mean()
        rs = gain / loss
        features['rsi_14'] = 100 - (100 / (1 + rs))
        
        # Remove NaN rows
        features = features.dropna()
        
        print(f"✓ Created {len(features.columns)} basic features")
        print(f"✓ Feature data shape: {features.shape}")
        print(f"✓ Latest RSI: {features['rsi_14'].iloc[-1]:.2f}")
        
    except Exception as e:
        print(f"✗ Feature creation failed: {e}")
        return False
    
    # Step 4: Test Signal Generation
    print("\n4. Testing Signal Generation...")
    try:
        # Simple momentum strategy
        signals = pd.Series(0, index=features.index)
        
        # Buy when price above 20-day MA and RSI < 70
        # Sell when price below 20-day MA and RSI > 30
        current_prices = close_prices.reindex(features.index)
        
        buy_condition = (current_prices > features['sma_20']) & (features['rsi_14'] < 70)
        sell_condition = (current_prices < features['sma_20']) & (features['rsi_14'] > 30)
        
        signals[buy_condition] = 1
        signals[sell_condition] = -1
        
        print(f"✓ Generated signals for {len(signals)} days")
        print(f"✓ Buy signals: {(signals == 1).sum()}")
        print(f"✓ Sell signals: {(signals == -1).sum()}")
        print(f"✓ Hold signals: {(signals == 0).sum()}")
        print(f"✓ Latest signal: {signals.iloc[-1]} ({'BUY' if signals.iloc[-1] == 1 else 'SELL' if signals.iloc[-1] == -1 else 'HOLD'})")
        
    except Exception as e:
        print(f"✗ Signal generation failed: {e}")
        return False
    
    # Step 5: Test Simple Backtest
    print("\n5. Testing Simple Backtest...")
    try:
        # Align data
        common_idx = signals.index.intersection(current_prices.index)
        test_signals = signals[common_idx].values
        test_prices = current_prices[common_idx].values
        test_dates = common_idx
        
        # Simple backtest calculation
        returns = np.diff(test_prices) / test_prices[:-1]
        strategy_returns = test_signals[:-1] * returns  # Use signal to get next day's return
        
        # Calculate cumulative returns
        cum_returns = np.cumprod(1 + strategy_returns)
        total_return = (cum_returns[-1] - 1) * 100
        
        # Calculate some basic metrics
        sharpe = np.mean(strategy_returns) / np.std(strategy_returns) * np.sqrt(252) if np.std(strategy_returns) > 0 else 0
        max_dd = np.max(np.maximum.accumulate(cum_returns) - cum_returns) / np.maximum.accumulate(cum_returns).max() * 100
        
        print(f"✓ Backtest completed over {len(strategy_returns)} days")
        print(f"✓ Total Return: {total_return:.2f}%")
        print(f"✓ Sharpe Ratio: {sharpe:.2f}")
        print(f"✓ Max Drawdown: {max_dd:.2f}%")
        
    except Exception as e:
        print(f"✗ Backtesting failed: {e}")
        return False
    
    # Step 6: Test Position Check
    print("\n6. Testing Position Management...")
    try:
        current_position = trader.get_current_position(test_symbol)
        print(f"✓ Current position in {test_symbol}: {current_position} shares")
        
        # Test getting account positions
        positions = trader.api.list_positions()
        print(f"✓ Total open positions: {len(positions)}")
        
        if len(positions) > 0:
            for pos in positions[:3]:  # Show first 3 positions
                print(f"  - {pos.symbol}: {pos.qty} shares, P&L: ${float(pos.unrealized_pl):.2f}")
        
    except Exception as e:
        print(f"✗ Position check failed: {e}")
        return False
    
    # Step 7: Test Market Hours
    print("\n7. Testing Market Status...")
    try:
        clock = trader.api.get_clock()
        print(f"✓ Market is {'OPEN' if clock.is_open else 'CLOSED'}")
        print(f"✓ Next open: {clock.next_open}")
        print(f"✓ Next close: {clock.next_close}")
        
    except Exception as e:
        print(f"✗ Market status check failed: {e}")
        return False
    
    # Summary
    print("\n" + "=" * 60)
    print("✓ ALL TESTS PASSED! Your trading system is working correctly.")
    print("=" * 60)
    print("\nNext steps:")
    print("1. Implement your full technical indicators")
    print("2. Add your ML model for signal generation")
    print("3. Test with small position sizes")
    print("4. Monitor performance and logs")
    
    return True


In [28]:
success = test_trading_system()

TESTING CORE TRADING SYSTEM

1. Testing Alpaca Connection...
✓ Alpaca connection successful!
✓ Account Status: ACTIVE
✓ Buying Power: $200,000.00

2. Testing Historical Data Retrieval...
✓ Retrieved 2514 days of data for AAPL
✓ Date range: 2015-08-18 to 2025-08-15
✓ Latest price: $231.59
✓ Data type: <class 'pandas.core.series.Series'>

3. Testing Basic Feature Creation...
✓ Created 5 basic features
✓ Feature data shape: (2465, 5)
✓ Latest RSI: 68.05

4. Testing Signal Generation...
✓ Generated signals for 2465 days
✓ Buy signals: 965
✓ Sell signals: 699
✓ Hold signals: 801
✓ Latest signal: 1 (BUY)

5. Testing Simple Backtest...
✓ Backtest completed over 2464 days
✓ Total Return: 714.15%
✓ Sharpe Ratio: 1.00
✓ Max Drawdown: 19.61%

6. Testing Position Management...
✓ Current position in AAPL: 0.0 shares
✓ Total open positions: 0

7. Testing Market Status...
✓ Market is CLOSED
✓ Next open: 2025-08-18 09:30:00-04:00
✓ Next close: 2025-08-18 16:00:00-04:00

✓ ALL TESTS PASSED! Your tradin