In [7]:
!pip -q install ta yfinance xgboost plotly scikit-learn PyPortfolioOpt

In [9]:
# Import Libraries
import warnings, numpy as np, pandas as pd
warnings.filterwarnings("ignore")

import yfinance as yf
import ta  # technical indicators (stable alternative to pandas_ta)
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report

import plotly.express as px
from pypfopt.expected_returns import mean_historical_return
from pypfopt.risk_models import CovarianceShrinkage
from pypfopt.efficient_frontier import EfficientFrontier, EfficientCVaR
from pypfopt.discrete_allocation import DiscreteAllocation, get_latest_prices

np.random.seed(42)

In [10]:
# Settings
TICKERS = ["AAPL", "MSFT", "GOOG", "AMZN", "META"]   # change as you like
START   = "2014-01-01"
END     = "2025-01-01"
PORTFOLIO_VALUE = 100_000

In [12]:
# Download adjusted close prices
prices = yf.download(TICKERS, start=START, end=END, auto_adjust=True)["Close"]
prices = prices.dropna(how="all")
returns = prices.pct_change().dropna()

print(f"\nData shape: prices={prices.shape}, returns={returns.shape}")

[*********************100%***********************]  5 of 5 completed


Data shape: prices=(2768, 5), returns=(2767, 5)





In [13]:
# Build features with `ta` per ticker
def make_features(series_close: pd.Series) -> pd.DataFrame:
    """Return a DataFrame with indicators built from a close-price series."""
    df = pd.DataFrame({"Close": series_close})
    # RSI
    df["RSI"] = ta.momentum.RSIIndicator(close=df["Close"], window=14).rsi()
    # MACD
    macd = ta.trend.MACD(close=df["Close"])
    df["MACD"] = macd.macd()
    # Bollinger Bands
    bb = ta.volatility.BollingerBands(close=df["Close"], window=20, window_dev=2)
    df["BB_high"] = bb.bollinger_hband()
    df["BB_low"]  = bb.bollinger_lband()
    # Volatility (rolling stdev of daily returns)
    df["Volatility"] = df["Close"].pct_change().rolling(21).std()
    # Label: next-day return (for direction)
    df["NextRet"] = df["Close"].pct_change().shift(-1)
    return df

feat_list = []
for t in TICKERS:
    f = make_features(prices[t].dropna())
    f["Ticker"] = t
    feat_list.append(f)

features = pd.concat(feat_list, axis=0)
features = features.dropna().copy()

# Classification label: 1 if next-day return > 0 else 0
features["Up"] = (features["NextRet"] > 0).astype(int)

X = features[["RSI", "MACD", "BB_high", "BB_low", "Volatility"]]
y = features["Up"]

# Chronological split to avoid leakage
split_idx = int(len(features) * 0.8)
X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]


In [14]:
# Train XGBoost next-day direction model
clf = xgb.XGBClassifier(
    n_estimators=400,
    max_depth=5,
    learning_rate=0.05,
    subsample=0.9,
    colsample_bytree=0.9,
    eval_metric="logloss",
    tree_method="hist"
)
clf.fit(X_train, y_train)

pred_test = clf.predict(X_test)
proba_test = clf.predict_proba(X_test)[:, 1]
acc  = accuracy_score(y_test, pred_test)
auc  = roc_auc_score(y_test, proba_test)

print(f"\nXGBoost next-day direction — Accuracy: {acc:.3f}, ROC-AUC: {auc:.3f}")
print(classification_report(y_test, pred_test, digits=3))


XGBoost next-day direction — Accuracy: 0.509, ROC-AUC: 0.523
              precision    recall  f1-score   support

           0      0.477     0.425     0.450      1295
           1      0.531     0.584     0.556      1447

    accuracy                          0.509      2742
   macro avg      0.504     0.504     0.503      2742
weighted avg      0.506     0.509     0.506      2742



In [15]:
# Latest per-ticker ML signal (today's prob of going up)

# Take the last available feature row for each ticker
last_feats = (
    features.groupby("Ticker")
            .tail(1)
            .set_index("Ticker")
)

last_X = last_feats[["RSI", "MACD", "BB_high", "BB_low", "Volatility"]]
last_feats["ProbUp"] = clf.predict_proba(last_X)[:, 1]
ml_signal = last_feats["ProbUp"].sort_values(ascending=False)
print("\nLatest ML signal (Prob of Up by ticker):")
print(ml_signal)

# Visualize signal
fig = px.bar(ml_signal, title="Latest ML probability of positive return (per ticker)")
fig.update_layout(yaxis_title="Prob(Up)")
fig.show()


Latest ML signal (Prob of Up by ticker):
Ticker
AMZN    0.404742
META    0.398349
GOOG    0.397875
AAPL    0.383231
MSFT    0.219103
Name: ProbUp, dtype: float32


In [17]:
# Portfolio Optimization

mu_all = mean_historical_return(prices)                       # expected returns
S_all  = CovarianceShrinkage(prices).ledoit_wolf()            # robust covariance

# (A) Max Sharpe on all tickers
ef = EfficientFrontier(mu_all, S_all)
w_sharpe = ef.max_sharpe()
w_sharpe = ef.clean_weights()
print("\nMax-Sharpe weights (all tickers):")
print(w_sharpe)

# (B) Min-CVaR on top-N by ML signal
TOP_N = min(5, len(TICKERS))
top_tickers = list(ml_signal.head(TOP_N).index)
ret_subset = returns[top_tickers].dropna()
mu_sub = mean_historical_return(prices[top_tickers])
ef_cvar = EfficientCVaR(mu_sub, ret_subset)
w_cvar = ef_cvar.min_cvar()
w_cvar = ef_cvar.clean_weights()
print(f"\nMin-CVaR weights (top {TOP_N} ML tickers {top_tickers}):")
print(w_cvar)


Max-Sharpe weights (all tickers):
OrderedDict({'AAPL': 0.44159, 'AMZN': 0.06026, 'GOOG': 0.0, 'META': 0.0, 'MSFT': 0.49814})

Min-CVaR weights (top 5 ML tickers ['AMZN', 'META', 'GOOG', 'AAPL', 'MSFT']):
OrderedDict({'AMZN': 0.04783, 'META': 0.0, 'GOOG': 0.23776, 'AAPL': 0.3414, 'MSFT': 0.37301})


In [18]:
# Discrete allocation for both portfolios
latest_prices = get_latest_prices(prices)

da_sharpe = DiscreteAllocation(w_sharpe, latest_prices, total_portfolio_value=PORTFOLIO_VALUE)
alloc_sharpe, cash_left_sharpe = da_sharpe.lp_portfolio()

da_cvar = DiscreteAllocation(w_cvar, latest_prices, total_portfolio_value=PORTFOLIO_VALUE)
alloc_cvar, cash_left_cvar = da_cvar.lp_portfolio()

print("\nDiscrete allocation (Max-Sharpe):", alloc_sharpe, f" | Cash left: ${cash_left_sharpe:.2f}")
print(  "Discrete allocation (Min-CVaR): ", alloc_cvar,  f" | Cash left: ${cash_left_cvar:.2f}")


Discrete allocation (Max-Sharpe): {'AAPL': 177, 'AMZN': 27, 'MSFT': 119}  | Cash left: $24.53
Discrete allocation (Min-CVaR):  {'AMZN': 19, 'GOOG': 126, 'AAPL': 58, 'MSFT': 89}  | Cash left: $107.69


In [19]:
# Quick backtest comparison (equal-weight vs Max-Sharpe)

eq_w = np.repeat(1/len(TICKERS), len(TICKERS))
cum_eq  = (1 + (returns * eq_w).sum(axis=1)).cumprod()

w_vec   = pd.Series(w_sharpe).reindex(returns.columns).fillna(0).values
cum_opt = (1 + (returns * w_vec).sum(axis=1)).cumprod()

cum_df = pd.concat([cum_eq.rename("EqualWeight"), cum_opt.rename("MaxSharpe")], axis=1).dropna()
fig2 = px.line(cum_df, title="Cumulative Growth: Equal-Weight vs Max-Sharpe")
fig2.update_layout(yaxis_title="Growth of $1")
fig2.show()