In [None]:
import numpy as np
import pandas as pd
import yfinance as yf

# Date range passed to yf.download, as ISO-8601 (YYYY-MM-DD) strings.
# The separators must be plain ASCII hyphens: typographic en dashes (U+2013)
# do not form a valid YYYY-MM-DD date and break date parsing.
start_date = '2010-01-01'
# NOTE(review): end_train is not referenced in the visible cells; presumably the
# last date of the training period — confirm against the train/test split.
end_train = '2018-12-31'
end_date = '2023-12-31'
# Symbols whose price history is downloaded.
tickers = ['AAPL', 'MSFT', 'AMZN', 'GOOGL']

def get_data(ticker):
    """Download the price history for one ticker symbol.

    Calls ``yf.download`` for ``ticker`` over the notebook-level
    ``start_date`` .. ``end_date`` range and returns the result.
    """
    prices = yf.download(ticker, start=start_date, end=end_date)
    # … (preprocessing steps) — omitted in this notebook.
    # NOTE(review): later cells read columns such as 'Adj Close', 'Adj High',
    # 'Adj Low' and 'Returns'; presumably the omitted preprocessing creates
    # them — confirm.
    return prices

# One downloaded frame per symbol, keyed by ticker.
dfs = dict(zip(tickers, map(get_data, tickers)))

In [None]:
pip install D:\xuexi\TA_Lib-0.4.29-cp39-cp39-win_amd64.whl

In [4]:
import talib as ta 
def engineer_features(df):
    """Return a copy of ``df`` with technical-indicator features added.

    Added columns: ``zscore``, ``aroon``, ``price_trend``, ``rsi``, ``macd``,
    ``macd_signal``, ``bb_upper``, ``bb_middle``, ``bb_lower``, ``atr`` and
    ``cci``. The columns ``zscore``, ``aroon``, ``price_trend``, ``rsi``,
    ``macd`` and ``cci`` are then replaced by their decile number (0-9)
    computed over the rows of this frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``Adj Close``, ``Adj High``, ``Adj Low``
        and ``Returns``.

    Returns
    -------
    pandas.DataFrame
        A new frame with every row containing a NaN removed. The input
        frame is left unmodified.

    Raises
    ------
    KeyError
        If any required column is missing.
    """
    required = ("Adj Close", "Adj High", "Adj Low", "Returns")
    missing = [name for name in required if name not in df.columns]
    if missing:
        raise KeyError(f"engineer_features: missing required column(s): {missing}")

    # Work on a copy so the caller's frame is not altered as a side effect.
    df = df.copy()
    close, high, low = df["Adj Close"], df["Adj High"], df["Adj Low"]

    # Distance of the close from its 15-row rolling mean, in rolling std units.
    window = close.rolling(15)
    df["zscore"] = (close - window.mean()) / window.std()
    df["aroon"] = ta.AROONOSC(high, low, timeperiod=14)
    # shift() leaves the current row's return out of the 4-row sum.
    df["price_trend"] = df["Returns"].shift().rolling(4).sum()

    df["rsi"] = ta.RSI(close, timeperiod=14)
    df["macd"], df["macd_signal"], _ = ta.MACD(close)
    df["bb_upper"], df["bb_middle"], df["bb_lower"] = ta.BBANDS(close)
    df["atr"] = ta.ATR(high, low, close, timeperiod=14)
    df["cci"] = ta.CCI(high, low, close, timeperiod=14)

    # Categorize features into deciles. Ranking with method='first' breaks
    # ties so qcut never sees duplicate bin edges.
    # NOTE(review): deciles are computed over all rows passed in, so a row's
    # bin depends on later rows too — confirm this is acceptable for backtests.
    for feature in ["zscore", "aroon", "price_trend", "rsi", "macd", "cci"]:
        df[feature] = pd.qcut(df[feature].rank(method="first"), 10, labels=False)
    return df.dropna()

In [None]:
pip install scikit-learn

In [None]:
from sklearn.feature_selection import SelectKBest, f_classif 
# Candidate predictor columns and the classification target.
features = ["aroon", "zscore", "price_trend", "rsi", "macd", "cci", "atr"]
X = combined_df.loc[:, features]
y = combined_df.loc[:, "Target_cat"]

# Keep the 5 best-scoring features according to f_classif. The selector is
# fitted on the training split only; the same selection is then applied to the
# test split.
# NOTE(review): X_train, y_train and X_test are not defined in any visible
# cell — presumably a split of X / y made elsewhere; confirm.
selector = SelectKBest(score_func=f_classif, k=5)
selector.fit(X_train, y_train)
X_train_selected = selector.transform(X_train)
X_test_selected = selector.transform(X_test)

In [None]:
pip install xgboost

In [None]:
from sklearn.pipeline import Pipeline 
from sklearn.model_selection import GridSearchCV 
from xgboost import XGBClassifier 
from sklearn.preprocessing import StandardScaler

# Standardise the selected features, then classify with XGBoost.
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('classifier', XGBClassifier(eval_metric='logloss', use_label_encoder=False)),
])

# 3 x 3 x 3 = 27 candidate settings. The "classifier__" prefix routes each
# parameter to the pipeline step named 'classifier'.
param_grid = dict(
    classifier__n_estimators=[100, 200, 300],
    classifier__learning_rate=[0.01, 0.1, 0.3],
    classifier__max_depth=[3, 5, 7],
)

# Exhaustive search over the grid: 5-fold cross-validation scored by accuracy,
# using all available cores.
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)
# fit() returns the search object itself, so the best pipeline can be read
# straight off the result.
best_model = grid_search.fit(X_train_selected, y_train).best_estimator_

In [None]:
# classification_report is not imported by any other visible cell, so bring it
# into scope here to avoid a NameError.
from sklearn.metrics import classification_report

# Score the tuned model on the test split.
# NOTE(review): y_test is not defined in any visible cell — confirm where the
# split is made.
y_pred = best_model.predict(X_test_selected)
print(classification_report(y_test, y_pred))

In [None]:
# Turn class predictions into positions: +1 where the model predicts class 1,
# -1 for every other prediction.
# NOTE(review): predictions are made for every row of X, which presumably
# includes the rows the model was trained on — the returns below would then be
# partly in-sample; confirm.
combined_df['Signal'] = np.where(best_model.predict(selector.transform(X)) == 1, 1, -1)
# Per-row strategy return: the position multiplied by the 'Target' column.
combined_df['Model_Returns'] = combined_df['Signal'] * combined_df['Target']

# Compound the returns separately for each stock (first index level). The
# grouped cumprod keeps the full index, so the result is assigned row-for-row
# without relying on index alignment through a partial MultiIndex .loc key.
combined_df['Cumulative_Returns'] = (
    (1 + combined_df['Model_Returns']).groupby(level=0).cumprod()
)

In [None]:
# Annualised Sharpe ratio per stock.
# NOTE(review): the 252 factor assumes one row per trading day — confirm.
risk_free_rate = 0.02

# Group on the first index level (the stock). 'level_0' only exists as a column
# name after reset_index(), so grouping by that name on the indexed frame
# raises KeyError; level=0 addresses the same level directly.
returns_by_stock = combined_df['Model_Returns'].groupby(level=0)
annual_returns = returns_by_stock.mean() * 252
annual_volatility = returns_by_stock.std() * np.sqrt(252)

sharpe_ratio = (annual_returns - risk_free_rate) / annual_volatility
print("Sharpe Ratios:")
print(sharpe_ratio)

In [None]:
import plotly.express as px

# reset_index() turns the index levels into the 'level_0' (stock) and 'Date'
# columns that Plotly Express needs.
fig = px.line(
    combined_df.reset_index(),
    x='Date',
    y='Cumulative_Returns',
    color='level_0',
    # `labels` is keyed by column name; a key of 'y' matches no column and
    # would leave the axis titled with the raw column name.
    labels={'Cumulative_Returns': 'Cumulative Returns', 'level_0': 'Stock'},
    title='Multi-Stock Trading Strategy Performance',
    line_shape='linear',
)
fig.show()