In [25]:
from SimulateDay import stock_market_simulation, scale_data,train_Optimal_Action, _select_stock, get_stock_data, add_columns, train_Optimal_Action
import xgboost as xgb
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
import numpy as np 
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

def importModels(symbol):
    specificModel = joblib.load(f"models/{symbol}_model.pkl")
    noCovidModel = train_Optimal_Action(symbol=symbol, action_column='Action')
    generalModel = xgb.Booster()    
    generalModel.load_model('models/all_stocks_incremental_model.pkl') 
    return specificModel, generalModel, noCovidModel


In [2]:

specificModel, generalModel, noCovidModel = importModels('AAPL')


Loading data for AAPL ...
Adding columns for AAPL...


  '10_Day_Return', '20_Day_Return', '50_Day_Return', '200_Day_Return']].idxmax(axis=1)
  '10_Day_Return', '20_Day_Return', '50_Day_Return', '200_Day_Return']].idxmax(axis=1)


Training model for AAPL...
Saving model for AAPL...


In [3]:
features = ['Volume', 'MA_10', 'MA_20', 'MA_50', 'MA_200', 'std_10',
                'std_20', 'std_50', 'std_200', 'upper_band_10', 'lower_band_10',
                'upper_band_20', 'lower_band_20', 'upper_band_50', 'lower_band_50',
                'upper_band_200', 'lower_band_200', 'Golden_Cross_Short', 'Golden_Cross_Medium',
                'Golden_Cross_Long', 'Death_Cross_Short', 'Death_Cross_Medium', 'Death_Cross_Long',
                'ROC', 'AVG_Volume_10', 'AVG_Volume_20', 'AVG_Volume_50', 'AVG_Volume_200', 'Doji',
                'Bullish_Engulfing', 'Bearish_Engulfing', 'MACD', 'Signal', 'MACD_Hist', 'TR', 'ATR',
                'RSI_10_Day', '10_Day_ROC', 'Resistance_10_Day', 'Support_10_Day', 'Resistance_20_Day',
                'Support_20_Day', 'Resistance_50_Day', 'Support_50_Day', 'Volume_MA_10', 'Volume_MA_20',
                'Volume_MA_50', 'OBV', 'Z-score']

In [5]:
def train_classification_models(X_train, y_train, X_test, y_test):
    models = {
        "Logistic Regression": LogisticRegression(random_state=42),
        "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
        "SVM": SVC(random_state=42),
        "XGBoost": xgb.XGBClassifier(n_estimators=100, random_state=42)
    }
    
    results = {}
    
    for name, model in models.items():
        print(f"Training {name}...")
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        report = classification_report(y_test, y_pred, output_dict=True)
        
        results[name] = {
            "Model": model,
            "Accuracy": accuracy,
            "Classification Report": report
        }
        
        print(f"--- {name} Results ---")
        print(f"Accuracy: {accuracy}")
        print(classification_report(y_test, y_pred))
    
    return results


In [6]:
def run_simulations_on_models(X_train, y_train, X_test, y_test, stock_data, initial_cash, days):
    # Train different models
    trained_models = train_classification_models(X_train, y_train, X_test, y_test)
    
    # Dictionary to store simulation results for each model
    simulation_results = {}
    
    # Run stock market simulation for each trained model
    for model_name, model_data in trained_models.items():
        model = model_data['Model']
        print(f"Running stock market simulation for {model_name}...")
        
        # Run the simulation
        simulation_df, final_cash = stock_market_simulation(model, initial_cash, days, stock_data)
        
        # Store the simulation results
        simulation_results[model_name] = simulation_df
        
    return simulation_results

In [28]:
stock_df = get_stock_data('MSFT')
print(f'Adding columns...')
stock_df = add_columns(stock_df)
print(f'Scaling data...')
preprocessed = scale_data(stock_df)
X = preprocessed[features]
y = stock_df['Action']
# y = y.map({'Buy': 0, 'Sell': 1, 'Hold': 2})
X_train, _, y_train, _ = train_test_split(
    X, y, test_size=0.3, random_state=42)

Adding columns...


  '10_Day_Return', '20_Day_Return', '50_Day_Return', '200_Day_Return']].idxmax(axis=1)


Scaling data...


  '10_Day_Return', '20_Day_Return', '50_Day_Return', '200_Day_Return']].idxmax(axis=1)


In [45]:
simResults = run_simulations_on_models(X_train, y_train, X_train, y_train,get_stock_data('AAPL').tail(365) , 10000, 365)

Training Logistic Regression...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


--- Logistic Regression Results ---
Accuracy: 0.8470995005762582
              precision    recall  f1-score   support

           0       0.83      0.95      0.88      1255
           1       0.85      0.86      0.85       753
           2       0.91      0.62      0.74       595

    accuracy                           0.85      2603
   macro avg       0.86      0.81      0.82      2603
weighted avg       0.85      0.85      0.84      2603

Training Random Forest...
--- Random Forest Results ---
Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1255
           1       1.00      1.00      1.00       753
           2       1.00      1.00      1.00       595

    accuracy                           1.00      2603
   macro avg       1.00      1.00      1.00      2603
weighted avg       1.00      1.00      1.00      2603

Training SVM...
--- SVM Results ---
Accuracy: 0.7940837495197849
              precision    recall  f1-

  '10_Day_Return', '20_Day_Return', '50_Day_Return', '200_Day_Return']].idxmax(axis=1)
  modelDecisionDf = pd.concat(


Running stock market simulation for Random Forest...


  '10_Day_Return', '20_Day_Return', '50_Day_Return', '200_Day_Return']].idxmax(axis=1)
  modelDecisionDf = pd.concat(


Running stock market simulation for SVM...


  '10_Day_Return', '20_Day_Return', '50_Day_Return', '200_Day_Return']].idxmax(axis=1)
  modelDecisionDf = pd.concat(


Running stock market simulation for XGBoost...


  '10_Day_Return', '20_Day_Return', '50_Day_Return', '200_Day_Return']].idxmax(axis=1)
  modelDecisionDf = pd.concat(


In [37]:
specific_model = joblib.load('models/AAPL_model.pkl')
# specific_model = train_Optimal_Action(symbol='MSFT', action_column='Action')
specificdf = stock_market_simulation(specific_model, 10000, 365, get_stock_data('AAPL').tail(365), print_results=True)
specificdf

  '10_Day_Return', '20_Day_Return', '50_Day_Return', '200_Day_Return']].idxmax(axis=1)


Day 0: Bought 1 share at 168.5399932861328, Cash left: 9831.460006713867
Day 1: Bought 1 share at 167.4499969482422, Cash left: 9664.010009765625
Day 2: Bought 1 share at 165.7899932861328, Cash left: 9498.220016479492
Day 4: Bought 1 share at 173.5, Cash left: 9324.720016479492
Day 5: Bought 1 share at 171.77000427246094, Cash left: 9152.950012207031
Day 6: Bought 1 share at 173.55999755859375, Cash left: 8979.390014648438


  modelDecisionDf = pd.concat(


Day 7: Bought 1 share at 173.75, Cash left: 8805.640014648438
Day 8: Bought 1 share at 172.57000732421875, Cash left: 8633.070007324219
Day 9: Bought 1 share at 172.07000732421875, Cash left: 8461.0
Day 10: Bought 1 share at 172.07000732421875, Cash left: 8288.929992675781
Day 11: Bought 1 share at 172.69000244140625, Cash left: 8116.239990234375
Day 12: Bought 1 share at 175.0500030517578, Cash left: 7941.189987182617
Day 13: Bought 1 share at 175.16000366210938, Cash left: 7766.029983520508
Day 14: Bought 1 share at 174.1999969482422, Cash left: 7591.829986572266
Day 15: Bought 1 share at 171.55999755859375, Cash left: 7420.269989013672
Day 16: Bought 1 share at 171.83999633789062, Cash left: 7248.429992675781
Day 17: Bought 1 share at 172.99000549316406, Cash left: 7075.439987182617
Day 18: Bought 1 share at 175.42999267578125, Cash left: 6900.009994506836
Day 19: Bought 1 share at 177.3000030517578, Cash left: 6722.709991455078
Day 20: Bought 1 share at 177.25, Cash left: 6545.4599

(    Stock Name  Day Action         Cash Shares Held  Portfolio Value  \
 0         AAPL    0    Buy  9831.460007           1     10000.000000   
 1         AAPL    1    Buy  9664.010010           2      9998.910004   
 2         AAPL    2    Buy  9498.220016           3      9995.589996   
 3         AAPL    3   Hold  9498.220016           3     10018.930038   
 4         AAPL    4    Buy  9324.720016           4     10018.720016   
 ..         ...  ...    ...          ...         ...              ...   
 360       AAPL  360   Sell   690.130081   51.504392     12108.138840   
 361       AAPL  361   Sell   915.900085   50.504392     12318.276853   
 362       AAPL  362    Buy   686.360092   51.504392     12508.677856   
 363       AAPL  363    Buy   457.320099   52.504392     12482.925660   
 364       AAPL  364    Buy   229.770096   53.504392     12404.694629   
 
      Stock Price       Date  
 0     168.539993 2023-05-02  
 1     167.449997 2023-05-03  
 2     165.789993 2023-05-04 

In [50]:
simResults['Logistic Regression']
simResults['Random Forest']
simResults['SVM']
simResults['XGBoost']

Unnamed: 0,Stock Name,Day,Action,Cash,Shares Held,Portfolio Value,Stock Price,Date
0,AAPL,0,Sell,10000,0,10000.000000,168.539993,2023-05-02
1,AAPL,1,Sell,10000,0,10000.000000,167.449997,2023-05-03
2,AAPL,2,Sell,10000,0,10000.000000,165.789993,2023-05-04
3,AAPL,3,Hold,10000,0,10000.000000,173.570007,2023-05-05
4,AAPL,4,Buy,9826.5,1,10000.000000,173.500000,2023-05-08
...,...,...,...,...,...,...,...,...
360,AAPL,360,Sell,1366.480087,45.810226,11522.149149,221.690002,2024-10-07
361,AAPL,361,Hold,1366.480087,45.810226,11709.054954,225.770004,2024-10-08
362,AAPL,362,Buy,1136.940094,46.810226,11881.759002,229.539993,2024-10-09
363,AAPL,363,Buy,907.900101,47.810226,11858.353889,229.039993,2024-10-10


In [None]:
MODELCOMPARISON = pd.DataFrame({
    'Model': ['Logistic Regression', 'Random Forest', 'SVM', 'XGBoost','Specific Model'],
    'Stock': ['MSFT', 'MSFT', 'MSFT', 'MSFT','MSFT','AAPL','AAPL','AAPL','AAPL','AAPL'],
    'Final Portfolio Value': [11498.05,11326.80,11579.19,11813.78,11774.20,12012.813268,11780.739586,11525.277583,11787.117120,12404.69462861876]
    'Total Profit':[ 1498.05,1326.80,1579.19,1813.78,1774.20,1]
})