In [80]:
import pandas as pd
import joblib   
import numpy as np
from sklearn.model_selection import train_test_split
from SimulateDay import scale_data, predict_action, get_stock_data, add_columns, stock_market_simulation
import xgboost as xgb
import joblib
import altair as alt
import yfinance as yf

def get_stock_movement(symbol):
    """Return the percentage change in closing price for ``symbol`` over one year.

    Downloads one year of price history with ``yf.download`` and compares the
    first and last available closing prices.

    Args:
        symbol: Ticker symbol, passed unchanged to ``yf.download``.

    Returns:
        float: ``(latest - initial) / initial * 100``, or ``0.0`` when the
        download yields no usable closing prices.
    """
    data = yf.download(symbol, period="1y")
    if data is None or len(data) == 0:
        return 0.0

    close = data['Close']
    # Some yfinance versions return MultiIndex columns even for a single
    # ticker; data['Close'] is then a one-column DataFrame, not a Series.
    if getattr(close, 'ndim', 1) == 2:
        close = close.iloc[:, 0]

    # Skip missing prices so a NaN at either end does not turn the result
    # (and any average built from it) into NaN.
    close = close.dropna()
    if close.empty:
        return 0.0

    initial_price = close.iloc[0]
    latest_price = close.iloc[-1]
    return float(((latest_price - initial_price) / initial_price) * 100)

import plotly.subplots as sp
import plotly.graph_objects as go
def get_final_portfolio_values(df):
    """Return the last recorded row of every stock in a simulation log.

    Args:
        df: DataFrame containing at least the columns 'Stock Name',
            'Portfolio Value' and 'Shares Held'. The last row of each
            stock (in the frame's current row order) is treated as final.

    Returns:
        DataFrame with columns ['Stock Name', 'Portfolio Value',
        'Shares Held'], one row per stock, ordered by 'Stock Name' and
        re-indexed from 0.
    """
    wanted_columns = ['Stock Name', 'Portfolio Value', 'Shares Held']

    # keep='last' picks each stock's final row without groupby.apply, whose
    # handling of the grouping column is deprecated in recent pandas (the
    # 'Stock Name' column would no longer be available to select).
    last_rows = (
        df.dropna(subset=['Stock Name'])  # groupby ignored missing keys by default
        .drop_duplicates(subset='Stock Name', keep='last')
        .sort_values('Stock Name')        # groupby returned groups in key order
    )

    return last_rows[wanted_columns].reset_index(drop=True)




In [11]:
# Full simulation log, kept in the order stored on disk.
sim_results = pd.read_csv('simResults/sim_results.csv')

# Per-model decision logs, each ordered by ticker and then by simulation day.
specific_model_decisions = (
    pd.read_csv('simResults/specific_model_decisions.csv')
    .sort_values(by=['Stock Name', 'Day'])
)
general_model_decisions = (
    pd.read_csv('simResults/general_model_decisions.csv')
    .sort_values(by=['Stock Name', 'Day'])
)
LGBM_model_decisions = (
    pd.read_csv('simResults/LGBM_model_decisions.csv')
    .sort_values(by=['Stock Name', 'Day'])
)

# Disabled one-off re-export of the sorted frames:
# general_model_decisions.to_csv('simResults/general_model_decisions.csv', index=False)
# specific_model_decisions.to_csv('simResults/specific_model_decisions.csv', index=False)

In [12]:
# Preview the first 11 rows of the LGBM decision log (sorted by stock, then day).
LGBM_model_decisions.head(11)

Unnamed: 0,Stock Name,Day,Action,Stock Price,Cash,Shares Held,Portfolio Value,Date
72,A,0,Hold,143.820007,10000.0,0.0,10000.0,2024-10-11
73,A,1,Hold,144.25,10000.0,0.0,10000.0,2024-10-14
74,A,2,Hold,144.580002,10000.0,0.0,10000.0,2024-10-15
75,A,3,Buy,139.110001,9860.889999,1.0,10000.0,2024-10-16
76,A,4,Hold,137.419998,9860.889999,1.0,9998.309998,2024-10-17
77,A,5,Hold,137.419998,9860.889999,1.0,9998.309998,2024-10-17
78,A,6,Hold,138.744995,9860.889999,1.0,9999.634995,2024-10-18
79,A,7,Hold,138.634995,9860.889999,1.0,9999.524994,2024-10-18
4257,A,8,Sell,136.759995,10000.0,0.0,10000.0,2024-10-21
4758,A,9,Sell,133.570007,10000.0,0.0,10000.0,2024-10-22


In [13]:
# Preview the first 11 rows of the stock-specific model's decision log.
specific_model_decisions.head(11)

Unnamed: 0,Stock Name,Day,Action,Stock Price,Cash,Shares Held,Portfolio Value,Date
0,A,0,Hold,144.800003,10000.0,0.0,10000.0,2024-10-09
1,A,1,Sell,142.740005,10000.0,0.0,10000.0,2024-10-10
2,A,2,Sell,143.820007,10000.0,0.0,10000.0,2024-10-11
3,A,3,Sell,144.25,10000.0,0.0,10000.0,2024-10-14
4,A,4,Sell,144.580002,10000.0,0.0,10000.0,2024-10-15
5,A,5,Sell,138.830002,10000.0,0.0,10000.0,2024-10-16
6,A,6,Sell,138.245697,10000.0,0.0,10000.0,2024-10-17
7,A,7,Sell,138.774994,10000.0,0.0,10000.0,2024-10-18
8,A,8,Sell,136.869995,10000.0,0.0,10000.0,2024-10-21
9,A,9,Sell,133.570007,10000.0,0.0,10000.0,2024-10-22


In [14]:
# Every row where the LGBM model chose to buy.
LGBM_model_decisions.loc[lambda log: log['Action'] == 'Buy']

Unnamed: 0,Stock Name,Day,Action,Stock Price,Cash,Shares Held,Portfolio Value,Date
75,A,3,Buy,139.110001,9860.889999,1.0,10000.000000,2024-10-16
312,AAPL,0,Buy,233.850006,9766.149994,1.0,10000.000000,2024-10-15
318,AAPL,6,Buy,235.100006,9762.829987,1.0,9997.929993,2024-10-18
319,AAPL,7,Buy,235.134995,9527.694992,2.0,9997.964981,2024-10-18
26,ABBV,2,Buy,191.860001,9808.139999,1.0,10000.000000,2024-10-15
...,...,...,...,...,...,...,...,...
4005,ZBRA,5,Buy,375.500000,9244.940002,2.0,9995.940002,2024-10-16
4006,ZBRA,6,Buy,373.839996,8871.100006,3.0,9992.619995,2024-10-17
4007,ZBRA,7,Buy,375.450012,8495.649994,4.0,9997.450043,2024-10-18
4020,ZTS,4,Buy,192.389999,9807.610001,1.0,10000.000000,2024-10-15


In [15]:
# Every row where the stock-specific model chose to buy.
specific_model_decisions.loc[lambda log: log['Action'] == 'Buy']

Unnamed: 0,Stock Name,Day,Action,Stock Price,Cash,Shares Held,Portfolio Value,Date
10,AAPL,0,Buy,229.539993,9770.460007,1.0,10000.000000,2024-10-09
11,AAPL,1,Buy,229.039993,9541.420013,2.0,9999.500000,2024-10-10
47,ACGL,1,Buy,113.900002,9886.099998,1.0,10000.000000,2024-10-10
56,ACN,0,Buy,365.079987,9634.920013,1.0,10000.000000,2024-10-09
60,ACN,4,Buy,368.660004,9266.260010,2.0,10003.580017,2024-10-15
...,...,...,...,...,...,...,...,...
4960,ZBH,5,Buy,105.209999,9894.790001,1.0,10000.000000,2024-10-16
4961,ZBH,6,Buy,105.029999,9894.970001,1.0,10000.000000,2024-10-17
4962,ZBH,7,Buy,106.535004,9893.464996,1.0,10000.000000,2024-10-18
4963,ZBH,8,Buy,105.239998,9894.760002,1.0,10000.000000,2024-10-21


In [18]:
# Rows from the last simulated day whose portfolio value is above 10000
# (stock-specific model).
specific_model_decisions.loc[
    lambda log: (log['Portfolio Value'] > 10000) & (log['Day'] == log['Day'].max())
]

Unnamed: 0,Stock Name,Day,Action,Stock Price,Cash,Shares Held,Portfolio Value,Date
19,AAPL,9,Sell,234.899994,10005.360001,0.0,10005.360001,2024-10-22
65,ACN,9,Buy,371.945007,9262.975006,2.0,10006.865021,2024-10-22
135,AEP,9,Buy,100.010002,9802.269997,2.0,10002.290001,2024-10-22
195,AKAM,9,Buy,105.239998,9791.650002,2.0,10002.129997,2024-10-22
295,AMP,9,Buy,518.960022,8987.799988,2.0,10025.720032,2024-10-22
...,...,...,...,...,...,...,...,...
4264,SYK,9,Buy,366.109406,9279.930603,2.0,10012.149414,2024-10-22
4354,TFC,9,Buy,43.195000,9914.174999,2.0,10000.564999,2024-10-22
4364,TFX,9,Buy,235.669998,9529.919998,2.0,10001.259995,2024-10-22
4524,TXT,9,Buy,87.239998,9826.420006,2.0,10000.900002,2024-10-22


In [19]:
# Rows from the last simulated day whose portfolio value is above 10000
# (LGBM model).
# NOTE(review): the recorded output lists the same Portfolio Value figures as
# the equivalent specific-model cell - confirm the LGBM CSV does not contain
# rows appended from another model's run.
LGBM_model_decisions.loc[
    lambda log: (log['Portfolio Value'] > 10000) & (log['Day'] == log['Day'].max())
]

Unnamed: 0,Stock Name,Day,Action,Stock Price,Cash,Shares Held,Portfolio Value,Date
4525,AAPL,9,Sell,234.899994,10005.360001,0.0,10005.360001,2024-10-22
4561,ACN,9,Sell,371.945007,10006.865021,0.0,10006.865021,2024-10-22
4707,AEP,9,Sell,100.010002,10002.290001,0.0,10002.290001,2024-10-22
4953,AKAM,9,Sell,105.239998,10002.129997,0.0,10002.129997,2024-10-22
4734,AMP,9,Sell,518.960022,10025.720032,0.0,10025.720032,2024-10-22
...,...,...,...,...,...,...,...,...
4596,SYK,9,Sell,366.109406,10012.149414,0.0,10012.149414,2024-10-22
4696,TFC,9,Sell,43.195000,10000.564999,0.0,10000.564999,2024-10-22
4992,TFX,9,Sell,235.669998,10001.259995,0.0,10001.259995,2024-10-22
4942,TXT,9,Sell,87.239998,10000.900002,0.0,10000.900002,2024-10-22


In [21]:
# Summary statistics for the numeric columns of the stock-specific decision log.
specific_model_decisions.describe()

Unnamed: 0,Day,Stock Price,Cash,Shares Held,Portfolio Value
count,4985.0,4985.0,4985.0,4985.0,4985.0
mean,4.517954,222.859229,9894.028032,0.467464,10000.54882
std,2.871815,524.978414,515.047396,0.778845,9.482548
min,0.0,7.52,0.0,0.0,9747.400391
25%,2.0,69.605003,9927.760002,0.0,10000.0
50%,5.0,125.885002,10000.0,0.0,10000.0
75%,7.0,233.119995,10000.0,1.0,10000.0
max,9.0,9916.605469,10089.529785,4.0,10289.005859


In [23]:
# Summary statistics for the numeric columns of the LGBM decision log.
LGBM_model_decisions.describe()

Unnamed: 0,Day,Stock Price,Cash,Shares Held,Portfolio Value
count,5027.0,5027.0,5027.0,5027.0,5027.0
mean,4.497712,223.06702,9904.681051,0.475433,9999.755068
std,2.871873,522.528621,325.321467,0.92256,6.299297
min,0.0,7.52,226.419922,0.0,9747.400391
25%,2.0,69.634998,9943.4,0.0,10000.0
50%,4.0,126.690002,10000.0,0.0,10000.0
75%,7.0,233.479996,10000.0,1.0,10000.0
max,9.0,9924.400391,10082.654785,10.0,10150.820312


In [111]:
# Final row per stock from the simulation log.
# (To leave NFLX out, pass
#  sim_results.drop(sim_results[sim_results['Stock Name']=='NFLX'].index) instead.)
final_portfolio_values = get_final_portfolio_values(sim_results)

# Percentage gain/loss against the 10000 baseline (presumably the starting cash).
final_portfolio_values['Profit %'] = (
    final_portfolio_values['Portfolio Value'].sub(10000).div(10000).mul(100)
)

# One bar per stock: green when the profit is positive, red otherwise.
alt.Chart(final_portfolio_values).mark_bar().encode(
    x=alt.X('Stock Name'),
    y=alt.Y('Profit %'),
    color=alt.condition(
        alt.datum['Profit %'] > 0,
        alt.value('green'),
        alt.value('red'),
    ),
).properties(
    title='Final Portfolio Value by Stock',
    width=800,
    height=400,
).configure_axis(
    labelAngle=45,
).display()

In [112]:
# Summary statistics of the per-stock final values, shares held and profit %.
final_portfolio_values.describe() 

Unnamed: 0,Portfolio Value,Shares Held,Profit %
count,18.0,18.0,18.0
mean,10840.165268,46.533595,8.401653
std,1414.583655,85.426197,14.145837
min,9921.349977,0.0,-0.7865
25%,10079.935795,11.75,0.799358
50%,10197.988786,19.208332,1.979888
75%,10541.500292,41.0,5.415003
max,15113.500288,374.0,51.135003


In [113]:
# Add a per-row profit percentage against the 10000 baseline to the full
# simulation log, then summarise every numeric column.
sim_results['Profit %'] = sim_results['Portfolio Value'].sub(10000).div(10000).mul(100)
sim_results.describe()

Unnamed: 0,Day,Stock Price,Cash,Shares Held,Portfolio Value,Profit %
count,6570.0,6570.0,6570.0,6570.0,6570.0,6570.0
mean,182.0,161.080648,7409.005335,25.399711,10350.674098,3.506741
std,105.374048,153.169143,3336.277504,49.513217,1056.824854,10.568249
min,0.0,9.83,0.0,-0.884297,8260.710098,-17.392899
25%,91.0,40.869999,6386.832497,2.0,9984.420002,-0.1558
50%,182.0,105.884998,8907.330009,11.0,10023.335014,0.23335
75%,273.0,229.029995,9881.982492,29.0,10207.664939,2.076649
max,364.0,771.167419,15191.869995,389.0,17111.548602,71.115486


In [91]:
# Same summary as the earlier final_portfolio_values.describe() cell.
# NOTE(review): this cell's execution count (In [91]) is lower than the cells
# above it and its saved figures differ, so the output is presumably from an
# earlier run - re-run the notebook top to bottom to refresh it.
final_portfolio_values.describe()

Unnamed: 0,Portfolio Value,Shares Held,Profit %
count,18.0,18.0,18.0
mean,10756.80629,43.034536,7.568063
std,1230.685076,80.641113,12.306851
min,9915.389959,-0.602926,-0.8461
25%,10065.476938,2.32704,0.654769
50%,10264.278403,14.944983,2.642784
75%,10510.354038,43.25,5.10354
max,14642.950491,342.0,46.429505


In [92]:
# Collect the 1-year percentage move of every simulated ticker and keep a
# running total for the average computed in a later cell.
all_stock_movement = []
total_movement = 0
for stock in sim_results['Stock Name'].unique():
    # Fetch once per ticker: every call triggers a download, so calling it
    # twice doubled the network traffic and could return two different values.
    movement = get_stock_movement(stock)
    all_stock_movement.append([stock, movement])
    total_movement += movement

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

In [95]:
# Unweighted mean of the per-ticker movement: the accumulated total divided
# by the number of distinct tickers in the simulation log.
total_movement/len(sim_results['Stock Name'].unique())


33.24928991723444

In [2]:
import pandas as pd

# Load the S&P 500 price history and order it by ticker, then by date.
sp500 = pd.read_csv('data/sp500_stocks.csv').sort_values(by=['Symbol', 'Date'])

Unnamed: 0,Date,Symbol,Adj Close,Close,High,Low,Open,Volume
0,2010-01-04,A,20.434929,22.389128,22.625179,22.267525,22.453505,3815561.0
1,2010-01-05,A,20.212959,22.145924,22.331903,22.002861,22.324751,4186031.0
2,2010-01-06,A,20.141132,22.067240,22.174536,22.002861,22.067240,3243779.0
3,2010-01-07,A,20.115025,22.038628,22.045780,21.816881,22.017166,3095172.0
4,2010-01-08,A,20.108498,22.031473,22.067240,21.745352,21.917025,3733918.0
...,...,...,...,...,...,...,...,...
1873262,2024-10-15,ZTS,192.389999,192.389999,195.820007,191.940002,192.279999,1414600.0
1873263,2024-10-16,ZTS,194.440002,194.440002,195.880005,191.854996,192.000000,1591500.0
1873264,2024-10-17,ZTS,191.000000,191.000000,196.550003,190.889999,195.710007,1701200.0
1873265,2024-10-18,ZTS,193.279999,193.279999,193.485001,190.580002,195.710007,1242013.0
