In [72]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
import plotly.express as px

#Global var
init_cap = 2000
price_limit = 5 # don't open position if price exceeded price_limit
risk_free_rate = 5.0

#directory path
dir_path =r'D:\OneDrive\stock\Leleka\BuyBeforeEarnings\inputForPython'
csv_files = [f for f in os.listdir(dir_path) if f.endswith('.csv')]

if not csv_files:
    print('No CSV file in directory')



In [73]:
# add stock list to filter data
df_stock = pd.read_excel('InputStocks.xlsx')


In [74]:
# Using the str.cat() method without the underscore separator
df_stock['Y_Symbol'] = df_stock['Year'].astype(str).str.cat(df_stock['Symbol'], sep='')




In [75]:
df_stock

Unnamed: 0,Year,Symbol,Y_Symbol
0,2007,XOM,2007 XOM
1,2008,XOM,2008 XOM
2,2009,XOM,2009 XOM
3,2010,XOM,2010 XOM
4,2011,AAPL,2011 AAPL
...,...,...,...
145,2017,BAC,2017 BAC
146,2018,V,2018 V
147,2019,PG,2019 PG
148,2020,JPM,2020 JPM


In [76]:
df_stock

Unnamed: 0,Year,Symbol,Y_Symbol
0,2007,XOM,2007 XOM
1,2008,XOM,2008 XOM
2,2009,XOM,2009 XOM
3,2010,XOM,2010 XOM
4,2011,AAPL,2011 AAPL
...,...,...,...
145,2017,BAC,2017 BAC
146,2018,V,2018 V
147,2019,PG,2019 PG
148,2020,JPM,2020 JPM


In [41]:
#df_stock.to_excel('df_stock_result.xlsx')

In [77]:
dataframes={}
all_data = []  # List to store all individual dataframes

for file in csv_files:
    file_path = os.path.join(dir_path, file)
    df = pd.read_csv(file_path)
    key_name = os.path.splitext(file)[0]
    dataframes[key_name] = df
    all_data.append(df)

# Concatenate all individual dataframes to form a single dataframe
combined_df = pd.concat(all_data, ignore_index=True)

In [20]:
#combined_df

In [78]:
# remove dollar symbol
combined_df['Profit/Loss'] = combined_df['Profit/Loss'].str.replace('$','', regex = False)
combined_df['Trade Price'] = combined_df['Trade Price'].str.replace('$','', regex = False)
combined_df['Stock Price'] = combined_df['Stock Price'].str.replace('$','', regex = False)
combined_df['Adjusted Stock Price'] = combined_df['Adjusted Stock Price'].str.replace('$','', regex = False)

# convert in numbers
combined_df['Profit/Loss']= pd.to_numeric(combined_df['Profit/Loss'],errors='coerce')
combined_df['Trade Price']= pd.to_numeric(combined_df['Trade Price'],errors='coerce')

# convert date 
combined_df['Date'] = pd.to_datetime(combined_df['Date'])
combined_df = combined_df.reset_index(drop=True)


In [22]:
#combined_df.to_excel('combined_df.xlsx')


In [79]:
# calculate CS open price
combined_df.loc[combined_df["Size"] == -1, "Trade Price"] *= -1  # if size -1 then we sold options it's reduce our calls costs
grouped_by_date = combined_df.groupby(['Date','Symbol'])[['Trade Price', 'Profit/Loss']].sum().reset_index()


In [80]:
# sorf dataframe
sorted_df = grouped_by_date.sort_values(by=['Symbol','Date'])


In [81]:
# delete result values of option
sorted_df.loc[sorted_df['Profit/Loss']!=0, 'Trade Price']=0


In [82]:
# Here I delete rows where price_limit exceed my limit


# Identify rows where "Trade Price" is greater than price_limit
to_drop = sorted_df[sorted_df['Trade Price'] > price_limit]

# For each of these rows, identify the next row with the same "Symbol" and a subsequent date
drop_indices = []
for idx, row in to_drop.iterrows():
    drop_indices.append(idx)
    next_row = sorted_df[(sorted_df['Symbol'] == row['Symbol']) & (sorted_df['Date'] > row['Date'])].head(1)
    if not next_row.empty:
        drop_indices.append(next_row.index[0])

# Drop these rows from the DataFrame
sorted_df = sorted_df.drop(drop_indices)


In [83]:
# sort by date to build chart
sorted_df = sorted_df.sort_values(by='Date')

In [28]:
#sorted_df.to_excel('sorted.xlsx')

In [84]:
# making cummulative sum of profit
df_chart = sorted_df.copy()
df_chart['CumSum'] = df_chart['Profit/Loss'].cumsum()

# add SPY price to compare

start_date = df_chart['Date'].min()
end_date = df_chart['Date'].max()

# Fetch SPY data for the given date range
spy_data = yf.download('SPY', start=start_date, end=end_date)

# Merge the data
df_chart = df_chart.merge(spy_data[['Close']], left_on='Date', right_index=True, how='left')
df_chart.rename(columns={'Close': 'SPY Price'}, inplace=True)

[*********************100%***********************]  1 of 1 completed


In [85]:
df_chart

Unnamed: 0,Date,Symbol,Trade Price,Profit/Loss,CumSum,SPY Price
0,2013-09-25,WFC,0.26,0.0,0.0,169.039993
1,2013-09-30,JNJ,0.87,0.0,0.0,168.009995
2,2013-10-02,GE,0.24,0.0,0.0,169.179993
4,2013-10-08,MSFT,0.32,0.0,0.0,165.479996
3,2013-10-08,AMZN,3.20,0.0,0.0,165.479996
...,...,...,...,...,...,...
791,2023-07-26,V,0.00,-82.0,11272.0,455.510010
792,2023-07-27,META,0.00,188.0,11460.0,452.489990
793,2023-07-31,XOM,0.00,-24.0,11436.0,457.790009
794,2023-08-04,AAPL,0.00,-137.0,11299.0,


In [86]:
# 1. Determine the price of SPY on the start date
start_date = df_chart['Date'].iloc[0]
spy_start_price = df_chart.loc[df_chart['Date'] == start_date, 'SPY Price'].iloc[0]

# 2. Calculate the number of SPY shares bought with the initial capital
num_shares = init_cap / spy_start_price

# 3. Calculate the value of those shares for each date in the DataFrame
df_chart['SPY_buy_hold'] = num_shares * df_chart['SPY Price']-init_cap
df_chart['SPY_buy_hold'] = df_chart['SPY_buy_hold'].round(2)

# making cummulative sum of profit of SPY
df_chart_comp = df_chart.copy()


In [87]:
df_chart_comp['Year'] = df_chart_comp['Date'].dt.year


In [88]:
df_chart_comp['Y_Symbol'] = df_chart_comp['Year'].astype(str).str.cat(df_chart_comp['Symbol'], sep='')


In [92]:
#df_chart_comp['Y_Symbol'] = df_chart_comp['Y_Symbol'].str.strip()
df_stock['Y_Symbol'] = df_stock['Y_Symbol'].str.replace(' ', '')

In [93]:
df_stock

Unnamed: 0,Year,Symbol,Y_Symbol
0,2007,XOM,2007XOM
1,2008,XOM,2008XOM
2,2009,XOM,2009XOM
3,2010,XOM,2010XOM
4,2011,AAPL,2011AAPL
...,...,...,...
145,2017,BAC,2017BAC
146,2018,V,2018V
147,2019,PG,2019PG
148,2020,JPM,2020JPM


In [94]:
df_chart_comp

Unnamed: 0,Date,Symbol,Trade Price,Profit/Loss,CumSum,SPY Price,SPY_buy_hold,Year,Y_Symbol
0,2013-09-25,WFC,0.26,0.0,0.0,169.039993,0.00,2013,2013WFC
1,2013-09-30,JNJ,0.87,0.0,0.0,168.009995,-12.19,2013,2013JNJ
2,2013-10-02,GE,0.24,0.0,0.0,169.179993,1.66,2013,2013GE
4,2013-10-08,MSFT,0.32,0.0,0.0,165.479996,-42.12,2013,2013MSFT
3,2013-10-08,AMZN,3.20,0.0,0.0,165.479996,-42.12,2013,2013AMZN
...,...,...,...,...,...,...,...,...,...
791,2023-07-26,V,0.00,-82.0,11272.0,455.510010,3389.38,2023,2023V
792,2023-07-27,META,0.00,188.0,11460.0,452.489990,3353.64,2023,2023META
793,2023-07-31,XOM,0.00,-24.0,11436.0,457.790009,3416.35,2023,2023XOM
794,2023-08-04,AAPL,0.00,-137.0,11299.0,,,2023,2023AAPL


In [40]:
#df_chart_comp.to_excel('df_chart_comp.xlsx')

In [95]:
# select only stocks from file 'InputStocks.xlsx'
df_chart_comp = df_chart_comp[df_chart_comp['Y_Symbol'].isin(df_stock['Y_Symbol'])]

In [96]:
df_chart_comp

Unnamed: 0,Date,Symbol,Trade Price,Profit/Loss,CumSum,SPY Price,SPY_buy_hold,Year,Y_Symbol
1,2013-09-30,JNJ,0.87,0.0,0.0,168.009995,-12.19,2013,2013JNJ
2,2013-10-02,GE,0.24,0.0,0.0,169.179993,1.66,2013,2013GE
4,2013-10-08,MSFT,0.32,0.0,0.0,165.479996,-42.12,2013,2013MSFT
5,2013-10-14,AAPL,3.15,0.0,-10.0,170.940002,22.48,2013,2013AAPL
9,2013-10-15,XOM,0.78,0.0,-10.0,169.699997,7.81,2013,2013XOM
...,...,...,...,...,...,...,...,...,...
643,2021-10-12,AAPL,1.42,0.0,9407.0,433.619995,3130.38,2021,2021AAPL
647,2021-10-20,JNJ,0.00,24.0,9463.0,452.410004,3352.70,2021,2021JNJ
652,2021-10-27,V,0.00,-129.0,9149.0,453.940002,3370.80,2021,2021V
651,2021-10-27,MSFT,0.00,238.0,9387.0,453.940002,3370.80,2021,2021MSFT


In [33]:
#df_chart_comp.to_excel('BuyBeforeEarnings\output\BBE_analysis.xlsx')

In [98]:
# sort again to restore order
df_chart_comp.sort_values(by="Date")

Unnamed: 0,Date,Symbol,Trade Price,Profit/Loss,CumSum,SPY Price,SPY_buy_hold,Year,Y_Symbol
1,2013-09-30,JNJ,0.87,0.0,0.0,168.009995,-12.19,2013,2013JNJ
2,2013-10-02,GE,0.24,0.0,0.0,169.179993,1.66,2013,2013GE
4,2013-10-08,MSFT,0.32,0.0,0.0,165.479996,-42.12,2013,2013MSFT
5,2013-10-14,AAPL,3.15,0.0,-10.0,170.940002,22.48,2013,2013AAPL
9,2013-10-15,XOM,0.78,0.0,-10.0,169.699997,7.81,2013,2013XOM
...,...,...,...,...,...,...,...,...,...
643,2021-10-12,AAPL,1.42,0.0,9407.0,433.619995,3130.38,2021,2021AAPL
647,2021-10-20,JNJ,0.00,24.0,9463.0,452.410004,3352.70,2021,2021JNJ
651,2021-10-27,MSFT,0.00,238.0,9387.0,453.940002,3370.80,2021,2021MSFT
652,2021-10-27,V,0.00,-129.0,9149.0,453.940002,3370.80,2021,2021V


In [100]:
df_chart_comp.to_excel('df_chart_comp.xlsx')

In [104]:
# Load the provided dataframe

# Plot using plotly.express
fig = px.line(df_chart_comp, x='Date', y='CumSum', title='Cumulative Sum over Date')

fig.show()



## Metrics

### Annualized return

In [35]:
# # calculations for SPY
# # Drop rows with NaN values in 'SPY Price' column
# spy_starting_price = df_chart_comp['SPY Price'].iloc[0]
# df_chart_comp_cleaned = df_chart_comp.dropna(subset=['SPY Price'])

# # Get the new ending price of SPY
# spy_ending_price_cleaned = df_chart_comp_cleaned['SPY Price'].iloc[-1]

# # Recalculate the total return for SPY
# spy_total_return_cleaned = (spy_ending_price_cleaned / spy_starting_price) - 1

# spy_total_return_cleaned


In [36]:
# ### Annualized return
# total_return = df_chart_comp['Profit/Loss'].sum()
# # Calculate the total number of days the strategy ran
# num_days = (df_chart_comp['Date'].iloc[-1] - df_chart_comp['Date'].iloc[0]).days

# # Calculate the annualized return using the total return and number of days
# annualized_return = ((1 + total_return / df_chart_comp['CumSum'].iloc[0]) ** (365.0 / num_days)) - 1

# annualized_return = round(annualized_return*100,2)
# annualized_return

In [37]:
# ### Annualized return SPY
# # Calculate the total number of days the data covers (for the cleaned data)
# num_days_spy = (df_chart_comp_cleaned['Date'].iloc[-1] - df_chart_comp_cleaned['Date'].iloc[0]).days

# # Calculate the annualized return for SPY using the total return and number of days
# spy_annualized_return = ((1 + spy_total_return_cleaned) ** (365.0 / num_days_spy)) - 1

# spy_annualized_return =round(spy_annualized_return*100,2)
# spy_annualized_return


### Volatility (Standard Deviation):

In [38]:
# # 1. Compute the daily returns
# df_chart_comp['Daily Returns'] = df_chart_comp['CumSum'].pct_change()

# # 2. Calculate the standard deviation of these daily returns
# daily_volatility = df_chart_comp['Daily Returns'].std()

# # 3. Annualize the standard deviation
# annualized_volatility = round(daily_volatility * (252**0.5),2)  # Using 252 trading days in a year

# annualized_volatility


### Drawdown

In [39]:
# 1. Create a new column for cumulative balance
df_chart_comp['Cumulative Balance'] = init_cap + df_chart_comp['Profit/Loss'].cumsum()

# 2. Calculate running max for the cumulative balance
running_max_balance = df_chart_comp['Cumulative Balance'].cummax()

# 3. Calculate drawdowns as the decline from the running max balance
drawdowns_balance = (df_chart_comp['Cumulative Balance'] - running_max_balance) / running_max_balance

# 4. Identify the maximum drawdown
max_drawdown_balance = drawdowns_balance.min()

max_drawdown_balance


-0.5786802030456852

### consecutive loss trades
 

In [40]:
# 1. Create a binary column for loss days
df_chart_comp['Loss Day'] = (df_chart_comp['Profit/Loss'] < 0).astype(int)

# 2. Calculate the cumulative sum of loss days, but reset to zero whenever a non-loss day is encountered
df_chart_comp['Consecutive Loss Days'] = df_chart_comp['Loss Day'].groupby((df_chart_comp['Loss Day'] == 0).cumsum()).cumsum()

# 3. Find the maximum number of consecutive loss days
max_consecutive_loss_days = df_chart_comp['Consecutive Loss Days'].max()

max_consecutive_loss_days


7

In [41]:
#df_chart_comp_cleaned

## sharpe ratio

In [42]:
#average_annual = df_annual['Annual Profit'].mean()

## Sortino Ratio