In [16]:
import pandas as pd
import pyxirr
import yfinance as yf
import numpy as np
from enum import Enum

In [3]:
# Workbook locations. Raw strings keep the Windows backslashes literal instead of
# relying on Python passing unknown escapes such as "\M" or "\i" through unchanged,
# which emits a warning on recent Python versions. The resulting values are identical.
INVEST_FILE = r'G:\My Drive\invest\investment_returns.xlsx'
STOCK_FILE = r'G:\My Drive\invest\stock_purchase.xlsx'

# Worksheet name in INVEST_FILE -> short label written to the "source" column.
SHEETS_TO_SOURCE_MAP = {
    "Saxo - SGD": "saxo",
    "IBKR - SGD(U7470748)": "ibkr",
    "IBKR endowment plan": "ibkr_endowment",
    "Tiger broker": "tiger",
    "philips - SGD": "philips",
}

In [4]:
def remove_unnamed_columns(dfs: dict):
    """Drop every column whose label starts with ``Unnamed`` from each frame in ``dfs``.

    Args:
        dfs: Mapping of key -> DataFrame. Each value is replaced in place (in the
            dict) by a filtered copy; the original frame objects are not modified.
    """
    for key, frame in list(dfs.items()):
        # Cast labels to str before matching: with a non-string label (e.g. an
        # integer header) the .str accessor yields NaN for that entry and the
        # boolean negation below would raise TypeError.
        is_unnamed = frame.columns.astype(str).str.startswith('Unnamed')
        # .copy() so later column assignments on the result do not operate on a
        # slice of the original frame.
        dfs[key] = frame.loc[:, ~is_unnamed].copy()

def filter_invalid_timestamps(dfs: dict):
    """Keep only rows whose ``Date`` parses, and replace ``Date`` with a ``date`` column.

    For every frame in ``dfs`` the ``Date`` column is parsed with
    ``pd.to_datetime(..., errors='coerce')``; rows that fail to parse are
    removed, ``Date`` is dropped and a new ``date`` column holding
    ``datetime.date`` objects is appended as the last column.

    Args:
        dfs: Mapping of key -> DataFrame with a ``Date`` column. Each value is
            replaced by a new frame; the frames passed in are left unmodified.
    """
    for key, frame in list(dfs.items()):
        parsed = pd.to_datetime(frame['Date'], errors='coerce')
        # Select with a boolean mask rather than dropping by index label, so a
        # frame with repeated labels cannot lose valid rows that happen to share
        # a label with an invalid one.
        valid = parsed.notna()
        cleaned = frame.loc[valid].drop(columns=['Date'])
        cleaned['date'] = parsed[valid].dt.date
        dfs[key] = cleaned

def rename_columns(dfs: dict):
    """Lower-case the column labels of each frame and keep only the four ledger columns.

    Every value in ``dfs`` is replaced by a frame restricted to ``date``,
    ``source``, ``amount`` and ``desc`` (in that order). The labels of the
    incoming frame are lower-cased on the frame itself before selecting.
    """
    wanted = ['date', 'source', 'amount', 'desc']
    for name in list(dfs):
        frame = dfs[name]
        frame.columns = list(map(lambda label: label.lower(), frame.columns))
        dfs[name] = frame[wanted]

def get_sp500_data(start_date: pd.Timestamp, end_date: pd.Timestamp):
    sp500_ticker = '^GSPC'
    sp500_data = yf.download(sp500_ticker, start=start_date, end=end_date + pd.Timedelta(days=1), progress=False, auto_adjust=True)
    assert sp500_data is not None
    sp500_data.columns = sp500_data.columns.get_level_values(0)
    sp500_data.columns.name = None
    sp500_data.index = pd.to_datetime(sp500_data.index, errors='coerce').map(lambda x: x.date() if isinstance(x, pd.Timestamp) else x)
    return sp500_data

def calculate_my_returns() -> pd.DataFrame:
    """Build one date-ordered cash-flow ledger from every sheet of the investment workbook.

    Each sheet listed in ``SHEETS_TO_SOURCE_MAP`` is read from ``INVEST_FILE``,
    tagged with its source label and cleaned with the helper functions. Rows
    whose ``desc`` is ``'current value'`` are collapsed into a single row
    (dated at the latest such row, ``source='all_sources'``) whose amount is the
    sum of all current values; every other row keeps its amount and receives a
    running ``cumulative_investment`` (the negated cumulative sum of amounts).

    Returns:
        DataFrame with columns ``date``, ``source``, ``amount``, ``desc`` and
        ``cumulative_investment``, sorted by date with a fresh 0..n-1 index.

    Raises:
        ValueError: If the sheets contain no 'current value' row or no
            transaction rows.
    """
    dfs = {sheet: pd.read_excel(INVEST_FILE, sheet_name=sheet) for sheet in SHEETS_TO_SOURCE_MAP}
    for sheet, source in SHEETS_TO_SOURCE_MAP.items():
        dfs[sheet]["source"] = source

    remove_unnamed_columns(dfs)
    filter_invalid_timestamps(dfs)
    rename_columns(dfs)

    # ignore_index=True matters: every sheet carries its own 0-based labels, so
    # without it two sheets can share a label. Selecting the latest
    # 'current value' row by label would then return several rows, each
    # stamped with the full total, and the market value would be counted twice.
    df = pd.concat(dfs.values(), axis=0, ignore_index=True)
    df['date'] = pd.to_datetime(df['date'], errors='coerce')
    df = df.sort_values(by='date', ascending=True)

    is_current = df['desc'] == 'current value'
    df_curr = df[is_current].copy()
    df_other = df[~is_current].copy()
    if df_curr.empty:
        raise ValueError("No 'current value' rows found in the investment sheets")
    if df_other.empty:
        raise ValueError("No transaction rows found in the investment sheets")

    df_other['cumulative_investment'] = -df_other['amount'].cumsum()
    total_invested = df_other['cumulative_investment'].iloc[-1]

    # Collapse all per-broker valuations into one row dated at the most recent one.
    latest = df_curr['date'].idxmax()
    df_total = df_curr.loc[[latest], :].copy()
    df_total['amount'] = df_curr['amount'].sum()
    df_total['source'] = 'all_sources'
    df_total['cumulative_investment'] = total_invested

    df = pd.concat([df_other, df_total], axis=0).sort_values(by='date', ascending=True).reset_index(drop=True)

    return df


def calculate_sp500_returns(my_df: pd.DataFrame) -> pd.DataFrame:
    """Replay the cash flows in ``my_df`` as trades in the S&P 500 index.

    Each row is priced at the ``Close`` of the trading day nearest to its date.
    A negative amount buys ``abs(amount) / px`` index units, any other
    non-'current value' amount sells ``amount / px`` units, and a
    'current value' row records the value of the units held at that point
    (amount rounded to one decimal, market value rounded to a whole number).

    Returns:
        DataFrame with one row per input row and the columns ``date``, ``type``,
        ``qty``, ``px``, ``amount``, ``cum_qty`` and ``market_value``.
    """
    sp500_prices = get_sp500_data(my_df['date'].min(), my_df['date'].max())
    sp500_prices.index = pd.to_datetime(sp500_prices.index, errors='coerce')
    trading_days = sp500_prices.index

    ledger = []
    held = 0.0
    for _, flow in my_df.iterrows():
        when = flow['date']
        cash = flow['amount']
        # Nearest trading day by absolute distance; the earlier day wins a tie.
        nearest_day = min(trading_days, key=lambda day: abs(day - when))
        px = sp500_prices.loc[nearest_day, 'Close'].astype(float)

        if flow['desc'] == 'current value':
            kind = 'current value'
            qty = held
            booked = round(px * held, 1)
            value = round(px * held)
        elif cash < 0:
            kind = 'buy'
            qty = abs(cash) / px
            held += qty
            booked = cash
            value = px * held
        else:
            kind = 'sell'
            qty = cash / px
            held -= qty
            booked = cash
            value = px * held

        ledger.append({
            'date': when.date(),
            'type': kind,
            'qty': qty,
            'px': px,
            'amount': booked,
            'cum_qty': held,
            'market_value': value,
        })

    return pd.DataFrame(ledger)

def calculate_irr(df: pd.DataFrame) -> float:
    """Return the XIRR of the cash flows in ``df`` as a percentage.

    Args:
        df: Frame with a ``date`` column and an ``amount`` column.

    Returns:
        ``pyxirr.xirr`` of the flows multiplied by 100.

    Raises:
        ValueError: If ``pyxirr.xirr`` returns ``None``.
    """
    # Pass dates and amounts as two parallel sequences. Building a
    # {date: amount} dict would keep only the last amount for any repeated
    # date and silently drop the other cash flows.
    returns = pyxirr.xirr(df['date'].tolist(), df['amount'].tolist())
    if returns is None:
        raise ValueError("XIRR could not be computed for the given cash flows")
    return returns * 100

def total_investment(df: pd.DataFrame) -> float:
    """Return the negated sum of ``amount`` over all rows that are not 'current value'."""
    is_cash_flow = df['desc'] != 'current value'
    cash_flows = df.loc[is_cash_flow, 'amount']
    return cash_flows.sum() * -1

def current_value(df: pd.DataFrame) -> float:
    """Return the sum of ``amount`` over the rows whose ``desc`` is 'current value'."""
    valuation_rows = df.loc[df['desc'] == 'current value']
    return valuation_rows['amount'].sum()

def investment_summary() -> list[pd.DataFrame]:
    """Print headline figures for the portfolio next to an S&P 500 replica of the same cash flows.

    Prints total invested, market value, PnL, simple return, both IRRs, and the
    replica's share count, market value and the percentage by which the
    portfolio's market value exceeds it.

    Returns:
        ``[my_df, sp500_df]``: the portfolio ledger and the S&P 500 replica ledger.
    """
    my_df = calculate_my_returns()
    sp500_df = calculate_sp500_returns(my_df)

    total_invest = total_investment(my_df)
    market_value = current_value(my_df)
    pnl = market_value - total_invest
    pnl_perc = pnl / total_invest * 100

    # Collapse to one cash flow per date before computing each IRR.
    my_per_day = my_df.groupby('date').agg({'amount': 'sum', 'cumulative_investment': 'last'}).reset_index()
    my_irr = calculate_irr(my_per_day)
    sp500_per_day = sp500_df.groupby('date').agg({'amount': 'sum', 'market_value': 'last'}).reset_index()
    sp500_irr = calculate_irr(sp500_per_day)

    is_valuation = sp500_df['type'] == 'current value'
    sp500_equivalent_market_value = sp500_df.loc[is_valuation, 'market_value'].iloc[0]
    sp500_equivalent_shares = sp500_df.loc[is_valuation, 'cum_qty'].iloc[0]
    beat_sp500_perc = (market_value - sp500_equivalent_market_value) / sp500_equivalent_market_value * 100

    report = (
        f"Total investment: S$ {total_invest:,.0f}",
        f"Market value: S$ {market_value:,.0f}",
        f"PnL: S$ {pnl:,.2f}",
        f"Returns: {pnl_perc:.2f}%",
        f"IRR of my investments: {my_irr:.2f}%",
        f"IRR of sp500: {sp500_irr:.2f}%",
        f"S&P500 equivalent shares: {sp500_equivalent_shares:.2f}",
        f"S&P500 equivalent market value: S$ {sp500_equivalent_market_value:,.0f}",
        f"beat S&P500 by: {beat_sp500_perc:.2f}%",
    )
    for line in report:
        print(line)

    return [my_df, sp500_df]

def plot_investment_comparison(my_df: pd.DataFrame, sp500_df: pd.DataFrame):
    """Plot invested capital against the daily value of the S&P 500 replica.

    Draws the cumulative amount invested (green line), the replica's daily
    mark-to-market value (red line), and one marker each for the portfolio's
    and the replica's final value, then shows the figure.

    Args:
        my_df: Portfolio ledger with ``date``, ``desc``, ``amount`` and
            ``cumulative_investment`` columns.
        sp500_df: Replica ledger with ``date``, ``type``, ``amount`` and
            ``cum_qty`` columns.
    """
    start_date = sp500_df['date'].min()
    end_date = my_df.loc[my_df['desc'] == 'current value', 'date'].iloc[0]
    sp500_prices = get_sp500_data(start_date, end_date)

    daily_values = pd.DataFrame(index=sp500_prices.index)
    daily_values['qty'] = np.nan

    # For every priced day, carry forward the share count of the last ledger
    # row dated on or before that day (0 before the first trade).
    for day in daily_values.index:
        held_so_far = sp500_df.loc[sp500_df['date'] <= day, 'cum_qty']
        daily_values.loc[day, 'qty'] = held_so_far.iloc[-1] if len(held_so_far) else 0

    daily_values['sp500_close'] = sp500_prices['Close']
    daily_values['sp500_value'] = daily_values['qty'] * daily_values['sp500_close']

    # Final values shown as markers at end_date.
    final_portfolio_value = my_df.loc[my_df['desc'] == 'current value', 'amount'].iloc[0]
    final_sp500_value = sp500_df.loc[sp500_df['type'] == 'current value', 'amount'].iloc[0]

    import plotly.graph_objects as go

    traces = [
        # Cumulative investment line (green)
        go.Scatter(
            x=my_df['date'],
            y=my_df['cumulative_investment'],
            mode='lines',
            name='Total Invested (S$)',
            line={'color': 'green'},
        ),
        # S&P500 mark-to-market value (red)
        go.Scatter(
            x=daily_values.index,
            y=daily_values['sp500_value'],
            mode='lines',
            name='S&P500 Mark-to-Market (S$)',
            line={'color': 'red'},
        ),
        # Marker for the actual portfolio value (blue)
        go.Scatter(
            x=[end_date],
            y=[final_portfolio_value],
            mode='markers+text',
            name='Current Portfolio Value',
            text=[f'Portfolio: S${final_portfolio_value:,.0f}'],
            textposition='top right',
            marker={'color': 'blue', 'size': 10},
        ),
        # Marker for the S&P500 final value (red)
        go.Scatter(
            x=[end_date],
            y=[final_sp500_value],
            mode='markers+text',
            name='S&P500 Final Value',
            text=[f'S&P500: S${final_sp500_value:,.0f}'],
            textposition='bottom right',
            marker={'color': 'red', 'size': 10},
        ),
    ]

    fig = go.Figure()
    for trace in traces:
        fig.add_trace(trace)

    fig.update_layout(
        title='Investment Growth Comparison',
        xaxis_title='Date',
        yaxis_title='Value (S$)',
        showlegend=True,
    )

    fig.show()

In [5]:
# Print the summary figures and keep both ledgers (portfolio and S&P 500 replica).
my_df, sp500_df = investment_summary()

Total investment: S$ 489,126
Market value: S$ 663,004
PnL: S$ 173,878.00
Returns: 35.55%
IRR of my investments: 18.25%
IRR of sp500: 15.27%
S&P500 equivalent shares: 100.86
S&P500 equivalent market value: S$ 629,185
beat S&P500 by: 5.38%


In [6]:
# Chart the two ledgers returned by investment_summary().
plot_investment_comparison(my_df, sp500_df)

In [50]:
# Load the "stocks" sheet of the trade workbook and display the BABA rows.
stocks = pd.read_excel(STOCK_FILE, sheet_name="stocks")
stocks[stocks['ticker']=='BABA']

Unnamed: 0.1,Unnamed: 0,ticker,side,px,qty,broker
0,2021-08-31,BABA,BUY,168.0,10,saxo
40,2024-09-18,BABA,SELL,85.26,10,saxo


In [51]:
class Side(Enum):
    """Direction of a trade. Values are the lower-case spellings ``"buy"`` and ``"sell"``."""
    BUY = "buy"
    SELL = "sell"

    @classmethod
    def _missing_(cls, value):
        """Resolve string values case-insensitively, e.g. ``Side("BUY")`` -> ``Side.BUY``.

        Called by ``Enum`` only when ``value`` matches no member exactly.
        Returning ``None`` lets ``Enum`` raise its usual ``ValueError``.
        """
        if isinstance(value, str):
            normalised = value.strip().lower()
            for member in cls:
                if member.value == normalised:
                    return member
        return None


class Holdings:
    """Running position and profit/loss figures for a single ticker.

    All figures are refreshed after every call to ``add_transaction``:

    * ``shares`` / ``bought_shares`` / ``sold_shares``: net, total bought, total sold.
    * ``spent`` / ``proceeds``: cash paid for buys / received from sells.
    * ``cost_basis``: negated net cash outlay, ``-(spent - proceeds)``.
    * ``cost_basis_per_share``: net cash outlay per share still held (0 when
      no shares are held).
    * ``realised_pnl``: proceeds minus the share of ``spent`` attributable to
      the sold fraction of all bought shares.
    * ``unrealised_pnl``: shares held times (market price minus average
      purchase price).
    * ``pnl``: realised plus unrealised; equals
      ``proceeds + market_value - spent`` while shares are held.
    * ``returns``: ``pnl`` as a percentage of ``spent``.
    """

    def __init__(self, ticker: str, market_px: "float | None" = None):
        """Create an empty position.

        Args:
            ticker: Symbol of the instrument.
            market_px: Price used for valuation. When omitted, the price is
                looked up with ``get_last_close_px(ticker)``.
        """
        self.ticker = ticker
        self.shares = 0
        self.sold_shares = 0
        self.bought_shares = 0
        self.market_value = 0
        self.market_px = get_last_close_px(self.ticker) if market_px is None else market_px
        self.spent = 0
        self.proceeds = 0
        self.cost_basis = 0
        self.cost_basis_per_share = 0
        self.proceeds_per_share = 0
        self.realised_pnl = 0
        self.unrealised_pnl = 0
        self.pnl = 0
        self.returns = 0

    def add_transaction(self, shares: float, px: float, side: "Side"):
        """Apply one trade and recompute every derived figure.

        Args:
            shares: Number of shares traded.
            px: Price per share of the trade.
            side: ``Side.SELL`` reduces the position; anything else is treated as a buy.
        """
        if side == Side.SELL:
            self.shares -= shares
            self.proceeds += shares * px
            self.sold_shares += shares
        else:
            self.shares += shares
            self.spent += shares * px
            self.bought_shares += shares

        net_cash_out = self.spent - self.proceeds
        # Guard the divisions: a sell recorded before any buy has bought_shares == 0.
        sold_fraction = self.sold_shares / self.bought_shares if self.bought_shares > 0 else 0.0
        avg_buy_px = self.spent / self.bought_shares if self.bought_shares > 0 else 0.0

        self.cost_basis = net_cash_out * -1
        self.cost_basis_per_share = net_cash_out / self.shares if self.shares > 0 else 0
        self.realised_pnl = self.proceeds - self.spent * sold_fraction if self.sold_shares > 0 else 0
        # Value the remaining shares against the average purchase price. Using
        # cost_basis_per_share here would fold the sale proceeds into the
        # unrealised figure as well, so realised profit would be counted twice
        # in pnl after a partial sale.
        self.unrealised_pnl = (self.market_px - avg_buy_px) * self.shares if self.shares > 0 else 0
        self.pnl = self.realised_pnl + self.unrealised_pnl
        self.market_value = self.market_px * self.shares
        self.returns = (self.pnl / self.spent) * 100 if self.spent > 0 else 0


def get_last_close_px(ticker: str) -> float:
    """Return the most recent ``Close`` value that yfinance reports for ``ticker``.

    Raises:
        AssertionError: If the download returns ``None``.
        ValueError: If the download returns an empty frame.
    """
    # NOTE(review): period='1d' combined with interval='5d' looks as if the two
    # values may be swapped - confirm against the yfinance docs before changing.
    quotes = yf.download(ticker, period='1d', interval='5d', progress=False, auto_adjust=True)
    assert quotes is not None, f"No data found for ticker: {ticker}"
    if not quotes.empty:
        closes = quotes['Close']
        # .item() unwraps the last entry to a plain Python number.
        return closes.iloc[-1].item()

    raise ValueError(f"No data found for ticker: {ticker}")


def calculate_stock_holdings(df: pd.DataFrame) -> pd.DataFrame:
    """Summarise a trade log into one row of position and PnL figures per ticker.

    Args:
        df: Trade log with ``ticker``, ``side``, ``px`` and ``qty`` columns.
            ``side`` is lower-cased before being converted to ``Side``.

    Returns:
        DataFrame with one row per ticker, in order of first appearance, and
        the columns ``ticker``, ``shares``, ``cost_basis``,
        ``cost_basis_per_share``, ``market_px``, ``market_value``,
        ``realised_pnl``, ``unrealised_pnl``, ``pnl`` and ``returns``. Monetary
        figures other than ``cost_basis`` are rounded to two decimals and
        ``returns`` is a string ending in ``%``.
    """
    # Create every position up front, one per distinct ticker.
    holdings = {}
    for ticker in df['ticker'].unique():
        holdings[ticker] = Holdings(ticker)

    for _, trade in df.iterrows():
        position = holdings[trade['ticker']]
        position.add_transaction(trade['qty'], trade['px'], Side(trade['side'].lower()))

    positions = list(holdings.values())
    # Output column -> how to read it off a position; dict order is column order.
    extractors = {
        'ticker': lambda h: h.ticker,
        'shares': lambda h: h.shares,
        'cost_basis': lambda h: h.cost_basis,
        'cost_basis_per_share': lambda h: round(h.cost_basis_per_share, 2),
        'market_px': lambda h: round(h.market_px, 2),
        'market_value': lambda h: round(h.market_px * h.shares, 2),
        'realised_pnl': lambda h: round(h.realised_pnl, 2),
        'unrealised_pnl': lambda h: round(h.unrealised_pnl, 2),
        'pnl': lambda h: round(h.pnl, 2),
        'returns': lambda h: f"{round(h.returns, 2)}%",
    }
    summary = pd.DataFrame({
        column: [read(h) for h in positions]
        for column, read in extractors.items()
    })

    return summary

# Display the per-ticker summary built from the trade log loaded above.
calculate_stock_holdings(stocks)

Unnamed: 0,ticker,shares,cost_basis,cost_basis_per_share,market_px,market_value,realised_pnl,unrealised_pnl,pnl,returns
0,BABA,0,-827.4,0.0,117.07,0.0,-827.4,0.0,-827.4,-49.25%
1,SMIN,130,-9454.9,72.73,73.02,9492.6,0.0,37.7,37.7,0.4%
2,O39.SI,500,-6085.0,12.17,16.79,8395.0,0.0,2310.0,2310.0,37.96%
3,INDY,254,-13094.08,51.55,51.77,13149.58,0.0,55.5,55.5,0.42%
4,D05.SI,320,-10652.6,33.29,47.6,15232.0,0.0,4579.4,4579.4,42.99%
5,META,47,-14580.8,310.23,750.01,35250.47,0.0,20669.67,20669.67,141.76%
6,J69U.SI,2300,-4922.0,2.14,2.22,5106.0,0.0,184.0,184.0,3.74%
7,ME8U.SI,6500,-14235.0,2.19,2.02,13130.0,0.0,-1105.0,-1105.0,-7.76%
8,AAPL,20,-2275.0,113.75,202.38,4047.6,0.0,1772.6,1772.6,77.92%
9,QQQ,150,-69818.57,465.46,553.88,83082.0,0.0,13263.43,13263.43,19.0%
