In [8]:
import pandas as pd
import pyxirr
import yfinance as yf
import numpy as np

In [16]:
INVEST_FILE = 'D:\jupyter notebooks\investment_returns.xlsx'
SHEETS_TO_SOURCE_MAP = {"Saxo - SGD": "saxo", 
                        "IBKR - SGD(U7470748)": "ibkr",
                         "IBKR endowment plan": "ibkr_endowment", 
                         "Tiger broker": "tiger", 
                         "philips - SGD": "philips"}

In [30]:
def remove_unnamed_columns(dfs: dict):
    for key in dfs.keys():
        dfs[key] = dfs[key].loc[:, ~dfs[key].columns.str.contains('^Unnamed')]

def filter_invalid_timestamps(dfs: dict):
    for key, df in dfs.items():
        df['parse_timestamp'] = pd.to_datetime(df['Date'], errors='coerce')
        invalid_rows = df[df['parse_timestamp'].isna()]
        df.drop(invalid_rows.index, inplace=True)
        df['date'] = df['parse_timestamp'].dt.date
        df.drop(columns=['parse_timestamp', 'Date'], inplace=True)
        dfs[key] = df
        # print(f"dropped invalid rows in {key}")
        # print(invalid_rows)

def rename_columns(dfs: dict):
    for key, df in dfs.items():
        df.columns = [col.lower() for col in df.columns]
        df = df[['date', 'source', 'amount', 'desc']]
        dfs[key] = df

def get_sp500_data(start_date: pd.Timestamp, end_date: pd.Timestamp):
    sp500_ticker = '^GSPC'
    sp500_data = yf.download(sp500_ticker, start=start_date, end=end_date, progress=False)
    assert sp500_data is not None
    sp500_data.columns = sp500_data.columns.get_level_values(0)
    sp500_data.columns.name = None

    return sp500_data

def calculate_my_returns() -> pd.DataFrame:
    sheets = list(SHEETS_TO_SOURCE_MAP.keys())
    dfs = {sheet : pd.read_excel(INVEST_FILE, sheet_name=sheet) for sheet in sheets}
    for sheet, source in SHEETS_TO_SOURCE_MAP.items():
        dfs[sheet]["source"] = source
    
    remove_unnamed_columns(dfs)
    filter_invalid_timestamps(dfs)
    rename_columns(dfs)

    df = pd.concat(dfs.values(), axis=0)
    df.date = pd.to_datetime(df.date, errors='coerce')

    df_curr = df[df['desc'] == 'current value']
    df_other = df[df['desc'] != 'current value']

    idx = df_curr['date'].idxmax()
    amount_sum = df_curr['amount'].sum()
    df_curr = df_curr.loc[[idx],:].copy()
    df_curr['amount'] = amount_sum
    df_curr['source'] = 'all_sources'

    df = pd.concat([df_other, df_curr], axis=0).sort_values(by='date', ascending=True).reset_index(drop=True)

    return df


def calculate_sp500_returns(my_df: pd.DataFrame) -> pd.DataFrame:
    start_date = my_df['date'].min()
    end_date = my_df['date'].max()
    print(f"Start date: {start_date}, End date: {end_date}")
    sp500_prices = get_sp500_data(start_date, end_date)
    sp500_prices.index = pd.to_datetime(sp500_prices.index, errors='coerce')

    transactions = []
    total_shares = 0.0
    sp500_current_px = 0.0
    sp500_current_date = pd.Timestamp('1970-01-01')
    for _, rows in my_df.iterrows():
        date = rows['date']
        amount = rows['amount']
        available_dates = sp500_prices.index
        closest_date = min(available_dates, key=lambda d: abs(d - date))
        px = sp500_prices.loc[closest_date, 'Close']
        # print(f"{date} {closest_date} {px}")
        if rows['desc'] == 'current value':
            sp500_current_px = px
            sp500_current_date = closest_date
            continue    
        
        if amount < 0:
            shares_bought = abs(amount) / px
            total_shares += shares_bought
            transactions.append({
                'date': date.date(),
                'type': 'buy',
                'shares': shares_bought,
                'price_per_share': px,
                'amount': amount
            })
        else:
            shares_sold = amount / px
            total_shares -= shares_sold
            transactions.append({
                'date': date.date(),
                'type': 'sell',
                'shares': shares_sold,
                'price_per_share': px,
                'amount': amount            
            })

    # print("sp500 equivalent shares:", total_shares)
    transactions.append({  
        'date': pd.to_datetime(sp500_current_date).date(),
        'type': 'current value',
        'shares': total_shares,
        'price_per_share': sp500_current_px,
        'amount': total_shares * sp500_current_px
    })
    print(f"sp500 equivalent shares:{total_shares:.2f}")
    sp500_df = pd.DataFrame(transactions)
    return sp500_df

def calculate_irr(df: pd.DataFrame) -> float:
    returns = pyxirr.xirr(dict(zip(df['date'], df['amount'])))
    assert returns is not None
    return returns * 100

def total_investment(df: pd.DataFrame) -> float:
    total = df[df['desc'] != 'current value']['amount'].sum() * -1
    return total

def current_value(df: pd.DataFrame) -> float:
    return df[df['desc'] == 'current value']['amount'].sum()

def investment_summary():
    my_df = calculate_my_returns()
    sp500_df = calculate_sp500_returns(my_df)

    total_invest = total_investment(my_df)
    current_val = current_value(my_df)
    pnl = current_val - total_invest
    pnl_perc = pnl / total_invest * 100

    my_irr = calculate_irr(my_df)
    sp500_irr = calculate_irr(sp500_df)

    print(f"total investment: S$ {total_invest:,.0f}")
    print(f"current value: S$ {current_val:,.0f}")
    print(f"PnL: S$ {pnl:,.2f}")
    print(f"returns: {pnl_perc:.2f}%")
    print(f"IRR of my investments: {my_irr:.2f}%")
    print(f"IRR of sp500: {sp500_irr:.2f}%")

In [31]:
investment_summary()

Start date: 2019-02-22 00:00:00, End date: 2025-07-09 00:00:00


  sp500_data = yf.download(sp500_ticker, start=start_date, end=end_date, progress=False)


sp500 equivalent shares:99.24
total investment: S$ 478,926
current value: S$ 642,713
PnL: S$ 163,787.00
returns: 34.20%
IRR of my investments: 18.34%
IRR of sp500: 15.52%
