In [22]:
import pandas as pd
import pyxirr
import yfinance as yf
import numpy as np

In [3]:
file = 'D:\jupyter notebooks\investment_returns.xlsx'
sheets = ['Saxo - SGD', 'IBKR - SGD(U7470748)', 'IBKR endowment plan', 'Tiger broker', 'philips - SGD']

In [4]:
dfs = {sheet : pd.read_excel(file, sheet_name=sheet) for sheet in sheets}

In [5]:
dfs['Saxo - SGD']['source'] = 'saxo'
dfs['IBKR - SGD(U7470748)']['source'] = 'ibkr'
dfs['IBKR endowment plan']['source'] = 'ibkr_endowment'
dfs['Tiger broker']['source'] = 'tiger'
dfs['philips - SGD']['source'] = 'philips'

In [48]:
def remove_unnamed_columns(dfs: dict):
    for key in dfs.keys():
        dfs[key] = dfs[key].loc[:, ~dfs[key].columns.str.contains('^Unnamed')]

def filter_invalid_timestamps(dfs: dict):
    for key, df in dfs.items():
        df['parse_timestamp'] = pd.to_datetime(df['Date'], errors='coerce')
        invalid_rows = df[df['parse_timestamp'].isna()]
        df.drop(invalid_rows.index, inplace=True)
        df['date'] = df['parse_timestamp'].dt.date
        df.drop(columns=['parse_timestamp', 'Date'], inplace=True)
        dfs[key] = df
        print(f"dropped invalid rows in {key}")
        print(invalid_rows)

def rename_columns(dfs: dict):
    for key, df in dfs.items():
        df.columns = [col.lower() for col in df.columns]
        df = df[['date', 'source', 'amount', 'desc']]
        dfs[key] = df

def get_sp500_data(start_date: pd.Timestamp, end_date: pd.Timestamp):
    sp500_ticker = '^GSPC'
    sp500_data = yf.download(sp500_ticker, start=start_date, end=end_date, progress=False)
    assert sp500_data is not None
    sp500_data.columns = sp500_data.columns.get_level_values(0)
    sp500_data.columns.name = None

    return sp500_data
    

In [7]:
set(dfs['Saxo - SGD'].columns.to_list() + dfs['IBKR - SGD(U7470748)'].columns.to_list() + \
    dfs['IBKR endowment plan'].columns.to_list() + dfs['Tiger broker'].columns.to_list() + \
    dfs['philips - SGD'].columns.to_list())

{'Amount', 'Date', 'Desc', 'Unnamed: 3', 'source'}

In [8]:
remove_unnamed_columns(dfs)
filter_invalid_timestamps(dfs)
rename_columns(dfs)


dropped invalid rows in Saxo - SGD
          Date Desc        Amount source parse_timestamp
28         NaN  NaN           NaN   saxo             NaT
30         NaN  NaN           NaN   saxo             NaT
31         NaN  NaN           NaN   saxo             NaT
32         NaN  NaN           NaN   saxo             NaT
33  Investment  NaN  52292.000000   saxo             NaT
34      Profit  NaN  17708.000000   saxo             NaT
35        XIRR  NaN      0.062374   saxo             NaT
dropped invalid rows in IBKR - SGD(U7470748)
          Date Desc         Amount source parse_timestamp
58         NaN  NaN            NaN   ibkr             NaT
59  Investment  NaN  209791.000000   ibkr             NaT
60      Profit  NaN   50909.000000   ibkr             NaT
61        XIRR  NaN       0.167748   ibkr             NaT
dropped invalid rows in IBKR endowment plan
          Date Desc        Amount          source parse_timestamp
23         NaN  NaN           NaN  ibkr_endowment             Na

In [9]:
df = pd.concat(dfs.values(), axis=0)

In [10]:
df.date = pd.to_datetime(df.date, errors='coerce')

In [11]:
# df_combined = df.groupby(['date', 'desc'], as_index=False)['amount'].sum()
df_curr = df[df['desc'] == 'current value']
df_other = df[df['desc'] != 'current value']
df_curr



Unnamed: 0,date,source,amount,desc
29,2025-07-02,saxo,70000.0,current value
57,2025-07-02,ibkr,260700.0,current value
22,2025-07-02,ibkr_endowment,24900.0,current value
28,2025-07-09,tiger,276000.0,current value
32,2025-02-13,philips,11113.0,current value


In [12]:
idx = df_curr['date'].idxmax()
amount_sum = df_curr['amount'].sum()
df_curr = df_curr.loc[[idx],:].copy()
df_curr['amount'] = amount_sum
df_curr['source'] = 'all_sources'
df_curr

Unnamed: 0,date,source,amount,desc
28,2025-07-09,all_sources,642713.0,current value


In [13]:
df = pd.concat([df_other, df_curr], axis=0).sort_values(by='date', ascending=True).reset_index(drop=True)
df

Unnamed: 0,date,source,amount,desc
0,2019-02-22,philips,-843.0,Invest
1,2019-03-25,philips,-737.0,Invest
2,2019-04-29,philips,-770.0,Invest
3,2019-05-23,philips,-800.0,Invest
4,2019-06-27,philips,-800.0,Invest
...,...,...,...,...
163,2025-05-07,ibkr_endowment,-544.0,VOO
164,2025-05-12,tiger,-10000.0,
165,2025-07-02,ibkr,-6200.0,FBTC
166,2025-07-09,tiger,-10000.0,


In [None]:
my_returns = pyxirr.xirr(dict(zip(df['date'], df['amount'])))
assert my_returns is not None
print(f"XIRR: {my_returns * 100:.2f}%")

XIRR: 18.34%


In [15]:
df['desc'].unique()

array(['Invest', 'Invest saxo', 'invest', 'withdraw saxo',
       'Initial investment', 'epam', 'withdraw', 'Invest IB', 'SG reits',
       'Jan deposit', 'Transfer to IBKR endowment', 'Feb',
       'transfered from other account', '  mar + bonus', 'ocbc',
       ' invest', 'trxf to other IBKR account', 'tesla, voo, msft',
       'tesla, voo, msft : sent as USD(113 USD)',
       'tesla, voo, msft : trxf from IBKR endowment plan (150USD)', 'dbs',
       'transfer to  IB endowment account',
       'invest from other ibkr account',
       'transfer from IB endowment account', 'VOO', 'epam, voo',
       'ocbc, capitaland industrial trust', 'GOOG,SMIN', 'Tesla',
       'VOO,QQQ', 'VOO,TSLA', nan, 'SMIN', 'TSLA, MSFT', 'CICT', 'TSLA',
       'PLTR', 'capitaland ascott', 'QQQ', 'TSLA,QQQ,GOOG',
       'transfer to IBKR endowment', 'VOO, QQQ', 'CY6U',
       'QQQ, PLTR, NVDA', 'FBTC', 'current value'], dtype=object)

In [16]:
df[df['desc']=='current value']

Unnamed: 0,date,source,amount,desc
167,2025-07-09,all_sources,642713.0,current value


In [50]:
start_date = df['date'].min()
end_date = df['date'].max()
print(f"Start date: {start_date}, End date: {end_date}")
sp500_prices = get_sp500_data(start_date, end_date)
sp500_prices.index = pd.to_datetime(sp500_prices.index, errors='coerce')

Start date: 2019-02-22 00:00:00, End date: 2025-07-09 00:00:00


  sp500_data = yf.download(sp500_ticker, start=start_date, end=end_date, progress=False)


In [39]:
sp500_prices.columns

Index(['Close', 'High', 'Low', 'Open', 'Volume'], dtype='object')

In [51]:
transactions = []
total_shares = 0.0
sp500_current_px = 0.0
sp500_current_date = pd.Timestamp('1970-01-01')
for _, rows in df.iterrows():
    date = rows['date']
    amount = rows['amount']
    available_dates = sp500_prices.index
    closest_date = min(available_dates, key=lambda d: abs(d - date))
    px = sp500_prices.loc[closest_date, 'Close']
    # print(f"{date} {closest_date} {px}")
    if rows['desc'] == 'current value':
        sp500_current_px = px
        sp500_current_date = closest_date
        continue    
    
    if amount < 0:
        shares_bought = abs(amount) / px
        total_shares += shares_bought
        transactions.append({
            'date': date.date(),
            'type': 'buy',
            'shares': shares_bought,
            'price_per_share': px,
            'amount': amount
        })
    else:
        shares_sold = amount / px
        total_shares -= shares_sold
        transactions.append({
            'date': date.date(),
            'type': 'sell',
            'shares': shares_sold,
            'price_per_share': px,
            'amount': amount            
        })

print("sp500 equivalent shares:", total_shares)
transactions.append({  
    'date': pd.to_datetime(sp500_current_date).date(),
    'type': 'current value',
    'shares': total_shares,
    'price_per_share': sp500_current_px,
    'amount': total_shares * sp500_current_px
})
print("sp500 equivalent shares:", total_shares)
print("sp500 equivalent current value:", total_shares * sp500_current_px)
sp500_df = pd.DataFrame(transactions)
sp500_df

sp500 equivalent shares: 99.23768263693569
sp500 equivalent shares: 99.23768263693569
sp500 equivalent current value: 617806.1799481319


Unnamed: 0,date,type,shares,price_per_share,amount
0,2019-02-22,buy,0.301862,2792.669922,-843.000000
1,2019-03-25,buy,0.263369,2798.360107,-737.000000
2,2019-04-29,buy,0.261635,2943.030029,-770.000000
3,2019-05-23,buy,0.283463,2822.239990,-800.000000
4,2019-06-27,buy,0.273512,2924.919922,-800.000000
...,...,...,...,...,...
163,2025-05-07,buy,0.096603,5631.279785,-544.000000
164,2025-05-12,buy,1.711101,5844.189941,-10000.000000
165,2025-07-02,buy,0.995597,6227.419922,-6200.000000
166,2025-07-09,buy,1.606292,6225.520020,-10000.000000


In [54]:
print(type(sp500_df.loc[0, 'date']))
print(type(df.loc[0, 'date']))
print(type(sp500_df.loc[0, 'amount']))

<class 'datetime.date'>
<class 'pandas._libs.tslibs.timestamps.Timestamp'>
<class 'numpy.float64'>


In [56]:
sp500_returns = pyxirr.xirr(dict(zip(sp500_df['date'], sp500_df['amount'])))
assert sp500_returns is not None
print(f"IRR if invested in sp500: {sp500_returns * 100:.2f}%")

IRR if invested in sp500: 15.52%


In [58]:
print(f"IRR of my investments: {my_returns * 100:.2f}%")

IRR of my investments: 18.34%


In [66]:
total_investment = df[df['desc'] != 'current value']['amount'].sum() * -1
current_value = df[df['desc'] == 'current value']['amount'].sum()
profit = current_value - total_investment
xirr = my_returns * 100
sp500_xirr = sp500_returns * 100
returns = profit / total_investment * 100
xirr_sp500 = sp500_returns * 100
print(f"total investment: {total_investment:.2f}")
print(f"current value: {current_value:.2f}")
print(f"profit: {profit:.2f}")
print(f"returns: {returns:.2f}%")
print(f"XIRR of my investments: {xirr:.2f}%")
print(f"XIRR of sp500: {sp500_xirr:.2f}%")

total investment: 478926.00
current value: 642713.00
profit: 163787.00
returns: 34.20%
XIRR of my investments: 18.34%
XIRR of sp500: 15.52%
