## Start

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from pathlib import Path
import os
import glob
from scipy import optimize
import importlib

In [2]:
project_path = Path.cwd().parent
data_dir = f'{project_path}/data'
output_dir = f'{project_path}/output'

In [3]:
test_case_pair = [
    ("Z23390746", "AAPL"),
    ("Z23390746", "FXAIX"),
    ("Z23390746", "912797SZ1"),
    ("Z23390746", "912797RJ8"),
    ("86964", "84679P173"),
    ("86964", "FBCGX"),
    ("Z06872898", None),
    ("241802439", "FZILX"),
    ("241802439", "FXAIX"),
]

# data_loader

## get_latest_position_file

In [7]:
files = glob.glob(os.path.join(data_dir, 'Portfolio_Positions_*.csv'))
files

['/Users/yifanli/Github/fidelity-portfolio-tracker/data/Portfolio_Positions_Apr-06-2025.csv',
 '/Users/yifanli/Github/fidelity-portfolio-tracker/data/Portfolio_Positions_Aug-05-2025.csv',
 '/Users/yifanli/Github/fidelity-portfolio-tracker/data/Portfolio_Positions_Nov-16-2025.csv',
 '/Users/yifanli/Github/fidelity-portfolio-tracker/data/Portfolio_Positions_Apr-29-2025.csv',
 '/Users/yifanli/Github/fidelity-portfolio-tracker/data/Portfolio_Positions_Jan-15-2026.csv',
 '/Users/yifanli/Github/fidelity-portfolio-tracker/data/Portfolio_Positions_Dec-31-2025.csv',
 '/Users/yifanli/Github/fidelity-portfolio-tracker/data/Portfolio_Positions_Apr-02-2025.csv',
 '/Users/yifanli/Github/fidelity-portfolio-tracker/data/Portfolio_Positions_May-06-2025.csv',
 '/Users/yifanli/Github/fidelity-portfolio-tracker/data/Portfolio_Positions_Jan-31-2026.csv']

In [8]:
latest_file = None
latest_date = None

for f in files:
    basename = os.path.basename(f)
    date_part = basename.replace('Portfolio_Positions_', '').replace('.csv', '')
    try:
        date_obj = datetime.strptime(date_part, '%b-%d-%Y')
        if latest_date is None or date_obj > latest_date:
            latest_date = date_obj
            latest_file = f
    except ValueError:
        continue
latest_date = pd.to_datetime(latest_date)
[latest_file, latest_date]

['/Users/yifanli/Github/fidelity-portfolio-tracker/data/Portfolio_Positions_Jan-31-2026.csv',
 Timestamp('2026-01-31 00:00:00')]

## clean_positions

In [6]:
from support_functions.data_loader import get_latest_position_file, clean_currency

data_dir = f'{project_path}/data'
pos_file, pos_date = get_latest_position_file(data_dir)
positions_df = pd.read_csv(pos_file, index_col=False)
positions_df = positions_df.dropna(subset=['Account Name'])
cols_to_clean = [
    'Last Price', 'Current Value', 
    'Cost Basis Total', 'Today\'s Gain/Loss Dollar', 
    'Total Gain/Loss Dollar'
]

In [7]:
for col in cols_to_clean:
    if col in positions_df.columns:
        positions_df[col] = positions_df[col].apply(clean_currency)

# Clean Quantity (remove match for formatting issues if any)
if 'Quantity' in positions_df.columns:
    positions_df['Quantity'] = pd.to_numeric(positions_df['Quantity'], errors='coerce').fillna(0)

In [8]:
positions_df['Account Name'].unique()

array(['Individual', 'ERNST & YOUNG 401(K)',
       'Cash Management (Individual)', 'Health Savings Account'],
      dtype=object)

## load_transactions

In [9]:
data_dir = f'{project_path}/data'
max_cols=14
hist_files = glob.glob(os.path.join(data_dir, 'Accounts_History_*.csv'))
transactions_dfs = []
print(f"Found {len(hist_files)} history files.")

for f in hist_files:
    df = pd.read_csv(f, header=0, usecols=range(max_cols))
    transactions_dfs.append(df)

transactions_df = pd.concat(transactions_dfs, ignore_index=True)

Found 4 history files.


In [10]:
transactions_df.columns

Index(['Run Date', 'Account', 'Account Number', 'Action', 'Symbol',
       'Description', 'Type', 'Price ($)', 'Quantity', 'Commission ($)',
       'Fees ($)', 'Accrued Interest ($)', 'Amount ($)', 'Settlement Date'],
      dtype='object')

In [12]:
transactions_df['Account'].unique()

array(['Individual', 'Health Savings Account',
       'Cash Management (Individual)', 'ERNST & YOUNG 401(K)', nan],
      dtype=object)

In [13]:
[transactions_df.iloc[292]['Run Date'],transactions_df.iloc[228]['Run Date']]

['01/31/2024', '04/25/2024']

## clean_transactions

In [10]:
from support_functions import data_loader
importlib.reload(data_loader)
from support_functions.data_loader import load_transactions, clean_currency
data_dir = f'{project_path}/data'
transactions_df = load_transactions(data_dir)

Found 5 history files.


In [11]:
transactions_df.columns

Index(['Run Date', 'Account', 'Account Number', 'Action', 'Symbol',
       'Description', 'Type', 'Price ($)', 'Quantity', 'Commission ($)',
       'Fees ($)', 'Accrued Interest ($)', 'Amount ($)', 'Settlement Date'],
      dtype='object')

In [16]:
transactions_df['Account'].unique()

array(['Individual', 'Health Savings Account',
       'Cash Management (Individual)', 'ERNST & YOUNG 401(K)', nan],
      dtype=object)

In [13]:
transactions_df[transactions_df['Action'].str.contains('Exchange',na=False)]

Unnamed: 0,Run Date,Account,Account Number,Action,Symbol,Description,Type,Price ($),Quantity,Commission ($),Fees ($),Accrued Interest ($),Amount ($),Settlement Date
337,12/17/2025,ERNST & YOUNG 401(K),86964,Exchange Out,,SP 500 INDEX PL CL E,,-60.961,,,,,-19450.82,
338,12/17/2025,ERNST & YOUNG 401(K),86964,Exchange In,,SP 500 INDEX PL CL F,,60.955,,,,,19450.82,


In [26]:
transactions_df['Amount ($)'].unique()

array(['Amount ($)', '-198.36', '-3.26', '0.2', '198.36', '3.26',
       '159.61', '40', '132.4', '308.95', '30000', '-342.04', '342.04',
       '-1642.64', '1642.64', '-11.12', '-1065.29', '11.12', '1065.29',
       '4.85', '35.4', '4', '-98925.69', '-2100', '24.9', '20000',
       '-20000', '-40000', '150000', '-99486.67', '18.3', '-120.16',
       '120.16', '-2.05', '50000', '2.05', '17.5', '-19861.24', '-3200',
       '-29840.57', '5', '7', '105.92', '247.16', '12.5', '-211.35',
       '211.35', '-1.7', '0.22', '1.7', '-19891.97', '-49432.51', '70000',
       '-4600', '-49429.35', '-19926.11', '-49424.93', '-6.64', '-241.99',
       '6.64', '241.99', '-1000', '5.33', '12.44', '11.1', '0.08',
       '-1.27', '0.28', '-2.98', '-83.8', '83.8', '-3.44', '0.23',
       '-29658.75', '3.44', '-29838.3', '10000', '33.4', '25000', '9.62',
       '22.5', '-9941.9', '17.1', '-260.33', '-2.08', '0.24', '260.33',
       '2.08', '-49699.58', '-49363.67', '129.32', '301.76', '100000',
       '-49

In [5]:
transactions_df['Account'].unique()

array(['Individual', 'Cash Management (Individual)',
       'Health Savings Account', nan], dtype=object)

In [41]:
transactions_df[transactions_df['Account']=='Health Savings Account'].head()

Unnamed: 0,Run Date,Account,Account Number,Action,Symbol,Description,Type,Quantity,Price ($),Commission ($),Fees ($),Accrued Interest ($),Amount ($),Settlement Date
142,2024-01-17,Health Savings Account,241802439,PARTIC CONTR CURRENT PARTICIPANT CUR YR (Cash),,No Description,Cash,0.0,0.0,0.0,0.0,0.0,159.61,NaT
156,2024-01-31,Health Savings Account,241802439,INTEREST EARNED FDIC INSURED DEPOSIT AT LEADER...,QKZCQ,FDIC INSURED DEPOSIT AT LEADER BANK HSA,Cash,0.0,0.0,0.0,0.0,0.0,0.16,NaT
141,2024-02-01,Health Savings Account,241802439,PARTIC CONTR CURRENT PARTICIPANT CUR YR (Cash),,No Description,Cash,0.0,0.0,0.0,0.0,0.0,159.61,NaT
140,2024-02-14,Health Savings Account,241802439,PARTIC CONTR CURRENT PARTICIPANT CUR YR (Cash),,No Description,Cash,0.0,0.0,0.0,0.0,0.0,159.61,NaT
157,2024-02-26,Health Savings Account,241802439,EXCHANGED TO FDRXX FIDELITY GOVERNMENT CASH RE...,FDRXX,FIDELITY GOVERNMENT CASH RESERVES,Cash,0.0,0.0,0.0,0.0,0.0,0.0,NaT


## load_data

In [None]:
from support_functions import analysis
importlib.reload(analysis)
from support_functions.data_loader import (
    get_latest_position_file, clean_positions,
    load_transactions, clean_transactions
)

In [4]:
data_dir = f'{project_path}/data'
pos_file, pos_date = get_latest_position_file(data_dir)
print(f"Loading positions from: {pos_file} (Date: {pos_date.strftime('%Y-%m-%d')})")
positions_df = pd.read_csv(pos_file, index_col=False)

positions_df = clean_positions(positions_df)


Loading positions from: /Users/yifanli/Github/fidelity-portfolio-tracker/data/Portfolio_Positions_Dec-31-2025.csv (Date: 2025-12-31)


In [5]:
transactions_df = load_transactions(data_dir)
    
transactions_df = clean_transactions(transactions_df)

Found 4 history files.


In [88]:
# transactions_df[transactions_df['Symbol'] == 'AAPL']

## Clean Data

In [89]:
from support_functions.data_loader import load_data

data_dir = f'{project_path}/data'
positions_df, transactions_df, latest_date = load_data(data_dir)

Loading positions from: /Users/yifanli/Github/fidelity-portfolio-tracker/data/Portfolio_Positions_Dec-31-2025.csv (Date: 2025-12-31)
Found 4 history files.


In [90]:
ref_df = positions_df[['Symbol', 'Description']].dropna().drop_duplicates()
desc_to_sym = dict(zip(ref_df['Description'].str.strip(), ref_df['Symbol'].str.strip()))

In [93]:
mask_missing_sym = transactions_df['Symbol'].isna() | (transactions_df['Symbol'] == '')
mapped_syms = transactions_df.loc[mask_missing_sym, 'Description'].str.strip().map(desc_to_sym)
transactions_df.loc[mask_missing_sym, 'Symbol'] = mapped_syms.fillna(transactions_df.loc[mask_missing_sym, 'Symbol'])

In [100]:
desc_to_sym['SP 500 INDEX PL CL F']

'84679P173'

In [None]:
transactions_df.loc[295]['Description']
SP 500 INDEX PL CL E
SP 500 INDEX PL CL F

'SP 500 INDEX PL CL E'

In [95]:
transactions_df[transactions_df['Description']=='SP 500 INDEX PL CL E']

Unnamed: 0,Run Date,Account,Account Number,Action,Symbol,Description,Type,Price ($),Quantity,Commission ($),Fees ($),Accrued Interest ($),Amount ($),Settlement Date,Asset Type
295,2024-01-26,ERNST & YOUNG 401(K),86964,Contributions,,SP 500 INDEX PL CL E,,1.070,0.0,0.0,0.0,0.0,242.30,NaT,Stock
284,2024-02-09,ERNST & YOUNG 401(K),86964,Contributions,,SP 500 INDEX PL CL E,,1.041,0.0,0.0,0.0,0.0,242.30,NaT,Stock
277,2024-02-23,ERNST & YOUNG 401(K),86964,Contributions,,SP 500 INDEX PL CL E,,1.027,0.0,0.0,0.0,0.0,242.30,NaT,Stock
264,2024-03-08,ERNST & YOUNG 401(K),86964,Contributions,,SP 500 INDEX PL CL E,,1.020,0.0,0.0,0.0,0.0,242.30,NaT,Stock
260,2024-03-22,ERNST & YOUNG 401(K),86964,Contributions,,SP 500 INDEX PL CL E,,0.997,0.0,0.0,0.0,0.0,242.30,NaT,Stock
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
369,2025-11-14,ERNST & YOUNG 401(K),86964,Contributions,,SP 500 INDEX PL CL E,,9.770,0.0,0.0,0.0,0.0,3118.98,NaT,Stock
360,2025-11-28,ERNST & YOUNG 401(K),86964,Contributions,,SP 500 INDEX PL CL E,,0.982,0.0,0.0,0.0,0.0,318.98,NaT,Stock
346,2025-12-12,ERNST & YOUNG 401(K),86964,Contributions,,SP 500 INDEX PL CL E,,0.985,0.0,0.0,0.0,0.0,318.98,NaT,Stock
336,2025-12-17,ERNST & YOUNG 401(K),86964,Change In Market Value,,SP 500 INDEX PL CL E,,0.000,0.0,0.0,0.0,0.0,2395.56,NaT,Stock


# flow_builder

## filter_entity_transactions

In [4]:
from support_functions import data_loader
importlib.reload(data_loader)
from support_functions.data_loader import load_data

In [5]:
data = load_data(data_dir)

Loading positions from: /Users/yifanli/Github/fidelity-portfolio-tracker/data/Portfolio_Positions_Jan-31-2026.csv (Date: 2026-01-31)
Found 5 history files.


In [8]:
account_num, entity_name = test_case_pair[0]

transactions_df = data.transactions
transactions_df.head()


Unnamed: 0,Run Date,Account,Account Number,Action,Symbol,Description,Type,Price ($),Quantity,Commission ($),Fees ($),Accrued Interest ($),Amount ($),Settlement Date,Asset Type
747,2022-07-26,Individual,Z23390746,Electronic Funds Transfer Received (Cash),,No Description,Cash,0.0,0.0,0.0,0.0,0.0,1000.0,NaT,Stock
746,2022-07-29,Individual,Z23390746,DIVIDEND RECEIVED FIDELITY TREASURY MONEY MARK...,FZFXX,FIDELITY TREASURY MONEY MARKET FUND,Cash,0.0,0.0,0.0,0.0,0.0,0.22,NaT,Cash
745,2022-07-29,Individual,Z23390746,REINVESTMENT FIDELITY TREASURY MONEY MARKET FU...,FZFXX,FIDELITY TREASURY MONEY MARKET FUND,Cash,1.0,0.22,0.0,0.0,0.0,-0.22,NaT,Cash
744,2022-08-05,Individual,Z23390746,YOU BOUGHT APPLE INC (AAPL) (Cash),AAPL,APPLE INC,Cash,163.54,1.0,0.0,0.0,0.0,-163.54,2022-08-09,Stock
743,2022-08-05,Individual,Z23390746,YOU BOUGHT APPLE INC (AAPL) (Cash),AAPL,APPLE INC,Cash,164.61,9.0,0.0,0.0,0.0,-1481.45,2022-08-09,Stock


## filter_entity_positions

In [9]:
from support_functions import data_loader
importlib.reload(data_loader)
from support_functions.data_loader import load_data

In [10]:
data = load_data(data_dir)

Loading positions from: /Users/yifanli/Github/fidelity-portfolio-tracker/data/Portfolio_Positions_Jan-31-2026.csv (Date: 2026-01-31)
Found 5 history files.


In [11]:
account_num, entity_name = test_case_pair[0]

positions_df = data.positions
positions_df.head()

Unnamed: 0,Account Number,Account Name,Symbol,Description,Quantity,Last Price,Last Price Change,Current Value,Today's Gain/Loss Dollar,Today's Gain/Loss Percent,Total Gain/Loss Dollar,Total Gain/Loss Percent,Percent Of Account,Cost Basis Total,Average Cost Basis,Type,Asset Type
0,Z23390746,Individual,FXAIX,FIDELITY 500 INDEX FUND,749.338,241.16,-$1.03,180710.35,-771.82,-0.43%,38146.41,+26.75%,14.34%,142563.94,$190.25,Cash,Stock
1,Z23390746,Individual,FSKAX,FIDELITY TOTAL MARKET INDEX FUND,658.109,189.75,-$1.07,124876.18,-704.18,-0.57%,22576.29,+22.06%,9.91%,102299.89,$155.45,Cash,Stock
2,Z23390746,Individual,912797SQ1,UNITED STATES TREAS BILLS ZERO CPN 0.00000% 02...,100000.0,99.921,+$0.032,99921.0,32.0,+0.03%,482.56,+0.48%,7.93%,99438.44,--,Cash,Bond
3,Z23390746,Individual,912797SR9,UNITED STATES TREAS BILLS ZERO CPN 0.00000% 02...,100000.0,99.849,+$0.031,99849.0,31.0,+0.03%,128.61,+0.12%,7.92%,99720.39,--,Cash,Bond
4,Z23390746,Individual,912797SS7,UNITED STATES TREAS BILLS ZERO CPN 0.00000% 02...,100000.0,99.779,+$0.031,99779.0,31.0,+0.03%,336.67,+0.33%,7.92%,99442.33,--,Cash,Bond


## build_entity_cash_flows

In [32]:
from support_functions import data_loader
importlib.reload(data_loader)
from support_functions.data_loader import load_data

from support_functions import flow_builders
importlib.reload(flow_builders)
from support_functions.flow_builders import (
    filter_entity_transactions,
    filter_entity_positions,
    EntityCashFlows
)

data = load_data(data_dir)
transactions_df = data.transactions
positions_df = data.positions
latest_date = data.latest_date



Loading positions from: /Users/yifanli/Github/fidelity-portfolio-tracker/data/Portfolio_Positions_Jan-31-2026.csv (Date: 2026-01-31)
Found 5 history files.


In [33]:
account_num, entity_name = test_case_pair[4]

filtered_hist = filter_entity_transactions(transactions_df, account_num, entity_name)
filtered_posi = filter_entity_positions(positions_df, account_num, entity_name)
[account_num, entity_name]

['86964', '84679P173']

In [34]:
cash_flows = []
total_invested = 0.0    
current_val = filtered_posi['Current Value'].iloc[0] if not filtered_posi.empty else 0.0
current_basis = filtered_posi['Cost Basis Total'].iloc[0] if not filtered_posi.empty else 0.0

In [35]:
for _, row in filtered_hist.iterrows():
    date = row['Run Date']
    amount = row['Amount ($)']
    flow = amount
    cash_flows.append((date, flow))
    
    if flow < 0:
        total_invested += abs(flow)

In [36]:
[total_invested,current_basis, current_val]

[39879.45000000001, np.float64(20404.91), np.float64(21056.77)]

In [115]:
cash_flows

[(Timestamp('2024-07-22 00:00:00'), -2000.0),
 (Timestamp('2024-10-04 00:00:00'), 6.64),
 (Timestamp('2024-10-04 00:00:00'), -6.64),
 (Timestamp('2024-10-04 00:00:00'), -1000.0),
 (Timestamp('2024-12-20 00:00:00'), -11.12),
 (Timestamp('2024-12-20 00:00:00'), 11.12),
 (Timestamp('2025-04-04 00:00:00'), 9.22),
 (Timestamp('2025-04-04 00:00:00'), -9.22),
 (Timestamp('2025-07-11 00:00:00'), -10.13),
 (Timestamp('2025-07-11 00:00:00'), 10.13),
 (Timestamp('2025-09-18 00:00:00'), -499.0),
 (Timestamp('2025-10-03 00:00:00'), 11.89),
 (Timestamp('2025-10-03 00:00:00'), -11.89),
 (Timestamp('2025-12-19 00:00:00'), 12.87),
 (Timestamp('2025-12-19 00:00:00'), -12.87)]

In [95]:
positions_df[positions_df['Account Number']==account_num]

Unnamed: 0,Account Number,Account Name,Symbol,Description,Quantity,Last Price,Last Price Change,Current Value,Today's Gain/Loss Dollar,Today's Gain/Loss Percent,Total Gain/Loss Dollar,Total Gain/Loss Percent,Percent Of Account,Cost Basis Total,Average Cost Basis,Type,Asset Type
34,241802439,Health Savings Account,FZILX,FIDELITY ZERO INTERNATIONAL INDEX,281.381,15.59,-$0.24,4386.72,-67.54,-1.52%,550.58,+14.35%,42.37%,3836.14,$13.63,Cash,Stock
35,241802439,Health Savings Account,FXAIX,FIDELITY 500 INDEX FUND,17.804,241.16,-$1.03,4293.61,-18.34,-0.43%,732.74,+20.57%,41.47%,3560.87,$200.00,Cash,Stock
36,241802439,Health Savings Account,FDRXX**,HELD IN MONEY MARKET,0.0,0.0,,1673.02,0.0,,0.0,,16.16%,0.0,,Cash,Cash


In [97]:
transactions_df[transactions_df['Account Number']==account_num]['Description'].unique()

array(['No Description', 'FDIC INSURED DEPOSIT AT LEADER BANK HSA',
       'FIDELITY GOVERNMENT CASH RESERVES', 'FIDELITY 500 INDEX FUND',
       'FIDELITY ZERO INTERNATIONAL INDEX'], dtype=object)

In [103]:
transactions_df[
    (transactions_df['Account Number']==account_num) &
    (transactions_df['Description']=='FIDELITY ZERO INTERNATIONAL INDEX')
]

Unnamed: 0,Run Date,Account,Account Number,Action,Symbol,Description,Type,Price ($),Quantity,Commission ($),Fees ($),Accrued Interest ($),Amount ($),Settlement Date,Asset Type
462,2025-08-06,Health Savings Account,241802439,YOU BOUGHT PROSPECTUS UNDER SEPARATE COVER FID...,FZILX,FIDELITY ZERO INTERNATIONAL INDEX,Cash,13.61,273.932,0.0,0.0,0.0,-3728.21,2025-08-07,Stock
344,2025-12-12,Health Savings Account,241802439,DIVIDEND RECEIVED FIDELITY ZERO INTERNATIONAL ...,FZILX,FIDELITY ZERO INTERNATIONAL INDEX,Cash,0.0,0.0,0.0,0.0,0.0,107.93,NaT,Stock
343,2025-12-12,Health Savings Account,241802439,REINVESTMENT FIDELITY ZERO INTERNATIONAL INDEX...,FZILX,FIDELITY ZERO INTERNATIONAL INDEX,Cash,14.49,7.449,0.0,0.0,0.0,-107.93,NaT,Stock


In [68]:
conditions = [
    transactions_df['Description'] == "FID BLUE CHIP GR K6",
    transactions_df['Description'] == "SP 500 INDEX PL CL E",
    transactions_df['Description'] == "SP 500 INDEX PL CL F"
]
choices = ["FBCGX", "84679P173", "84679P173"]
transactions_df['Symbol'] = np.select(conditions, choices, default=transactions_df.get('Symbol', None))

transactions_df[transactions_df['Account Number']=='86964']

Unnamed: 0,Run Date,Account,Account Number,Action,Symbol,Description,Type,Price ($),Quantity,Commission ($),Fees ($),Accrued Interest ($),Amount ($),Settlement Date,Asset Type
294,2024-01-26,ERNST & YOUNG 401(K),86964,Contributions,FBCGX,FID BLUE CHIP GR K6,,3.716,0.0,0.0,0.0,0.0,103.85,NaT,Stock
295,2024-01-26,ERNST & YOUNG 401(K),86964,Contributions,84679P173,SP 500 INDEX PL CL E,,1.070,0.0,0.0,0.0,0.0,242.30,NaT,Stock
284,2024-02-09,ERNST & YOUNG 401(K),86964,Contributions,84679P173,SP 500 INDEX PL CL E,,1.041,0.0,0.0,0.0,0.0,242.30,NaT,Stock
283,2024-02-09,ERNST & YOUNG 401(K),86964,Contributions,FBCGX,FID BLUE CHIP GR K6,,3.542,0.0,0.0,0.0,0.0,103.85,NaT,Stock
277,2024-02-23,ERNST & YOUNG 401(K),86964,Contributions,84679P173,SP 500 INDEX PL CL E,,1.027,0.0,0.0,0.0,0.0,242.30,NaT,Stock
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
700,2026-01-02,ERNST & YOUNG 401(K),86964,RECORDKEEPING FEE,FBCGX,FID BLUE CHIP GR K6,,-0.030,0.0,0.0,0.0,0.0,-1.32,NaT,Stock
690,2026-01-09,ERNST & YOUNG 401(K),86964,Contributions,FBCGX,FID BLUE CHIP GR K6,,3.049,0.0,0.0,0.0,0.0,136.71,NaT,Stock
691,2026-01-09,ERNST & YOUNG 401(K),86964,Contributions,84679P173,SP 500 INDEX PL CL F,,0.964,0.0,0.0,0.0,0.0,318.98,NaT,Stock
679,2026-01-23,ERNST & YOUNG 401(K),86964,Contributions,84679P173,SP 500 INDEX PL CL F,,0.970,0.0,0.0,0.0,0.0,318.98,NaT,Stock


In [70]:
transactions_df[transactions_df['Account Number']=='86964']['Symbol'].unique()

array(['FBCGX', '84679P173'], dtype=object)

[3836.14, np.float64(3836.14), np.float64(4386.72)]

[(Timestamp('2025-08-06 00:00:00'), -3728.21),
 (Timestamp('2025-12-12 00:00:00'), 107.93),
 (Timestamp('2025-12-12 00:00:00'), -107.93)]

# math_utils

## get_total_pnl

In [4]:
from support_functions import data_loader
importlib.reload(data_loader)
from support_functions.data_loader import load_data

from support_functions import flow_builders
importlib.reload(flow_builders)
from support_functions.flow_builders import (
    build_entity_cash_flows
)

In [5]:
data = load_data(data_dir)
transactions_df = data.transactions
positions_df = data.positions
latest_date = data.latest_date

Loading positions from: /Users/yifanli/Github/fidelity-portfolio-tracker/data/Portfolio_Positions_Jan-31-2026.csv (Date: 2026-01-31)
Found 5 history files.


In [6]:
account_num, entity_name = test_case_pair[4]

entity_cash_flows = build_entity_cash_flows(data, account_num, entity_name)

In [7]:
cash_flows = entity_cash_flows.cash_flows
total_invested = entity_cash_flows.total_invested
current_value = entity_cash_flows.current_value
current_basis = entity_cash_flows.current_basis
latest_date = entity_cash_flows.latest_date

In [8]:
total_pnl = sum(amount for date, amount in cash_flows)
total_pnl

-20404.91

In [9]:
cash_flows

[(Timestamp('2024-01-26 00:00:00'), -242.3),
 (Timestamp('2024-02-09 00:00:00'), -242.3),
 (Timestamp('2024-02-23 00:00:00'), -242.3),
 (Timestamp('2024-03-08 00:00:00'), -242.3),
 (Timestamp('2024-03-22 00:00:00'), -242.3),
 (Timestamp('2024-04-01 00:00:00'), -0.15),
 (Timestamp('2024-04-01 00:00:00'), 2.97),
 (Timestamp('2024-04-05 00:00:00'), -242.3),
 (Timestamp('2024-04-19 00:00:00'), -242.3),
 (Timestamp('2024-05-03 00:00:00'), -242.3),
 (Timestamp('2024-05-17 00:00:00'), -242.3),
 (Timestamp('2024-05-31 00:00:00'), -242.3),
 (Timestamp('2024-06-14 00:00:00'), -242.3),
 (Timestamp('2024-06-28 00:00:00'), -242.3),
 (Timestamp('2024-07-01 00:00:00'), 2.94),
 (Timestamp('2024-07-01 00:00:00'), -0.06),
 (Timestamp('2024-07-12 00:00:00'), -242.3),
 (Timestamp('2024-07-26 00:00:00'), -242.3),
 (Timestamp('2024-08-09 00:00:00'), -242.3),
 (Timestamp('2024-08-23 00:00:00'), -301.76),
 (Timestamp('2024-09-06 00:00:00'), -247.16),
 (Timestamp('2024-09-20 00:00:00'), -247.16),
 (Timestamp('

In [25]:
[total_invested,current_basis,current_value]

[39879.45000000001, np.float64(20404.91), np.float64(21056.77)]

## get_macaulay_duration

In [146]:
from support_functions import data_loader
importlib.reload(data_loader)
from support_functions.data_loader import load_data

from support_functions import flow_builders
importlib.reload(flow_builders)
from support_functions.flow_builders import (
    build_entity_cash_flows
)

from support_functions import math_utils
importlib.reload(math_utils)
from support_functions.math_utils import (
    xirr
)

In [147]:
data = load_data(data_dir)
transactions_df = data.transactions
positions_df = data.positions
latest_date = data.latest_date

Loading positions from: /Users/yifanli/Github/fidelity-portfolio-tracker/data/Portfolio_Positions_Jan-31-2026.csv (Date: 2026-01-31)
Found 5 history files.


In [187]:
account_num = "Z23390746"
entity_name = "912797RJ8" # AAPL & FXAIX & 912797SZ1 & 912797RJ8

entity_cash_flows = build_entity_cash_flows(data, account_num, entity_name)

In [188]:
cash_flows = entity_cash_flows.cash_flows
total_invested = entity_cash_flows.total_invested
current_value = entity_cash_flows.current_value
current_basis = entity_cash_flows.current_basis
latest_date = entity_cash_flows.latest_date

In [189]:
discount_rate=0.045

In [190]:
times = []
pvs = []

for date, amount in cash_flows:
    t = (date - cash_flows[0][0]).days / 365.0
    pv = amount * np.exp(-discount_rate * t)
    
    times.append(t)
    pvs.append(pv)

abs_pvs = np.abs(pvs)
total_pv = np.sum(abs_pvs)

# if total_pv == 0: return 0

mac_duration = np.sum(np.array(times) * abs_pvs) / total_pv
mac_duration

np.float64(0.06024086370048307)

In [191]:
invested_cash_flows = [flow for flow in cash_flows if flow[1] < 0]
dates, amounts = zip(*invested_cash_flows)
amounts = pd.Series(amounts)
amounts_ratio = amounts/amounts.sum()
holding_period = pd.Series([(latest_date - d).days for d in dates])
(amounts_ratio*holding_period).sum()/365.0

np.float64(0.1643835616438356)

In [192]:
dates, amounts = zip(*cash_flows)
amounts = pd.Series(amounts)

# 计算绝对投入总额（分母通常使用总流出，即总成本）
total_outflow = abs(amounts[amounts < 0].sum())


# 计算每一笔现金流距离结算日的天数
# 注意：这里的 amounts 包含正负值
holding_days = pd.Series([(latest_date - d).days for d in dates])

# (金额 * 天数) 的总和 / 总投入成本
# 负向现金流产生的“持有天数”会被正向现金流（卖出）抵消
weighted_days = (amounts * holding_days).sum()

# 因为流入是正，流出是负，所以结果通常是负数，取绝对值
abs(weighted_days / total_outflow) / 365.0

np.float64(0.12035789963026547)

# PortfolioAnalyzer

In [106]:
from support_functions import portfolio_analyzer
importlib.reload(portfolio_analyzer)
from support_functions.portfolio_analyzer import PortfolioAnalyzer

In [107]:
analyzer = PortfolioAnalyzer(data_dir, output_dir)

Loading positions from: /Users/yifanli/Github/fidelity-portfolio-tracker/data/Portfolio_Positions_Jan-31-2026.csv (Date: 2026-01-31)
Found 5 history files.


## analyze_entity_in_account

In [4]:
from support_functions import portfolio_analyzer
importlib.reload(portfolio_analyzer)
from support_functions.portfolio_analyzer import (
    PortfolioAnalyzer,
)

from support_functions import flow_builders
importlib.reload(flow_builders)
from support_functions.flow_builders import (
    build_entity_cash_flows,
)

from support_functions import math_utils
importlib.reload(math_utils)
from support_functions.math_utils import (
    calculate_metrics,
)

analyzer = PortfolioAnalyzer(data_dir, output_dir)

Loading positions from: /Users/yifanli/Github/fidelity-portfolio-tracker/data/Portfolio_Positions_Jan-31-2026.csv (Date: 2026-01-31)
Found 5 history files.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  transactions_df['Symbol'] = transactions_df['Description'].map(symbol_map).fillna(transactions_df.get('Symbol', None))


In [21]:
account_num, entity_name = test_case_pair[8]
data = analyzer.data
transactions = data.transactions
position = data.positions
account_name = data.account_map.get(account_num, "Unknown")

In [22]:
entity_cash_flows = build_entity_cash_flows(data, account_num, entity_name)

In [23]:
metrics = calculate_metrics(entity_cash_flows)

In [24]:
result = pd.Series({
    'Account Number': account_num,
    'Account Name': account_name,
    'Entity Name': entity_name,
    'Total Invested': entity_cash_flows.total_invested,
    'Current Basis': entity_cash_flows.current_basis,
    'Current Value': entity_cash_flows.current_value,
    'Total PnL': metrics['Total PnL'],
    'IRR (%)': f"{metrics['IRR']:.2%}" if metrics['IRR'] is not None else "N/A",
    'Total Return (%)': f"{metrics['ROI']:.2%}",
    'Holding Period (Y)': f"{metrics['Holding Period (Y)']:.2f}"
})
result

Account Number                     241802439
Account Name          Health Savings Account
Entity Name                            FXAIX
Total Invested                       3560.87
Current Basis                        3560.87
Current Value                        4293.61
Total PnL                             794.61
IRR (%)                               16.81%
Total Return (%)                      22.32%
Holding Period (Y)                      1.29
dtype: object

In [31]:
pd.DataFrame(entity_cash_flows.cash_flows).to_csv("/Users/yifanli/Github/fidelity-portfolio-tracker/data/temp.csv")

In [49]:
transactions[transactions['Account Number'] == '86964']

Unnamed: 0,Run Date,Account,Account Number,Action,Symbol,Description,Type,Price ($),Quantity,Commission ($),Fees ($),Accrued Interest ($),Amount ($),Settlement Date,Asset Type
294,2024-01-26,ERNST & YOUNG 401(K),86964,Contributions,FBCGX,FID BLUE CHIP GR K6,,3.716,0.0,0.0,0.0,0.0,103.85,NaT,Stock
295,2024-01-26,ERNST & YOUNG 401(K),86964,Contributions,84679P173,SP 500 INDEX PL CL E,,1.070,0.0,0.0,0.0,0.0,242.30,NaT,Stock
284,2024-02-09,ERNST & YOUNG 401(K),86964,Contributions,84679P173,SP 500 INDEX PL CL E,,1.041,0.0,0.0,0.0,0.0,242.30,NaT,Stock
283,2024-02-09,ERNST & YOUNG 401(K),86964,Contributions,FBCGX,FID BLUE CHIP GR K6,,3.542,0.0,0.0,0.0,0.0,103.85,NaT,Stock
277,2024-02-23,ERNST & YOUNG 401(K),86964,Contributions,84679P173,SP 500 INDEX PL CL E,,1.027,0.0,0.0,0.0,0.0,242.30,NaT,Stock
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
700,2026-01-02,ERNST & YOUNG 401(K),86964,RECORDKEEPING FEE,FBCGX,FID BLUE CHIP GR K6,,-0.030,0.0,0.0,0.0,0.0,-1.32,NaT,Stock
690,2026-01-09,ERNST & YOUNG 401(K),86964,Contributions,FBCGX,FID BLUE CHIP GR K6,,3.049,0.0,0.0,0.0,0.0,136.71,NaT,Stock
691,2026-01-09,ERNST & YOUNG 401(K),86964,Contributions,84679P173,SP 500 INDEX PL CL F,,0.964,0.0,0.0,0.0,0.0,318.98,NaT,Stock
679,2026-01-23,ERNST & YOUNG 401(K),86964,Contributions,84679P173,SP 500 INDEX PL CL F,,0.970,0.0,0.0,0.0,0.0,318.98,NaT,Stock
