In [14]:
import pandas as pd
import numpy as np
from datetime import datetime
from pathlib import Path
import os
import glob
from scipy import optimize
import importlib

In [2]:
# Treat the kernel's current working directory as the project root; the data folder
# used by the cells below is resolved relative to it.
project_path = os.getcwd()

## load_data

### get_latest_position_file

In [3]:
# Folder that is searched for the exported Portfolio_Positions_*.csv files.
data_dir = f'{project_path}/data'

In [4]:
# Collect every positions export found in the data folder. The matches come back in
# no particular order (the listing below is neither alphabetical nor chronological),
# so the newest file has to be chosen by the date embedded in its name.
positions_pattern = os.path.join(data_dir, 'Portfolio_Positions_*.csv')
files = glob.glob(positions_pattern)
files

['/Users/yifanli/Github/fidelity-portfolio-tracker/data/Portfolio_Positions_Apr-06-2025.csv',
 '/Users/yifanli/Github/fidelity-portfolio-tracker/data/Portfolio_Positions_Aug-05-2025.csv',
 '/Users/yifanli/Github/fidelity-portfolio-tracker/data/Portfolio_Positions_Nov-16-2025.csv',
 '/Users/yifanli/Github/fidelity-portfolio-tracker/data/Portfolio_Positions_Apr-29-2025.csv',
 '/Users/yifanli/Github/fidelity-portfolio-tracker/data/Portfolio_Positions_Dec-31-2025.csv',
 '/Users/yifanli/Github/fidelity-portfolio-tracker/data/Portfolio_Positions_Apr-02-2025.csv',
 '/Users/yifanli/Github/fidelity-portfolio-tracker/data/Portfolio_Positions_May-06-2025.csv']

In [5]:
# Find the positions export with the most recent date in its file name.
# File names look like Portfolio_Positions_<Mon>-<DD>-<YYYY>.csv; names whose date
# part does not parse with '%b-%d-%Y' are ignored.
dated_candidates = []
for path in files:
    stamp = os.path.basename(path).replace('Portfolio_Positions_', '').replace('.csv', '')
    try:
        dated_candidates.append((datetime.strptime(stamp, '%b-%d-%Y'), path))
    except ValueError:
        continue

# max() returns the first maximal element, so ties resolve to the earliest file in
# `files`; when nothing parsed, both results stay None.
latest_date, latest_file = max(
    dated_candidates,
    key=lambda pair: pair[0],
    default=(None, None),
)
[latest_file, latest_date]

['/Users/yifanli/Github/fidelity-portfolio-tracker/data/Portfolio_Positions_Dec-31-2025.csv',
 datetime.datetime(2025, 12, 31, 0, 0)]

### clean_positions

In [None]:
from support_functions.analysis import get_latest_position_file, clean_currency

data_dir = f'{project_path}/data'
# The helper returns a (file path, date) pair.
# NOTE(review): presumably it selects the newest export the same way as the loop
# prototyped in the previous section — confirm against support_functions.analysis.
pos_file, pos_date = get_latest_position_file(data_dir)
# index_col=False keeps pandas from using the first CSV column as the row index.
positions_df = pd.read_csv(pos_file, index_col=False)

# Columns that are passed through clean_currency in the next cell.
cols_to_clean = [
    'Last Price', 'Current Value', 
    'Cost Basis Total', 'Today\'s Gain/Loss Dollar', 
    'Total Gain/Loss Dollar'
]

In [9]:
# Run clean_currency over every listed column that actually exists in the export.
# NOTE(review): clean_currency is assumed to turn currency-formatted text into
# numbers — confirm against support_functions.analysis.
currency_cols_present = [name for name in cols_to_clean if name in positions_df.columns]
for name in currency_cols_present:
    positions_df[name] = positions_df[name].apply(clean_currency)

# Quantity: coerce to numeric; anything that cannot be parsed becomes NaN and is
# then replaced by 0.
if 'Quantity' in positions_df.columns:
    quantity_numeric = pd.to_numeric(positions_df['Quantity'], errors='coerce')
    positions_df['Quantity'] = quantity_numeric.fillna(0)

### load_transactions

In [10]:
# Load every Accounts_History_*.csv export from the data folder into one DataFrame.
data_dir = f'{project_path}/data'
# glob's result order depends on the file system; sort so the concatenated row order
# is the same on every run and every machine.
hist_files = sorted(glob.glob(os.path.join(data_dir, 'Accounts_History_*.csv')))
print(f"Found {len(hist_files)} history files.")

# pd.concat raises an opaque "No objects to concatenate" ValueError on an empty list;
# fail early with a message that names the folder that was searched instead.
if not hist_files:
    raise FileNotFoundError(f"No Accounts_History_*.csv files found in {data_dir}")

# on_bad_lines='skip' silently drops rows with an unexpected number of fields.
# NOTE(review): presumably this is meant for non-tabular lines in the exports —
# confirm no real transaction rows are being discarded.
transactions_dfs = [pd.read_csv(path, on_bad_lines='skip') for path in hist_files]

# ignore_index=True renumbers the rows 0..n-1 across all files.
transactions_df = pd.concat(transactions_dfs, ignore_index=True)

Found 4 history files.


### clean_transactions

In [15]:
# Reload the analysis module so that edits made to it on disk are picked up without
# restarting the kernel. The from-import must come AFTER the reload: names bound by
# an earlier `from ... import` keep pointing at the pre-reload objects.
from support_functions import analysis
importlib.reload(analysis)
from support_functions.analysis import load_transactions
data_dir = f'{project_path}/data'
# Same loading step as the inline prototype above, now via the packaged helper
# (the recorded output shows the same "Found 4 history files." message).
transactions_df = load_transactions(data_dir)

Found 4 history files.


In [16]:
# Parse 'Run Date' into datetimes; values that cannot be parsed become NaT.
# NOTE(review): the AAPL rows displayed at the end of this notebook show NaT in
# 'Run Date' — confirm the parse succeeds for real transaction rows.
run_dates = pd.to_datetime(transactions_df['Run Date'], errors='coerce')
transactions_df['Run Date'] = run_dates

# 'Settlement Date' is not guaranteed to be present, so convert it only when it is.
if 'Settlement Date' in transactions_df.columns:
    settlement_dates = pd.to_datetime(transactions_df['Settlement Date'], errors='coerce')
    transactions_df['Settlement Date'] = settlement_dates

# Strip leading and trailing whitespace from the ticker symbols.
if 'Symbol' in transactions_df.columns:
    transactions_df['Symbol'] = transactions_df['Symbol'].str.strip()

# Columns run through clean_currency (only those present in the frame).
hist_numeric_cols = [
    'Amount ($)',
    'Price ($)',
    'Quantity',
    'Commission ($)',
    'Fees ($)',
    'Accrued Interest ($)',
]
numeric_cols_present = [name for name in hist_numeric_cols if name in transactions_df.columns]
for name in numeric_cols_present:
    transactions_df[name] = transactions_df[name].apply(clean_currency)

# Order the transactions by run date.
transactions_df = transactions_df.sort_values(by='Run Date')

### load_data

In [17]:
from support_functions.analysis import (
    get_latest_position_file, clean_positions,
    load_transactions, clean_transactions
)

In [18]:
data_dir = f'{project_path}/data'
# The helper returns the path of the positions export and a date object
# (it is formatted with strftime in the message below).
pos_file, pos_date = get_latest_position_file(data_dir)
print(f"Loading positions from: {pos_file} (Date: {pos_date.strftime('%Y-%m-%d')})")
# index_col=False keeps pandas from using the first CSV column as the row index.
positions_df = pd.read_csv(pos_file, index_col=False)

# NOTE(review): presumably applies the currency/quantity cleanup prototyped in the
# clean_positions section above — confirm against support_functions.analysis.
positions_df = clean_positions(positions_df)


Loading positions from: /Users/yifanli/Github/fidelity-portfolio-tracker/data/Portfolio_Positions_Dec-31-2025.csv (Date: 2025-12-31)


In [19]:
# Load the raw transaction history, then clean it, using the packaged helpers.
# Relies on data_dir having been set by the previous cell.
transactions_df = load_transactions(data_dir)
    
# NOTE(review): presumably mirrors the date/symbol/numeric cleanup prototyped in the
# clean_transactions section above — confirm against support_functions.analysis.
transactions_df = clean_transactions(transactions_df)

Found 4 history files.


## analyze_symbol_performance

### categorize_asset

In [20]:
from support_functions.analysis import load_data

In [21]:
data_dir = f'{project_path}/data'
# load_data returns two frames: the positions and the transaction history. Its
# recorded output shows the same messages as the two step-by-step cells above.
positions_df, history_df = load_data(data_dir)

Loading positions from: /Users/yifanli/Github/fidelity-portfolio-tracker/data/Portfolio_Positions_Dec-31-2025.csv (Date: 2025-12-31)
Found 4 history files.


In [8]:
# Pick one position row to use as the worked example for the cells below.
# Index label 10 is an arbitrary choice; in the recorded output it is the AAPL holding.
i = 10
row = positions_df.loc[i]  # label-based lookup on the frame's index
print(row)

Account Number                Z23390746
Account Name                 Individual
Symbol                             AAPL
Description                   APPLE INC
Quantity                           70.0
Last Price                       271.86
Last Price Change                -$1.22
Current Value                   19030.2
Today's Gain/Loss Dollar          -85.4
Today's Gain/Loss Percent        -0.45%
Total Gain/Loss Dollar          8270.21
Total Gain/Loss Percent         +76.86%
Percent Of Account                1.52%
Cost Basis Total               10759.99
Average Cost Basis              $153.71
Type                               Cash
Name: 10, dtype: object


In [12]:
# categorize_asset is not imported in any of the visible cells above, so on a fresh
# kernel this cell would fail with NameError. Import it here.
# NOTE(review): assumes the function lives in support_functions.analysis alongside
# the other helpers this notebook imports — confirm.
from support_functions.analysis import categorize_asset

# Pull the fields of the example position that the analysis below works with.
symbol = row['Symbol']
account_num = row['Account Number']
account_name = row['Account Name']
current_val = row['Current Value']
quantity = row['Quantity']

# Classify the position; the recorded output for the AAPL row is 'Stock'.
asset_type = categorize_asset(row)
asset_type

'Stock'

In [15]:
# Keep only the transactions for this position: same account AND same symbol.
account_matches = history_df['Account Number'].eq(account_num)
symbol_matches = history_df['Symbol'].eq(symbol)
mask = account_matches & symbol_matches
symbol_hist = history_df.loc[mask]

In [16]:
# Show the transactions matched for the example account/symbol pair.
symbol_hist

Unnamed: 0,Run Date,Account,Account Number,Action,Symbol,Description,Type,Quantity,Price ($),Commission ($),Fees ($),Accrued Interest ($),Amount ($),Settlement Date
292,NaT,Individual,Z23390746,DIVIDEND RECEIVED APPLE INC (AAPL) (Cash),AAPL,APPLE INC,Cash,0.0,0.0,0.0,0.0,0.0,18.2,NaT
356,NaT,Individual,Z23390746,DIVIDEND RECEIVED APPLE INC (AAPL) (Cash),AAPL,APPLE INC,Cash,0.0,0.0,0.0,0.0,0.0,18.2,NaT
424,NaT,Individual,Z23390746,DIVIDEND RECEIVED APPLE INC (AAPL) (Cash),AAPL,APPLE INC,Cash,0.0,0.0,0.0,0.0,0.0,18.2,NaT
490,NaT,Individual,Z23390746,DIVIDEND RECEIVED APPLE INC (AAPL) (Cash),AAPL,APPLE INC,Cash,0.0,0.0,0.0,0.0,0.0,17.5,NaT


In [18]:
# Cross-check: every AAPL transaction regardless of account. The recorded output is
# identical to symbol_hist, i.e. only dividend rows and no purchases are present.
# NOTE(review): the ticker is hard-coded here rather than taken from `symbol`.
history_df[history_df['Symbol'] == 'AAPL']

Unnamed: 0,Run Date,Account,Account Number,Action,Symbol,Description,Type,Quantity,Price ($),Commission ($),Fees ($),Accrued Interest ($),Amount ($),Settlement Date
292,NaT,Individual,Z23390746,DIVIDEND RECEIVED APPLE INC (AAPL) (Cash),AAPL,APPLE INC,Cash,0.0,0.0,0.0,0.0,0.0,18.2,NaT
356,NaT,Individual,Z23390746,DIVIDEND RECEIVED APPLE INC (AAPL) (Cash),AAPL,APPLE INC,Cash,0.0,0.0,0.0,0.0,0.0,18.2,NaT
424,NaT,Individual,Z23390746,DIVIDEND RECEIVED APPLE INC (AAPL) (Cash),AAPL,APPLE INC,Cash,0.0,0.0,0.0,0.0,0.0,18.2,NaT
490,NaT,Individual,Z23390746,DIVIDEND RECEIVED APPLE INC (AAPL) (Cash),AAPL,APPLE INC,Cash,0.0,0.0,0.0,0.0,0.0,17.5,NaT
