# Midterm Project: Equity Portfolio Management (2018)
**Student:** Umair Ali

Universe (10 stocks): `IBM, MSFT, GOOG, AAPL, AMZN, META, NFLX, TSLA, ORCL, SAP`

Rules implemented:
- Trade (buy/sell) at **Close** prices
- Ranking based on **Adj Close** percentage change over the rebalance window
- Rebalance every *N* business days (default 5)
- Integer shares only; leftover cash stays in a **0% interest cash account**
- Dividends are credited to cash **only if shares were held before the dividend day** (shares bought on dividend day do not receive dividend)
- Daily mark-to-market (MTM) portfolio value is tracked for every 2018 trading day on which all ten stocks have price data

> Put the CSV files in the same folder as this notebook (or adjust `data_dir`).


## 1) Load data

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt

# If your CSVs are in the same folder as this notebook:
# `data_dir` is the folder searched for the per-ticker price CSVs and the USD/JPY CSV.
data_dir = Path('.')

# The ten tickers that are loaded, ranked and traded by the cells below.
universe = ['IBM','MSFT','GOOG','AAPL','AMZN','META','NFLX','TSLA','ORCL','SAP']

def find_csv_for_ticker(ticker: str, folder: Path) -> Path:
    """Locate the CSV file holding price data for *ticker* inside *folder*.

    Lookup order:
      1. Exact, well-known file names (``IBM.csv``, ``ibm.csv``, ``ibm-1.csv``,
         ``ibm-1-1.csv``, ``IBM-1.csv``, ``IBM-1-1.csv``).
      2. Any ``*.csv`` whose name contains the ticker (case-insensitive).

    The fallback is deterministic: candidates are sorted by path, and a file
    whose name *starts with* the ticker wins over one that merely contains it
    (so ``sap_daily.csv`` beats ``zz_sap.csv``).

    Args:
        ticker: Ticker symbol, e.g. ``"IBM"``.
        folder: Directory to search (not searched recursively).

    Returns:
        Path of the first matching file.

    Raises:
        FileNotFoundError: If no candidate file exists in *folder*.
    """
    candidates = [
        folder / f"{ticker}.csv",
        folder / f"{ticker.lower()}.csv",
        folder / f"{ticker.lower()}-1.csv",
        folder / f"{ticker.lower()}-1-1.csv",
        folder / f"{ticker.upper()}-1.csv",
        folder / f"{ticker.upper()}-1-1.csv",
    ]
    for p in candidates:
        if p.exists():
            return p

    # Fallback: substring search. glob() yields entries in an arbitrary,
    # filesystem-dependent order, so sort to make the choice reproducible.
    needle = ticker.lower()
    matches = sorted(p for p in folder.glob("*.csv") if needle in p.name.lower())
    for p in matches:
        # A name that begins with the ticker is the most likely intended file.
        if p.name.lower().startswith(needle):
            return p
    if matches:
        return matches[0]
    raise FileNotFoundError(f"Could not find CSV for {ticker} in {folder.resolve()}")

def load_stock_df(ticker: str) -> pd.DataFrame:
    """Read one ticker's CSV from ``data_dir`` and return its 2018 prices.

    The ``Date`` column is parsed to datetimes and the price/volume columns
    that are present are coerced to numbers (unparseable entries become NaN).
    Rows lacking a date, ``Close`` or ``Adj Close`` are discarded, the rest
    are ordered by date and indexed on it.

    Returns:
        DataFrame indexed by ``Date`` with the columns ``Close`` and
        ``Adj Close``, restricted to 2018-01-01 .. 2018-12-31.
    """
    raw = pd.read_csv(find_csv_for_ticker(ticker, data_dir))
    raw['Date'] = pd.to_datetime(raw['Date'], errors='coerce')

    numeric_cols = [
        name
        for name in ['Open','High','Low','Close','Adj Close','Volume']
        if name in raw.columns
    ]
    for name in numeric_cols:
        raw[name] = pd.to_numeric(raw[name], errors='coerce')

    complete_rows = raw.dropna(subset=['Date','Close','Adj Close'])
    by_date = complete_rows.sort_values('Date').set_index('Date')

    # Only calendar year 2018 and only the two price columns are needed.
    return by_date.loc['2018-01-01':'2018-12-31', ['Close','Adj Close']]

# Load the 2018 price table of every ticker in the universe.
stocks = {}
for tk in universe:
    stocks[tk] = load_stock_df(tk)

# Keep only the dates on which every stock has a row (index intersection).
date_index = None
for df in stocks.values():
    if date_index is None:
        date_index = df.index
    else:
        date_index = date_index.intersection(df.index)

date_index = date_index.sort_values()

# Wide tables (rows = common dates, columns = tickers) for Close and Adj Close.
close_columns = {}
adj_columns = {}
for tk in universe:
    aligned = stocks[tk].loc[date_index]
    close_columns[tk] = aligned['Close']
    adj_columns[tk] = aligned['Adj Close']

close_df = pd.DataFrame(close_columns, index=date_index)
adj_df   = pd.DataFrame(adj_columns, index=date_index)

close_df.head()

## 2) Compute dividends (per-share) from Close vs Adj Close

In [None]:
def compute_dividend_per_share(close_s: pd.Series, adj_s: pd.Series, diff_thresh: float = 0.001) -> pd.Series:
    """Estimate the per-share dividend on each day from Close vs Adj Close.

    For each day the ratio previous/current is taken for both price series.
    The gap between the two ratios, multiplied by the current close, is the
    implied cash amount per share.

    Args:
        close_s: Unadjusted closing prices.
        adj_s: Adjusted closing prices on the same index.
        diff_thresh: Ratio gaps whose absolute value does not exceed this
            are treated as noise and reported as 0.

    Returns:
        Series of per-share amounts; 0.0 wherever the gap is below the
        threshold, the amount is not positive, or the value is undefined
        (e.g. the first row, which has no previous day).
    """
    close_ratio = close_s.shift(1) / close_s
    adj_ratio = adj_s.shift(1) / adj_s
    gap = close_ratio - adj_ratio  # positive on dividend days
    implied_cash = gap * close_s

    # Keep an entry only if the gap is significant AND the amount is positive.
    keep = (gap.abs() > diff_thresh) & (implied_cash > 0)
    return implied_cash.where(keep, 0.0).fillna(0.0)

# Per-share dividend table: one column per ticker, one row per aligned date.
dividend_columns = {}
for tk in universe:
    dividend_columns[tk] = compute_dividend_per_share(close_df[tk], adj_df[tk])
div_per_share = pd.DataFrame(dividend_columns, index=date_index)

# Show the detected (non-zero) dividend rows for a handful of tickers.
for tk in ['IBM','MSFT','AAPL','ORCL','SAP']:
    has_dividend = div_per_share[tk] > 0
    nonzero = div_per_share.loc[has_dividend, [tk]]
    print(tk, "dividend rows:", len(nonzero))
    display(nonzero.head(10))

## 3) Portfolio engine (MTM simulation)

In [None]:
# Starting cash balance; run_strategy spends it on the initial equal-weight purchase.
INITIAL_CASH = 5_000_000
# Tickers bought on the start date, before any ranking-based rebalance happens.
INITIAL_PICK = ['IBM','MSFT','GOOG','AAPL','AMZN']  # per project example

def buy_equal_allocation(cash: float, date: pd.Timestamp, tickers: list[str]) -> tuple[dict, float]:
    """Split *cash* equally across *tickers* and buy whole shares at the close.

    Each ticker receives ``cash / len(tickers)``; as many whole shares as that
    budget affords are bought at ``close_df.loc[date, ticker]`` and whatever
    is left of each budget is returned as cash.

    Args:
        cash: Cash available for the purchase.
        date: Trading date whose Close prices are used.
        tickers: Tickers to buy. May be empty.

    Returns:
        ``(holdings, leftover)`` where ``holdings`` maps ticker -> number of
        shares bought and ``leftover`` is the unspent cash.

    Edge cases:
        * Empty *tickers*: nothing is bought and all cash is returned.
        * A ticker listed more than once gets one budget per occurrence and
          its shares are accumulated rather than overwritten.
        * A ticker whose price is not a positive finite number is skipped and
          its budget stays in cash.
    """
    if not tickers:
        # Avoid dividing by zero; with nothing to buy all cash is left over.
        return {}, float(cash)

    allocation = cash / len(tickers)
    holdings = {}
    leftover = 0.0
    for tk in tickers:
        price = float(close_df.loc[date, tk])
        # The chained comparison is False for NaN, zero, negatives and inf.
        if not (0.0 < price < float('inf')):
            leftover += allocation
            continue
        shares = int(allocation // price)
        # Accumulate so a repeated ticker keeps every share that was paid for.
        holdings[tk] = holdings.get(tk, 0) + shares
        leftover += allocation - shares * price
    return holdings, leftover

def holdings_value(holdings: dict, date: pd.Timestamp) -> float:
    """Return the market value of *holdings* at the Close prices of *date*.

    *holdings* maps ticker -> share count; each position is valued at
    ``close_df.loc[date, ticker]`` and the position values are summed.
    """
    position_values = [
        quantity * float(close_df.loc[date, symbol])
        for symbol, quantity in holdings.items()
    ]
    return sum(position_values)

def run_strategy(rebalance_days: int = 5, mode: str = 'buy_low') -> tuple[pd.Series, pd.Series]:
    """Simulate the rebalancing strategy over ``date_index``.

    The portfolio starts with ``INITIAL_CASH``, buys ``INITIAL_PICK`` in equal
    allocations at the start date's close (2018-01-02 if it is in
    ``date_index``, otherwise the first available date) and then, every
    *rebalance_days* rows of ``date_index``, sells everything and buys the
    five tickers ranked by Adj Close change since the previous rebalance.

    Per-day order of operations:
      1. Dividends are credited for the shares held coming into the day.
      2. On a rebalance date the portfolio is liquidated at Close and the new
         picks are bought at Close.
      3. MTM (cash + holdings at Close) and the cash balance are recorded.

    Args:
        rebalance_days: Number of rows of ``date_index`` between rebalances;
            must be >= 1.
        mode: ``'buy_low'`` picks the five smallest percentage changes,
            ``'buy_high'`` the five largest.

    Returns:
        ``(mtm, cash)``: two Series indexed by ``date_index`` holding the daily
        mark-to-market value and the daily cash balance.

    Raises:
        ValueError: If *mode* is not one of the two supported values or
            *rebalance_days* is smaller than 1.
    """
    # Validate with real exceptions: `assert` is stripped under `python -O`.
    if mode not in {'buy_low', 'buy_high'}:
        raise ValueError(f"mode must be 'buy_low' or 'buy_high', got {mode!r}")
    if rebalance_days < 1:
        raise ValueError(f"rebalance_days must be >= 1, got {rebalance_days!r}")

    start_date = pd.Timestamp('2018-01-02')
    if start_date not in date_index:
        start_date = date_index[0]

    # Initial buy at the start date's close.
    holdings, cash = buy_equal_allocation(float(INITIAL_CASH), start_date, INITIAL_PICK)

    # Every `rebalance_days`-th row after the start date is a rebalance date.
    start_pos = date_index.get_loc(start_date)
    rebalance_dates = set(date_index[start_pos + rebalance_days::rebalance_days])
    last_reb_date = start_date

    mtm = []
    cash_series = []
    for d in date_index:
        # 1) Credit dividends for shares held BEFORE trading on day d.
        div_cash = 0.0
        for tk, shares in holdings.items():
            div_cash += shares * float(div_per_share.loc[d, tk])
        cash += div_cash

        # 2) Rebalance (trade at Close) if needed.
        if d in rebalance_dates:
            # Adj Close return of every ticker since the previous rebalance.
            pct = (adj_df.loc[d] / adj_df.loc[last_reb_date]) - 1.0
            # Ascending order puts the worst performers first (buy_low);
            # descending order puts the best performers first (buy_high).
            ranked = pct.sort_values(ascending=(mode == 'buy_low'))
            picks = ranked.head(5).index.tolist()

            # Sell everything, then spread the proceeds over the new picks.
            cash += holdings_value(holdings, d)
            holdings, cash = buy_equal_allocation(cash, d, picks)
            last_reb_date = d

        # 3) Daily MTM at end of day d.
        mtm.append(cash + holdings_value(holdings, d))
        cash_series.append(cash)

    mtm_s = pd.Series(mtm, index=date_index, name=f"MTM_{mode}_{rebalance_days}d")
    cash_s = pd.Series(cash_series, index=date_index, name="cash")
    return mtm_s, cash_s

# Baseline runs: rebalance every 5 trading days, once per ranking mode.
mtm_low_5, cash_low_5 = run_strategy(rebalance_days=5, mode='buy_low')
mtm_high_5, cash_high_5 = run_strategy(rebalance_days=5, mode='buy_high')

# Cell output: the last recorded MTM value of each strategy.
(mtm_low_5.tail(1), mtm_high_5.tail(1))

## 4) High Tech Index + comparison plot

In [None]:
# Benchmark: the plain average of the ten Close prices on each trading day.
high_tech_index = close_df.mean(axis='columns').rename('HighTechIndex')

def normalize_to_start(s: pd.Series) -> pd.Series:
    """Return *s* with every value divided by its first value."""
    first_value = s.iloc[0]
    return s.div(first_value)

# Plot both 5-day strategies and the benchmark, each rescaled by its first value.
plt.figure()
for series, label in (
    (mtm_low_5, 'MTM buy_low (5d)'),
    (mtm_high_5, 'MTM buy_high (5d)'),
    (high_tech_index, 'High Tech Index'),
):
    scaled = normalize_to_start(series)
    plt.plot(scaled.index, scaled.values, label=label)
plt.title('Normalized Performance (Start = 1.0)')
plt.xlabel('Date')
plt.ylabel('Normalized Value')
plt.legend()
plt.show()

## 5) Convert MTM USD to JPY using USD/JPY 2018 data

In [None]:
def load_usdjpy() -> pd.Series:
    """Load the 2018 USD/JPY closing rates from a CSV in ``data_dir``.

    The well-known names ``usdjpy.csv``, ``USDJPY.csv`` and ``usdjpy-1.csv``
    are tried first; failing that, the first ``*.csv`` whose name contains
    ``usdjpy`` or ``jpy=x`` (case-insensitive) is used.

    Returns:
        Series named ``USDJPY`` indexed by date, covering 2018-01-01 ..
        2018-12-31, without missing values.

    Raises:
        FileNotFoundError: If no matching CSV is found.
    """
    preferred = [data_dir/'usdjpy.csv', data_dir/'USDJPY.csv', data_dir/'usdjpy-1.csv']
    path = next((p for p in preferred if p.exists()), None)

    if path is None:
        # Fall back to any CSV whose file name looks like a USD/JPY export.
        path = next(
            (
                p
                for p in data_dir.glob('*.csv')
                if 'usdjpy' in p.name.lower() or 'jpy=x' in p.name.lower()
            ),
            None,
        )

    if path is None:
        raise FileNotFoundError('Could not find USDJPY CSV (expected usdjpy.csv).')

    table = pd.read_csv(path)
    table['Date'] = pd.to_datetime(table['Date'], errors='coerce')
    table = table.dropna(subset=['Date']).sort_values('Date').set_index('Date')
    table['Close'] = pd.to_numeric(table['Close'], errors='coerce')

    # Restrict to 2018 and discard rows whose rate could not be parsed.
    closes_2018 = table.loc['2018-01-01':'2018-12-31', ['Close']].dropna()
    return closes_2018['Close'].rename('USDJPY')

usd_jpy = load_usdjpy()

# Align the FX rate to the equity trading days.
# reindex(method='ffill') takes, for each equity date, the latest FX quote at
# or before that date -- including quotes on days that are not equity trading
# days, which a plain reindex followed by ffill() would throw away.
# bfill() then covers equity dates that precede the first FX quote; without it
# the first JPY value could be NaN and the normalized JPY curve would be all NaN.
usd_jpy_aligned = usd_jpy.reindex(date_index, method='ffill').bfill()

# Portfolio value in JPY = value in USD x (JPY per USD).
mtm_low_5_jpy = (mtm_low_5 * usd_jpy_aligned).rename('MTM_buy_low_5d_JPY')

# Compare the USD and JPY curves, each rescaled by its own first value.
mtm_low_5_norm = normalize_to_start(mtm_low_5)
mtm_low_5_jpy_norm = normalize_to_start(mtm_low_5_jpy)

plt.figure()
plt.plot(mtm_low_5_norm.index, mtm_low_5_norm.values, label='MTM USD (buy_low 5d)')
plt.plot(mtm_low_5_jpy_norm.index, mtm_low_5_jpy_norm.values, label='MTM JPY (buy_low 5d)')
plt.title('MTM in USD vs JPY (Normalized)')
plt.xlabel('Date')
plt.ylabel('Normalized Value')
plt.legend()
plt.show()

# Cell output: final MTM in USD and in JPY.
mtm_low_5.tail(1), mtm_low_5_jpy.tail(1)

## 6) Optimize rebalance interval (Question 9)

In [None]:
def optimize_interval(mode: str, min_days: int = 1, max_days: int = 20):
    """Run the strategy for every rebalance interval in [min_days, max_days].

    Args:
        mode: Ranking mode forwarded to ``run_strategy``.
        min_days: Smallest rebalance interval to try (inclusive).
        max_days: Largest rebalance interval to try (inclusive).

    Returns:
        DataFrame with the columns ``rebalance_days`` and ``final_MTM_USD``
        (the last MTM value of each run), sorted with the largest final MTM
        first.
    """
    outcomes = [
        (days, float(run_strategy(days, mode)[0].iloc[-1]))
        for days in range(min_days, max_days + 1)
    ]
    table = pd.DataFrame(outcomes, columns=['rebalance_days','final_MTM_USD'])
    return table.sort_values('final_MTM_USD', ascending=False)

# Try every rebalance interval from 1 to 20 trading days for both modes.
opt_low = optimize_interval(mode='buy_low', min_days=1, max_days=20)
opt_high = optimize_interval(mode='buy_high', min_days=1, max_days=20)

# Show the ten best intervals per mode (tables are sorted best-first).
for heading, ranking in (
    ('Top 10 (buy_low):', opt_low),
    ('Top 10 (buy_high):', opt_high),
):
    print(heading)
    display(ranking.head(10))

# The first row of each sorted table is the best interval for that mode.
best_low, best_high = opt_low.iloc[0], opt_high.iloc[0]
best_low, best_high

## 7) Plot best interval strategies vs High Tech Index

In [None]:
# The selected rows hold the interval as a float; cast back to int for run_strategy.
best_low_days = int(best_low['rebalance_days'])
best_high_days = int(best_high['rebalance_days'])

# Re-run each mode at its best interval; only the MTM series is needed here.
mtm_low_best = run_strategy(best_low_days, 'buy_low')[0]
mtm_high_best = run_strategy(best_high_days, 'buy_high')[0]

plt.figure()
for series, label in (
    (mtm_low_best, f'buy_low best ({best_low_days}d)'),
    (mtm_high_best, f'buy_high best ({best_high_days}d)'),
    (high_tech_index, 'High Tech Index'),
):
    scaled = normalize_to_start(series)
    plt.plot(scaled.index, scaled.values, label=label)
plt.title('Best Interval Strategies vs High Tech Index (Normalized)')
plt.xlabel('Date')
plt.ylabel('Normalized Value')
plt.legend()
plt.show()

## 8) Final summary

In [None]:
# One-row overview: final MTM of the 5-day runs plus the best interval per mode.
summary_row = {
    'buy_low_5d_final_USD': float(mtm_low_5.iloc[-1]),
    'buy_high_5d_final_USD': float(mtm_high_5.iloc[-1]),
    'best_buy_low_days': best_low_days,
    'best_buy_low_final_USD': float(best_low['final_MTM_USD']),
    'best_buy_high_days': best_high_days,
    'best_buy_high_final_USD': float(best_high['final_MTM_USD']),
}
summary = pd.DataFrame([summary_row])
summary