# HW_4


In [10]:
import pandas as pd
import numpy as np
from pathlib import Path

# IMPORTANT: the CSV files are looked up relative to the notebook's working
# directory, so they must sit in the same folder as this notebook.
data_dir = Path('.')

# Ticker symbol -> CSV file path, built from (symbol, filename) pairs.
paths = {
    symbol: data_dir / filename
    for symbol, filename in (
        ('IBM', 'ibm-1.csv'),
        ('MSFT', 'msft-1.csv'),
        ('GOOG', 'goog-1.csv'),
        ('AAPL', 'aapl-1.csv'),
        ('AMZN', 'amzn-1.csv'),
        ('META', 'meta-1.csv'),
    )
}

def load_yahoo_csv(path: Path) -> pd.DataFrame:
    """Load one price-history CSV into a date-sorted DataFrame.

    Parameters
    ----------
    path : Path
        Location of the CSV file. It must contain a ``Date`` column whose
        values look like ``02-Jan-18`` (format ``%d-%b-%y``).

    Returns
    -------
    pd.DataFrame
        The file's rows sorted ascending by ``Date`` with a fresh 0..n-1 index.
        ``Date`` is parsed to datetimes, and whichever of Open / High / Low /
        Close / Adj Close / Volume are present are converted to numbers.
        Rows whose date cannot be parsed are dropped; numeric cells that
        cannot be parsed become NaN.
    """
    df = pd.read_csv(path)
    df['Date'] = pd.to_datetime(df['Date'], format='%d-%b-%y', errors='coerce')

    for c in ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']:
        if c not in df.columns:
            continue
        if not pd.api.types.is_numeric_dtype(df[c]):
            # Text columns may carry thousands separators ("1,064.31",
            # "4,395,815"). Strip them for prices as well as volume; otherwise
            # the coercion below would silently turn such cells into NaN.
            df[c] = df[c].astype(str).str.replace(',', '', regex=False)
        df[c] = pd.to_numeric(df[c], errors='coerce')

    df = df.dropna(subset=['Date']).sort_values('Date').reset_index(drop=True)
    return df

# Parse each ticker's CSV once and keep the resulting frames keyed by symbol.
stocks = dict(zip(paths, map(load_yahoo_csv, paths.values())))
# Last expression of the cell: (rows, columns) per symbol, shown as its output.
{sym: frame.shape for sym, frame in stocks.items()}

{'IBM': (250, 7),
 'MSFT': (250, 7),
 'GOOG': (250, 7),
 'AAPL': (250, 7),
 'AMZN': (250, 7),
 'META': (250, 7)}

## Problem 1 — IBM 2018

In [19]:
# Convert IBM's USD-denominated columns to Japanese Yen (USDJPY = 107)
USDJPY = 107  # yen per one US dollar; USD amounts are multiplied by this

# Build the USD frame from the loaded data so the cell runs on a fresh kernel
# instead of relying on an `ibm` variable left over from another cell.
# Trade Value = Close * Volume, which reproduces the values in this cell's
# saved output.
ibm_usd = stocks['IBM'].copy()
ibm_usd['Trade Value'] = ibm_usd['Close'] * ibm_usd['Volume']

# Volume is a share count rather than a money amount, so it is not converted.
money_cols = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Trade Value']

ibm_jpy = (
    ibm_usd
    .assign(**{col: ibm_usd[col] * USDJPY for col in money_cols})
    .rename(columns={col: f'{col} (JPY)' for col in money_cols})
)

ibm_jpy.head()

Unnamed: 0,Date,Open (JPY),High (JPY),Low (JPY),Close (JPY),Adj Close (JPY),Volume,Trade Value (JPY)
0,2018-01-02,15804.97,15836.0,15706.53,15779.29,11505.71,4395815,69362840000.0
1,2018-01-03,16094.94,16347.46,15992.22,16212.64,11822.43,9875914,160114600000.0
2,2018-01-04,16331.41,16604.26,16302.52,16541.13,12061.04,7903785,130737500000.0
3,2018-01-05,16617.1,16664.18,16480.14,16621.38,12119.89,5434807,90333990000.0
4,2018-01-08,16639.57,16766.9,16541.13,16721.96,12193.72,5478425,91610000000.0


## Problem 2 — Dividend Detection

In [17]:
def compute_dividends(df: pd.DataFrame, diff_thresh: float = 0.001) -> pd.DataFrame:
    """Infer per-share dividends from the gap between raw and adjusted closes.

    For every row, the ratio of the previous row's price to the current row's
    price is taken for both ``Close`` and ``Adj Close``. The difference between
    those two ratios, multiplied by the row's ``Close``, is the implied
    dividend. Each row is compared with its immediate predecessor, so the
    frame is expected to already be in chronological order.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain ``Date``, ``Close`` and ``Adj Close`` columns.
    diff_thresh : float, default 0.001
        Minimum absolute ratio difference for a row to be reported.

    Returns
    -------
    pd.DataFrame
        Columns ``Date`` and ``Dividend`` with a fresh 0..n-1 index, holding
        only rows whose ratio gap exceeds ``diff_thresh`` in absolute value
        and whose implied dividend is positive.
    """
    previous = df[['Close', 'Adj Close']].shift(1)
    ratio_gap = previous['Close'] / df['Close'] - previous['Adj Close'] / df['Adj Close']

    # Rows with any missing value are discarded; the first row always is,
    # because it has no predecessor to compare against.
    candidates = pd.DataFrame(
        {'Date': df['Date'], 'Dividend': ratio_gap * df['Close'], 'Diff': ratio_gap}
    ).dropna()

    is_payout = (candidates['Diff'].abs() > diff_thresh) & (candidates['Dividend'] > 0)
    return candidates.loc[is_payout, ['Date', 'Dividend']].reset_index(drop=True)

# Detected dividend table for every loaded ticker, keyed by symbol.
dividends = {symbol: compute_dividends(frame) for symbol, frame in stocks.items()}

# Print each symbol as a heading, then render its table with rich display.
for sym, payouts in dividends.items():
    print("\n", sym)
    display(payouts)


 IBM


Unnamed: 0,Date,Dividend
0,2018-02-08,1.430577
1,2018-05-09,1.49822
2,2018-08-09,1.49851
3,2018-11-08,1.504961



 MSFT


Unnamed: 0,Date,Dividend
0,2018-02-14,0.424499
1,2018-05-16,0.426192
2,2018-08-15,0.421855
3,2018-11-14,0.466496



 GOOG


Unnamed: 0,Date,Dividend



 AAPL


Unnamed: 0,Date,Dividend
0,2018-02-09,0.165669
1,2018-05-11,0.180963
2,2018-08-10,0.193091
3,2018-11-08,0.192223



 AMZN


Unnamed: 0,Date,Dividend



 META


Unnamed: 0,Date,Dividend
