# Homework: Stock Data Analysis

Includes all 6 stocks: IBM, MSFT, GOOG, AAPL, AMZN, META.


In [5]:
import pandas as pd
import numpy as np
from pathlib import Path

# Directory that holds the downloaded price-history CSV files.
data_dir = Path('/mnt/data')

# (ticker, CSV file name) pairs; the order here is the order in which the
# tickers are stored in `paths`.
_csv_files = [
    ('IBM', 'ibm-1.csv'),
    ('MSFT', 'msft-1.csv'),
    ('GOOG', 'goog-1.csv'),
    ('AAPL', 'aapl-1.csv'),
    ('META', 'meta-1.csv'),
    ('AMZN', 'amzn-1.csv'),
]

# Ticker symbol -> full path of its CSV file.
paths = {ticker: data_dir / file_name for ticker, file_name in _csv_files}

def load_yahoo_csv(path: Path) -> pd.DataFrame:
    """Load a price-history CSV and return it cleaned and sorted by date.

    Parameters
    ----------
    path
        Location of the CSV file (anything ``pd.read_csv`` accepts). The file
        must have a ``Date`` column formatted like ``02-Jan-18``
        (``%d-%b-%y``).

    Returns
    -------
    pd.DataFrame
        The file's rows, oldest first, with a fresh 0..n-1 index.

        * ``Date`` is converted to datetime; rows whose date cannot be parsed
          are dropped.
        * Each of ``Open``, ``High``, ``Low``, ``Close``, ``Adj Close`` and
          ``Volume`` that is present is converted to a numeric dtype. Comma
          thousands separators (``"1,082.48"``, ``"4,202,500"``) are removed
          first; values that still cannot be parsed become NaN.
    """
    df = pd.read_csv(path)
    df['Date'] = pd.to_datetime(df['Date'], format='%d-%b-%y', errors='coerce')

    for c in ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']:
        if c not in df.columns:
            continue
        values = df[c]
        if not pd.api.types.is_numeric_dtype(values):
            # Text column: strip thousands separators from prices as well as
            # volume, otherwise a price such as "1,082.48" would be coerced
            # to NaN below.
            values = values.astype(str).str.replace(',', '', regex=False)
        df[c] = pd.to_numeric(values, errors='coerce')

    # Unparseable dates were coerced to NaT above; discard those rows.
    df = df.dropna(subset=['Date']).sort_values('Date').reset_index(drop=True)
    return df


## Problem 1 — IBM 2018

In [None]:
# Load the IBM price history and add the traded value per row
# (closing price times volume).
ibm = load_yahoo_csv(paths['IBM'])
ibm['Trade Value'] = ibm['Close'].mul(ibm['Volume'])

# Summary statistics of the closing price over the whole loaded file.
highest_close, avg_close = ibm['Close'].max(), ibm['Close'].mean()

print(
    f'Highest Close (USD): {highest_close:.4f}',
    f'Average Close (USD): {avg_close:.4f}',
    sep='\n',
)

In [None]:
# Fixed multiplier applied to the USD-denominated columns
# (the name suggests yen per US dollar).
USDJPY = 107

# Build a converted copy of the IBM table; `ibm` itself is not modified.
# Only the listed price/value columns are scaled - every other column
# (e.g. Date, Volume) is carried over unchanged.
ibm_jpy = ibm.assign(**{
    column: ibm[column] * USDJPY
    for column in ['Open','High','Low','Close','Adj Close','Trade Value']
})

ibm_jpy.head()

## Problem 2 — Dividend Detection (All 6 Stocks)

In [None]:
def compute_dividends(df: pd.DataFrame, min_abs_dividend: float = 0.001) -> pd.DataFrame:
    """Infer cash dividends from the gap between Close and Adj Close.

    Adjusted-close series scale every price *before* an ex-dividend date by
    ``1 - D / previous_close``. For consecutive trading days this gives
    ``adj[t-1] / adj[t] * close[t] == close[t-1] - D`` on the ex-dividend
    date and ``close[t-1]`` on any other day, so

        D[t] = close[t-1] - adj[t-1] / adj[t] * close[t]

    Parameters
    ----------
    df
        Price history with ``Date``, ``Close`` and ``Adj Close`` columns.
        Rows may be in any order; they are sorted by date internally so that
        "previous row" always means "previous trading day".
    min_abs_dividend
        Smallest per-share amount reported. Smaller values are treated as
        rounding noise in the price data and dropped.

    Returns
    -------
    pd.DataFrame
        One row per detected dividend, oldest first, with columns ``Date``
        (the day the adjustment shows up) and ``Dividend`` (per-share amount,
        rounded to 6 decimals). Empty if nothing is detected.
    """
    # shift(1) must refer to the previous trading day, so enforce date order.
    ordered = df.sort_values('Date', kind='stable')
    close = ordered['Close']
    adj_close = ordered['Adj Close']

    # Yesterday's close as implied by the adjusted series; it is lower than
    # the real one by exactly the dividend on an ex-dividend date.
    implied_prev_close = adj_close.shift(1) / adj_close * close
    dividend = close.shift(1) - implied_prev_close

    # The first row has no predecessor (NaN) and is removed here.
    out = pd.DataFrame({'Date': ordered['Date'], 'Dividend': dividend}).dropna()

    # Keep only genuine payouts: positive and above the noise threshold.
    is_payout = (out['Dividend'] > 0) & (out['Dividend'] >= min_abs_dividend)
    out = out.loc[is_payout].copy()
    out['Dividend'] = out['Dividend'].round(6)
    return out.reset_index(drop=True)

# Ticker -> cleaned price history, loaded from each configured CSV file.
stocks = {ticker: load_yahoo_csv(csv_path) for ticker, csv_path in paths.items()}

# Ticker -> table of dividends detected in that price history.
dividends = {ticker: compute_dividends(prices) for ticker, prices in stocks.items()}

# Notebook output: how many dividend rows were found per ticker.
dict(zip(dividends, map(len, dividends.values())))

In [None]:
# Show each ticker's dividend table under a ruled heading.
rule = "="*60
for sym, df in dividends.items():
    # Blank line, 60-character rule, then the ticker symbol on its own line.
    print("\n" + rule, sym, sep="\n")
    display(df)