# ETHUSD Minute Data
- Resampling into Daily Data
- Daily realized variance (RV)
- Daily returns (percentage change of the daily close)

In [16]:
import pandas as pd
import numpy as np

# Load the ETH/USD minute-bar dataset
file_path = "../../data/raw/minute/ethusd.csv"

# NOTE(review): save_path is not used anywhere in this cell; the daily output
# is written to output_path below. Kept in case another cell relies on it.
save_path = "../../data/processed/minute/ethusd.csv"

output_path = "../../data/ethusd_group_project.csv"

data = pd.read_csv(file_path)

# Convert Unix timestamp (milliseconds) to datetime and order chronologically
data['time'] = pd.to_datetime(data['time'], unit='ms')
data.set_index('time', inplace=True)
data.sort_index(inplace=True)

# Intraday (bar-to-bar) log returns.
# Kept as a separate Series on purpose: dropping the NaN produced by shift(1)
# directly on `data` would also remove that bar from the daily OHLCV
# aggregation below (wrong first-day open / high / low / volume).
log_returns = np.log(data['close'] / data['close'].shift(1)).dropna()

# Daily realized variance: sum of squared intraday log returns per calendar day
rv_1d = (log_returns ** 2).resample('1D').sum()

# Aggregate the minute bars into daily OHLCV bars
daily_data = data.resample('1D').agg({
    'open': 'first',        # First open price of the day
    'close': 'last',        # Last close price of the day
    'high': 'max',          # Highest price of the day
    'low': 'min',           # Lowest price of the day
    'volume': 'sum',        # Total volume for the day
})
# Aligned on the daily DatetimeIndex; days without a realized-variance value
# become NaN and are dropped below.
daily_data['realized_variance'] = rv_1d

# Drop any rows with NaN values (e.g., days with no minute bars)
daily_data.dropna(
    subset=['open', 'close', 'high', 'low', 'volume', 'realized_variance'],
    inplace=True,
)

# Daily simple returns from the daily close price. Computed after the dropna
# above, so a return following a removed day spans the whole gap.
daily_data['daily_return'] = daily_data['close'].pct_change()

daily_data.to_csv(output_path, index=True)

# Last expression of the cell so the notebook actually renders the preview
daily_data.head()



In [None]:
def calculate_parkinson(data, window, *, high_col='High', low_col='Low'):
    """
    Calculate Parkinson volatility from high/low prices over a rolling window.

    For each row the estimator is
        sqrt( 1 / (4 * ln(2) * window) * sum(ln(high / low) ** 2) )
    where the sum runs over the last `window` rows (inclusive).

    Parameters:
    - data: DataFrame containing the high and low price columns.
    - window: Rolling window size in rows (e.g., 7 or 30 for daily bars).
      Must be a positive integer.
    - high_col: Name of the high-price column. Defaults to 'High'; pass
      'high' for frames that use lowercase OHLC column names.
    - low_col: Name of the low-price column. Defaults to 'Low'.

    Returns:
    - Series of Parkinson volatility values, indexed like `data`. The first
      `window - 1` entries are NaN because the rolling window is not yet full.

    Raises:
    - ValueError: if `window` is smaller than 1.
    - KeyError: if `high_col` or `low_col` is not a column of `data`.
    """
    if window < 1:
        # window == 0 would otherwise divide by zero in the scaling factor
        raise ValueError(f"window must be a positive integer, got {window!r}")

    # Squared log high/low range of each bar
    squared_log_range = np.log(data[high_col] / data[low_col]) ** 2

    # Parkinson normalisation constant, averaged over the window length
    scale = 1 / (4 * np.log(2) * window)

    parkinson_vol = np.sqrt(scale * squared_log_range.rolling(window=window).sum())
    return parkinson_vol
