In [None]:
import pandas as pd
import numpy as np
import pywt

def extract_wavelet_features(data, scales=np.arange(1,128), wavelet='morl'):
    """Summarise the continuous-wavelet energy of a 1-D signal.

    Runs ``pywt.cwt`` on ``data`` and squares the coefficient magnitudes to
    obtain an energy array.

    Args:
        data: Signal samples, passed straight through to ``pywt.cwt``.
        scales: Scales handed to ``pywt.cwt``; also indexed to report the
            dominant scale. Defaults to the integers 1..127.
        wavelet: Wavelet name handed to ``pywt.cwt`` (default ``'morl'``).

    Returns:
        A ``(mean_energy, max_energy, dominant_scale)`` tuple: the mean and
        the maximum over the whole energy array, and the entry of ``scales``
        whose row of the energy array has the largest mean.
    """
    # The second value returned by pywt.cwt is not needed here.
    coefficients, _unused = pywt.cwt(data, scales, wavelet)
    power = np.abs(coefficients) ** 2

    # Average along axis 1 so each row of the coefficient array gets one score
    # (PyWavelets documents rows as scales — see pywt.cwt).
    row_means = power.mean(axis=1)
    strongest_row = row_means.argmax()

    return power.mean(), power.max(), scales[strongest_row]

def build_wavelet_feature_df_temporal(file_name, min_points=10):
    """Build one row of wavelet energy features per MODZCTA from a CSV file.

    The CSV must provide the columns 'Year', 'Month', 'Date', 'Hour',
    'MODZCTA' and 'Count'; surrounding whitespace in header names is
    stripped before use. The four date/time columns are combined into a
    'Datetime' column, rows are ordered by it, and the 'Count' values of each
    MODZCTA group are passed to ``extract_wavelet_features``.

    Args:
        file_name: Path (or anything ``pd.read_csv`` accepts) of the input.
        min_points: Groups with fewer rows than this are skipped.

    Returns:
        A DataFrame with the columns 'MODZCTA' (string form of the integer
        code), 'mean_energy', 'max_energy' and 'dominant_scale'. The columns
        are present even when no group qualifies, so callers can always
        rename or merge on 'MODZCTA'.
    """
    output_columns = ['MODZCTA', 'mean_energy', 'max_energy', 'dominant_scale']

    df = pd.read_csv(file_name)
    df.columns = df.columns.str.strip()

    # pd.to_datetime assembles timestamps from a frame whose columns are
    # named year/month/day/hour, hence the rename.
    date_parts = df[['Year', 'Month', 'Date', 'Hour']].rename(
        columns={'Year': 'year', 'Month': 'month', 'Date': 'day', 'Hour': 'hour'}
    )
    df['Datetime'] = pd.to_datetime(date_parts)

    # A single stable sort is enough: groupby keeps the row order inside each
    # group, so every group's series is already chronological.
    df = df.sort_values('Datetime', kind='mergesort')

    features = []
    for mod, group in df.groupby('MODZCTA'):
        if len(group) < min_points:
            continue
        me, max_e, dom_scale = extract_wavelet_features(group['Count'].values)
        features.append({
            'MODZCTA': str(int(mod)),
            'mean_energy': me,
            'max_energy': max_e,
            'dominant_scale': dom_scale,
        })

    # Explicit columns keep the schema stable when `features` is empty;
    # pd.DataFrame([]) alone would produce a frame with no columns at all.
    return pd.DataFrame(features, columns=output_columns)

# Wavelet features for the high-risk dataset, with feature columns prefixed
# 'high_' so they stay distinguishable after a merge with other risk levels.
wf_high = build_wavelet_feature_df_temporal(
    'HighRiskData_Temporal.csv', min_points=10
).rename(columns={
    'mean_energy': 'high_mean_energy',
    'max_energy': 'high_max_energy',
    'dominant_scale': 'high_dominant_scale',
})

# The medium- and low-risk datasets are currently disabled. To re-enable
# them, build wf_med / wf_low the same way from 'MediumRiskData_Temporal.csv'
# and 'LowRiskData_Temporal.csv', rename their feature columns with 'med_' /
# 'low_' prefixes, and outer-merge them onto wf_high on 'MODZCTA' before the
# rename to 'ZIP' below.

# Only the high-risk features are combined for now; the key column is
# exported under the name 'ZIP'.
wf_combined = wf_high.rename(columns={'MODZCTA': 'ZIP'})
wf_combined.to_csv('wavelet_features_temporal.csv', index=False)


: 