In [43]:
import numpy as np
import pandas as pd
import geopandas as gpd
import wavelet_transform as wt
import signal_processing as sp
import scipy.sparse
from sklearn.preprocessing import StandardScaler


In [44]:
def precompute_wavelet(poly_division, time_interval):
    df = pd.read_csv(f"data/polygon_data/{poly_division}_{time_interval}.csv")
    features = df.columns[2:]
    try:
        adj_matrix = np.load(f"data/adj_matrix/{poly_division}.npy")
    except:
        adj_matrix = scipy.sparse.load_npz(f"data/adj_matrix/{poly_division}.npz").toarray()

    signal_multi = np.stack(df.groupby("id_poly").apply(lambda x: x.sort_values("date").drop(columns=["id_poly", "date"]).values).values)
    
    # normalization
    for i in range(signal_multi.shape[2]):
        mu = np.mean(signal_multi[:, :, i])
        sigma = np.std(signal_multi[:, :, i])
        if sigma == 0:
            sigma = 1
        signal_multi[:, :, i] = (signal_multi[:, :, i] - mu) / sigma
    
    df_res = []

    for k, feature in enumerate(features):
        df_start = df[["id_poly", "date"]].copy()
        signal = signal_multi[:, :, k]
        n_timestamps = signal_multi.shape[1]

        wav =  wt.WaveletTransform(
            adj_matrix,
            n_timestamps,
            graph_product="strong",
            n_filters=4,
            kernel="abspline",
            scaling_function=False,
            method="chebyshev",
            order_chebyshev=30,
        )
        coeffs = wav.transform(signal)
        coeffs = sp.get_scaled_coefficients(coeffs)


        # create column for coeffs in the df_res
        for i in range(coeffs.shape[-1]):
            df_start[f"mean_freq_{i}"] = coeffs[:, :, i].flatten()
        df_start["type"] = feature

        df_res.append(df_start)

    df_res = pd.concat(df_res)
    float_columns = df_res.columns[df_res.dtypes == np.float64] 
    df_res[float_columns] = df_res[float_columns].astype(np.float16)
    df_res.to_csv(f"data/coeffs/{poly_division}_{time_interval}.csv", index=False)

In [45]:
for time_interval in ["Month", "Day", "3days", "5days"]:
    for poly_division in ["SpGrid", "SpDistricts", "SpCenterCensus2k", "SpCenterCensus5k"]:
        precompute_wavelet(poly_division, time_interval)

  signal_multi = np.stack(df.groupby("id_poly").apply(lambda x: x.sort_values("date").drop(columns=["id_poly", "date"]).values).values)
  signal_multi = np.stack(df.groupby("id_poly").apply(lambda x: x.sort_values("date").drop(columns=["id_poly", "date"]).values).values)
  signal_multi = np.stack(df.groupby("id_poly").apply(lambda x: x.sort_values("date").drop(columns=["id_poly", "date"]).values).values)
  signal_multi = np.stack(df.groupby("id_poly").apply(lambda x: x.sort_values("date").drop(columns=["id_poly", "date"]).values).values)
  signal_multi = np.stack(df.groupby("id_poly").apply(lambda x: x.sort_values("date").drop(columns=["id_poly", "date"]).values).values)
  signal_multi = np.stack(df.groupby("id_poly").apply(lambda x: x.sort_values("date").drop(columns=["id_poly", "date"]).values).values)
  signal_multi = np.stack(df.groupby("id_poly").apply(lambda x: x.sort_values("date").drop(columns=["id_poly", "date"]).values).values)
  signal_multi = np.stack(df.groupby("id_poly").

## Spatial Transform

In [46]:
from pygsp import graphs

def create_G_H_spatial(adjancency_matrix, n_timestamps):

    x, y = np.where(adjancency_matrix)
    e = []
    nodes = np.arange(adjancency_matrix.shape[0])
    n_nodes = adjancency_matrix.shape[0]
    
    for t in range(n_timestamps):  # link between nodes in the same timestamp
        e.append(np.array([x + t * n_nodes, y + t * n_nodes]).T)

    e = np.concatenate(e)
    adjancency_matrix_temporal = scipy.sparse.csr_matrix(
        (np.ones(e.shape[0]), (e[:, 0], e[:, 1])),
        shape=(
            n_timestamps * adjancency_matrix.shape[0],
            n_timestamps * adjancency_matrix.shape[0],
        ),
    )
    G_H = graphs.Graph(adjancency_matrix_temporal)
    return G_H


def precompute_spatial_wavelet(poly_division, time_interval):
    
    df = pd.read_csv(f"data/polygon_data/{poly_division}_{time_interval}.csv")
    features = df.columns[2:]
    try:
        adj_matrix = np.load(f"data/adj_matrix/{poly_division}.npy")
    except:
        adj_matrix = scipy.sparse.load_npz(f"data/adj_matrix/{poly_division}.npz").toarray()
    signal_multi = np.stack(df.groupby("id_poly").apply(lambda x: x.sort_values("date").drop(columns=["id_poly", "date"]).values).values)

    # normalization
    for i in range(signal_multi.shape[2]):
        mu = np.mean(signal_multi[:, :, i])
        sigma = np.std(signal_multi[:, :, i])
        if sigma == 0:
            sigma = 1
        signal_multi[:, :, i] = (signal_multi[:, :, i] - mu) / sigma

    
    n_timestamps = signal_multi.shape[1]
    G_H = create_G_H_spatial(adj_matrix, n_timestamps)
    df_res = []

    for k, feature in enumerate(features):
        df_start = df[["id_poly", "date"]].copy()
        signal = signal_multi[:, :, k]

        n_timestamps = signal_multi.shape[1]

        wav =  wt.WaveletTransform(
            adj_matrix,
            n_timestamps,
            G_H=G_H,
            graph_product="strong",
            n_filters=4,
            kernel="abspline",
            scaling_function=False,
            method="chebyshev",
            order_chebyshev=30,
        )
        coeffs = wav.transform(signal)
        coeffs = sp.get_scaled_coefficients(coeffs)
        # create column for coeffs in the df_res
        for i in range(coeffs.shape[-1]):
            df_start[f"mean_freq_{i}"] = coeffs[:, :, i].flatten()
        df_start["type"] = feature
        df_res.append(df_start)
        
    df_res = pd.concat(df_res)
    float_columns = df_res.columns[df_res.dtypes == np.float64] 
    df_res[float_columns] = df_res[float_columns].astype(np.float16)
    df_res.to_csv(f"data/coeffs_spatial/{poly_division}_{time_interval}.csv", index=False)
    

In [47]:
for time_interval in ["Month", "Day", "3days", "5days"]:
    for poly_division in ["SpGrid", "SpDistricts", "SpCenterCensus2k", "SpCenterCensus5k"]:
        precompute_spatial_wavelet(poly_division, time_interval)

  signal_multi = np.stack(df.groupby("id_poly").apply(lambda x: x.sort_values("date").drop(columns=["id_poly", "date"]).values).values)
  signal_multi = np.stack(df.groupby("id_poly").apply(lambda x: x.sort_values("date").drop(columns=["id_poly", "date"]).values).values)
  signal_multi = np.stack(df.groupby("id_poly").apply(lambda x: x.sort_values("date").drop(columns=["id_poly", "date"]).values).values)
  signal_multi = np.stack(df.groupby("id_poly").apply(lambda x: x.sort_values("date").drop(columns=["id_poly", "date"]).values).values)
  signal_multi = np.stack(df.groupby("id_poly").apply(lambda x: x.sort_values("date").drop(columns=["id_poly", "date"]).values).values)
  signal_multi = np.stack(df.groupby("id_poly").apply(lambda x: x.sort_values("date").drop(columns=["id_poly", "date"]).values).values)
  signal_multi = np.stack(df.groupby("id_poly").apply(lambda x: x.sort_values("date").drop(columns=["id_poly", "date"]).values).values)
  signal_multi = np.stack(df.groupby("id_poly").