In [72]:
from PIL import Image
from matplotlib.colors import rgb2hex
import pandas as pd
import geopandas as gpd
from sklearn.manifold import TSNE
from tslearn.metrics import dtw, cdist_dtw, cdist_soft_dtw, cdist_soft_dtw_normalized, soft_dtw
import glob
from tqdm import tqdm
import numpy as np

In [80]:
def get_colors_polygons(poly_division):
    """Pick one hex color per polygon by sampling an image at the polygon's centroid.

    The polygons of ``data/shapefiles/{poly_division}.geojson`` are reprojected
    to EPSG:6933, their centroids are min-max rescaled per axis onto the pixel
    grid of ``data/misc/bremm.png`` (presumably a 2D colormap), and the pixel
    under each centroid is converted to a hex string.

    Args:
        poly_division: Base name (without extension) of the geojson file.

    Returns:
        list[str]: One ``"#rrggbb"`` string per polygon, in the row order of
        the geojson file. Empty list if the file has no polygons.
    """
    polygons = gpd.read_file(f"data/shapefiles/{poly_division}.geojson").to_crs("EPSG:6933")
    centroids = np.array(
        [(point.x, point.y) for point in polygons.centroid], dtype=float
    ).reshape(-1, 2)
    if len(centroids) == 0:
        return []

    # Read the pixels inside a context manager so the file handle is released;
    # converting to RGB makes palette/grayscale/RGBA images index the same way.
    with Image.open("data/misc/bremm.png") as colormap:
        pixels = np.array(colormap.convert("RGB"))
    height, width = pixels.shape[:2]

    # Min-max normalize each axis to [0, 1]. An axis on which every centroid
    # coincides has zero span; dividing by 1 there maps it to 0 instead of NaN.
    offsets = centroids - centroids.min(axis=0)
    spans = offsets.max(axis=0)
    spans[spans == 0] = 1.0

    # Scale to the last valid pixel index of the actual image (x -> column,
    # y -> row) and truncate to integers.
    pixel_xy = (offsets / spans * [width - 1, height - 1]).astype(int)

    # NOTE: the row index grows with the projected y coordinate, so the minimum
    # y samples row 0 of the image array.
    return [rgb2hex(pixels[row, col] / 255) for col, row in pixel_xy]

In [7]:
def get_dtw_matrix(ts, dtw_args=None):
    """Compute the symmetric matrix of pairwise DTW distances between series.

    Args:
        ts: Indexable collection of time series; ``ts[i]`` is passed to
            ``tslearn.metrics.dtw`` as one series.
        dtw_args: Optional dict of keyword arguments forwarded to ``dtw`` on
            every call. ``None`` (the default) forwards nothing.

    Returns:
        numpy.ndarray: ``(n, n)`` array where entry ``[i, j]`` is the DTW
        distance between ``ts[i]`` and ``ts[j]``, with ``n = len(ts)``.
    """
    # A None sentinel avoids a dict default, which would be created once and
    # shared by every call of the function.
    kwargs = {} if dtw_args is None else dtw_args

    n = len(ts)
    dtw_matrix = np.zeros((n, n))
    for i in tqdm(range(n)):
        # Only the upper triangle (diagonal included) is computed; each value
        # is mirrored into the lower triangle.
        for j in range(i, n):
            distance = dtw(ts[i], ts[j], **kwargs)
            dtw_matrix[i, j] = distance
            dtw_matrix[j, i] = distance
    return dtw_matrix

In [86]:
def projection_coeffs(poly_division, time_interval):
    """Embed each feature's per-polygon time series in 2D with t-SNE and save it.

    Reads ``data/coeffs/{poly_division}_{time_interval}.csv``. For every
    distinct value of its ``type`` column, the ``mean_freq_3`` values are
    pivoted into a polygon x date matrix, embedded with t-SNE, and min-max
    rescaled per axis into [0.025, 0.975]. The result has ``{type}_x`` and
    ``{type}_y`` columns per feature plus ``id_poly`` and ``color``, and is
    written to ``data/projections/{poly_division}_{time_interval}.csv``.

    Args:
        poly_division: Name of the polygon division; selects both the coeffs
            CSV and the geojson used for the polygon colors.
        time_interval: Name of the time aggregation; selects the coeffs CSV.

    Returns:
        None. The projections are written to disk.
    """
    coeffs = pd.read_csv(f"data/coeffs/{poly_division}_{time_interval}.csv")
    feature_names = coeffs["type"].unique()
    coeffs = coeffs[["id_poly", "date", "type", "mean_freq_3"]]

    colors = get_colors_polygons(poly_division)

    # t-SNE runs with its default metric on the raw series. DTW-based variants
    # (a soft_dtw metric, or a precomputed matrix from cdist_dtw /
    # get_dtw_matrix with a Sakoe-Chiba band) are alternatives not enabled here.
    projections = []
    for feat in feature_names:
        series = coeffs[coeffs["type"] == feat].pivot(
            index="id_poly", columns="date", values="mean_freq_3"
        )

        tsne = TSNE(n_components=2, random_state=0)
        proj = tsne.fit_transform(series.values)

        # Rescale each axis to [0, 1], then shrink into [0.025, 0.975].
        lower = proj.min(axis=0)
        proj = (proj - lower) / (proj.max(axis=0) - lower)
        proj = proj * 0.95 + 0.025

        # Keep the pivot's id_poly index so every row stays tied to its real
        # polygon id rather than to its position in the matrix.
        projections.append(
            pd.DataFrame(proj, columns=[f"{feat}_x", f"{feat}_y"], index=series.index)
        )

    # Concatenating on the id_poly index aligns features by polygon even when
    # they do not cover exactly the same set of polygons.
    projections = pd.concat(projections, axis=1)
    projections["id_poly"] = projections.index.to_numpy()
    # Colors are assigned positionally: this assumes the geojson rows are in
    # the same order as the sorted id_poly values — TODO confirm.
    projections["color"] = colors
    projections.to_csv(f"data/projections/{poly_division}_{time_interval}.csv", index=False)

In [87]:
# Build and save the projections CSV for every combination of time interval
# (outer loop) and polygon division (inner loop).
for time_interval in ("Month", "Day", "3days", "5days"):
    for poly_division in ("SpGrid", "SpDistricts", "SpCenterCensus2k", "SpCenterCensus5k"):
        projection_coeffs(poly_division, time_interval)