In [1]:
from PIL import Image
from matplotlib.colors import rgb2hex
import pandas as pd
import geopandas as gpd
from sklearn.manifold import TSNE
import numpy as np

import torch
from torch.utils.data import DataLoader
import argparse

from ae import *

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [None]:
def get_colors_polygons(poly_division):
    """Give every polygon a hex color sampled from the image ``data/misc/bremm.png``.

    The layer ``data/shapefiles/{poly_division}.geojson`` is read and
    reprojected to EPSG:6933. Polygon centroids are then rescaled so that
    their bounding box spans the full pixel grid of the image, and each
    polygon receives the color of the pixel under its rescaled centroid
    (x selects the column, y selects the row).

    With a 512x512 image this reproduces the former hard-coded ``* 511``
    scaling exactly; other image sizes are now handled as well.

    Parameters
    ----------
    poly_division : str
        Base name (without extension) of the GeoJSON file to read.

    Returns
    -------
    list of str
        One ``#rrggbb`` string per polygon, in the order of the file.
        Empty when the layer has no features.
    """
    polygons = gpd.read_file(f"data/shapefiles/{poly_division}.geojson").to_crs("EPSG:6933")
    centroids = np.array(
        [(point.x, point.y) for point in polygons.centroid], dtype=float
    ).reshape(-1, 2)
    if len(centroids) == 0:
        return []

    # Force plain RGB so palette, grayscale and RGBA files are all indexed the
    # same way; the context manager releases the file handle deterministically.
    with Image.open("data/misc/bremm.png") as img:
        pixels = np.asarray(img.convert("RGB"))
    height, width = pixels.shape[:2]

    offsets = centroids - centroids.min(axis=0)
    spans = offsets.max(axis=0)
    # A single polygon (or centroids sharing one coordinate) has zero extent on
    # that axis; dividing by 1 pins it to pixel 0 instead of producing NaN.
    spans[spans == 0] = 1.0
    pixel_xy = (offsets / spans * np.array([width - 1, height - 1])).astype(int)

    # Image arrays are indexed [row, column], i.e. [y, x].
    return [rgb2hex(pixels[row, col] / 255) for col, row in pixel_xy]

In [4]:
# Training configuration shared by the cells below. The argparse namespace is
# used purely as an attribute container: an empty argument list is parsed, so
# nothing is read from the command line.
parser = argparse.ArgumentParser()
args = parser.parse_args([])
vars(args).update(
    batch_size = 32,
    embedding_dim = 64,
    n_layers = 1,
    # run on the GPU whenever PyTorch can see one
    device = "cuda" if torch.cuda.is_available() else "cpu",
    dropout = False,
    use_bn = False,
    lr = 0.001,
    epoch = 200,
)


def projection_coeffs_deep(poly_division, time_interval, spatial = False, seed = None):
    """Embed per-polygon coefficient series with an autoencoder and save a 2D t-SNE layout.

    Pipeline:
      1. Read ``data/coeffs/{poly_division}_{time_interval}.csv`` (or the same
         file under ``data/coeffs_spatial/`` when ``spatial`` is true). The CSV
         must provide the columns ``type``, ``id_poly``, ``date`` and
         ``mean_freq_3``.
      2. For every ``type`` value, pivot ``mean_freq_3`` into an
         (id_poly x date) table and stack the tables into an array of shape
         (n_polygons, n_types, n_dates).
      3. Train ``AutoencoderConv`` on a random 80/20 train/validation split,
         with hyper-parameters taken from the module-level ``args``.
      4. Encode every polygon, project the encodings to 2D with t-SNE and
         rescale each axis to [0, 1].
      5. Write ``x``, ``y``, ``id_poly``, ``color`` and one temporal-mean
         column per ``type`` to ``data/projections/`` (or
         ``data/projections_spatial/``).

    Parameters
    ----------
    poly_division : str
        Name of the polygon division; selects both the CSV and the GeoJSON
        used by ``get_colors_polygons``.
    time_interval : str
        Suffix of the coefficients file name.
    spatial : bool, default False
        Read from / write to the ``*_spatial`` directories instead.
    seed : int or None, default None
        When given, NumPy's and PyTorch's global RNGs are seeded before the
        split and the model construction so that a run can be repeated.
        ``None`` leaves the RNG state untouched.

    Returns
    -------
    None
        The result is written to disk only.
    """
    variant = "_spatial" if spatial else ""
    coeffs = pd.read_csv(f"data/coeffs{variant}/{poly_division}_{time_interval}.csv")

    colors = get_colors_polygons(poly_division)

    type_names = coeffs["type"].unique()
    per_type = [
        coeffs[coeffs["type"] == name]
        .pivot(index = "id_poly", columns = "date", values = ["mean_freq_3"])
        .values
        for name in type_names
    ]
    # (n_types, n_polygons, n_dates) -> (n_polygons, n_types, n_dates).
    # Stacking requires every type to cover the same polygons and dates.
    series = np.array(per_type).transpose(1, 0, 2)
    n_polygons, n_types, n_dates = series.shape

    if seed is not None:
        np.random.seed(seed)
        torch.manual_seed(seed)

    train_idx = np.random.choice(n_polygons, int(0.8 * n_polygons), replace = False)
    # Set lookup keeps the complement computation linear in n_polygons.
    train_set = set(train_idx.tolist())
    val_idx = np.array([i for i in range(n_polygons) if i not in train_set])

    def make_loader(indices):
        # One float32 tensor of shape (n_types, n_dates) per selected polygon.
        samples = [torch.tensor(series[i], dtype = torch.float32) for i in indices]
        return DataLoader(samples, batch_size = args.batch_size, shuffle = False)

    train_loader = make_loader(train_idx)
    val_loader = make_loader(val_idx)

    # AutoencoderConv, train_model and get_encodings are expected to come from
    # the `ae` star import. torch.nn is referenced explicitly so the activations
    # do not depend on `ae` happening to re-export `nn`.
    model = AutoencoderConv(
        input_dim = n_types,
        encoding_dim = args.embedding_dim,
        seq_len = n_dates,
        h_dims = [128, 64],
        h_activ = torch.nn.ReLU(),
        out_activ = torch.nn.Identity()
    )

    # The returned loss histories are not used by this function.
    train_model(
        model,
        train_loader,
        val_loader,
        args,
        verbose = False
    )

    # Encode every polygon (train and validation alike), in positional order.
    all_loader = make_loader(range(n_polygons))
    encodings = get_encodings(model, all_loader, args)

    # Recent scikit-learn rejects perplexity >= n_samples; 30 is its default,
    # so larger datasets behave exactly as before.
    perplexity = min(30.0, max(n_polygons - 1, 1))
    tsne = TSNE(n_components = 2, random_state = 0, perplexity = perplexity)
    proj = tsne.fit_transform(encodings)

    # Rescale each axis to [0, 1]; a degenerate axis stays at 0 instead of NaN.
    proj -= proj.min(axis = 0)
    span = proj.max(axis = 0)
    span[span == 0] = 1
    proj /= span

    projections = pd.DataFrame(proj, columns = ["x", "y"])
    # NOTE(review): ids are positional (0..n-1), not the id_poly values of the
    # pivot index, and colors are assumed to follow the same order as the
    # sorted id_poly values - confirm against the GeoJSON feature order.
    projections["id_poly"] = np.arange(n_polygons)
    projections["color"] = colors

    # Temporal mean of every series: one column per type.
    mean_over_time = series.mean(axis = 2)
    for column, name in enumerate(type_names):
        projections[name] = mean_over_time[:, column]

    projections.to_csv(f"data/projections{variant}/{poly_division}_{time_interval}.csv", index=False)

In [12]:
# SpCenterCensus5k / Period1: regular coefficients first, then the spatial variant.
projection_coeffs_deep("SpCenterCensus5k", "Period1", spatial = False)
projection_coeffs_deep("SpCenterCensus5k", "Period1", spatial = True)


In [20]:
# SpCenterCensus5k / Period2: regular coefficients first, then the spatial variant.
projection_coeffs_deep("SpCenterCensus5k", "Period2", spatial = False)
projection_coeffs_deep("SpCenterCensus5k", "Period2", spatial = True)

In [5]:
# NYBlocks / Period1: regular coefficients first, then the spatial variant.
projection_coeffs_deep("NYBlocks", "Period1", spatial = False)
projection_coeffs_deep("NYBlocks", "Period1", spatial = True)

In [17]:
# BLACities / Year: regular coefficients first, then the spatial variant.
projection_coeffs_deep("BLACities", "Year", spatial = False)
projection_coeffs_deep("BLACities", "Year", spatial = True)

In [18]:
# BLACities / Year2: regular coefficients first, then the spatial variant.
projection_coeffs_deep("BLACities", "Year2", spatial = False)
projection_coeffs_deep("BLACities", "Year2", spatial = True)