In [None]:
%matplotlib inline
from importlib import reload
import json
import sys
sys.path.append("..")
from pathlib import Path
from tqdm import tqdm

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from skimage import io
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import cv2

import pythd

In [None]:
# Directory that holds the disaster PNG images and their JSON label files.
DATA_DIR = Path("..", "data")
# Chip size and stride handed to the chipping step; the chip size is unpacked as
# (width, height). Equal size and stride give non-overlapping chips.
CHIP_SHAPE = 16, 16
CHIP_STRIDE = 16, 16

In [None]:
# Integer code drawn into the label mask for each building subtype.
# Codes start at 1 so that 0 can mean "no building at this pixel".
DAMAGE_LEVELS = {
    subtype: level
    for level, subtype in enumerate(
        ("un-classified", "no-damage", "minor-damage", "major-damage", "destroyed"),
        start=1,
    )
}

def _rasterize_damage_mask(label_path, mask_shape):
    """Draw the building polygons of one label file into a damage-level mask.

    Parameters
    ----------
    label_path : path-like
        JSON label file; its ``["features"]["xy"]`` list is read.
    mask_shape : tuple of int
        Shape (rows, cols) of the mask to create.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``mask_shape``. Pixels are 0 where no building
        polygon was drawn, otherwise the ``DAMAGE_LEVELS`` code of the polygon
        drawn last at that pixel.

    Raises
    ------
    KeyError
        If a building's subtype is not a key of ``DAMAGE_LEVELS``.
    """
    mask = np.zeros(mask_shape, dtype=np.uint8)
    with open(label_path, "r") as f:
        objects = json.load(f)["features"]["xy"]

    for obj in objects:
        if obj["properties"]["feature_type"] != "building":
            continue
        # Drops a 10-character prefix and a 2-character suffix from the WKT text
        # (presumably "POLYGON ((" and "))" -- TODO confirm; polygons with
        # interior rings would not parse correctly this way).
        vertices = obj["wkt"][10:-2].split(', ')
        # cv2.fillPoly only accepts 32-bit integer points. NumPy's default
        # integer is 64-bit on most platforms, so the dtype is set explicitly.
        polygon = np.array(
            [[int(float(value)) for value in vertex.split(' ')] for vertex in vertices],
            dtype=np.int32,
        )
        damage = DAMAGE_LEVELS[obj["properties"]["subtype"]]
        cv2.fillPoly(mask, [polygon], color=damage)
    return mask


def build_chipped_df(images, chip_size=(16,16), chip_stride=(16,16)):
    """Chip pre/post image pairs and label every chip with building damage.

    For each pre image, the matching post image and JSON label file are looked
    up in ``DATA_DIR``. The post name is the pre name with its last "pre"
    replaced by "post"; the label name is the post name with a ``.json`` suffix.

    Parameters
    ----------
    images : iterable of pathlib.Path
        Paths of the pre images. Only ``.name`` is used to locate the partners.
    chip_size : tuple of int
        (width, height) of a chip, forwarded to ``ChippedImage.FromRGB``.
    chip_stride : tuple of int
        Stride forwarded to ``ChippedImage.FromRGB``.

    Returns
    -------
    pandas.DataFrame
        One row per chip across all images, with a fresh 0..n-1 index. Columns:
        ``x`` and ``y`` (int32 chip origin), the remaining chip columns prefixed
        with ``pre_`` / ``post_``, ``image`` (pre image file name), ``building``
        (1 if any labelled building pixel falls in the chip, else 0) and
        ``damage`` (highest damage code inside the chip). When an image has no
        label file, ``building`` stays 0 and ``damage`` stays 1.

    Raises
    ------
    ValueError
        If ``images`` is empty.
    """
    cw, ch = chip_size
    image_paths = list(images)
    if not image_paths:
        # pd.concat would raise an opaque "No objects to concatenate" otherwise.
        raise ValueError("build_chipped_df: no images were given")

    dfs = []
    for pre_path in tqdm(image_paths):
        pre_img = io.imread(str(pre_path))
        pre_chipped = pythd.image.ChippedImage.FromRGB(pre_img, chip_size, chip_stride)
        pre_df = pre_chipped.df
        pre_df.columns = ["pre_" + col for col in pre_df.columns]

        # Replace only the last "pre" so names such as "express_..._pre_disaster"
        # are not mangled by a global substring replace.
        post_name = "post".join(pre_path.name.rsplit("pre", 1))
        post_path = DATA_DIR / post_name
        post_img = io.imread(str(post_path))
        post_chipped = pythd.image.ChippedImage.FromRGB(post_img, chip_size, chip_stride)
        post_df = post_chipped.df
        post_df.columns = ["post_" + col for col in post_df.columns]

        # astype returns a new frame, so it has to be part of the assignment.
        df = (
            pd.concat((pre_df, post_df), axis=1)
            .drop(columns=["post_x", "post_y"])
            .rename(columns={"pre_x": "x", "pre_y": "y"})
            .astype({"x": "int32", "y": "int32"})
        )
        df["image"] = pre_path.name
        df["building"] = 0
        df["damage"] = 1

        # Swap the suffix rather than replacing every "png" substring.
        label_path = DATA_DIR / post_path.with_suffix(".json").name
        if label_path.exists():
            label_mask = _rasterize_damage_mask(label_path, post_img.shape[:2])
            # Highest damage code inside each chip; 0 means no building pixel.
            chip_damage = np.array(
                [
                    label_mask[y:(y + ch), x:(x + cw)].max()
                    for x, y in zip(df["x"].to_numpy(), df["y"].to_numpy())
                ],
                dtype=np.int64,
            )
            df["building"] = (chip_damage > 0).astype(np.int64)
            df["damage"] = chip_damage
        dfs.append(df)

    return pd.concat(dfs, axis=0, ignore_index=True)

# Path.glob yields entries in a filesystem-dependent order. Sorting keeps the row
# order of `df` (and everything indexed by row id later on) reproducible.
df = build_chipped_df(sorted(DATA_DIR.glob("*pre_disaster.png")), chip_size=CHIP_SHAPE, chip_stride=CHIP_STRIDE)

In [None]:
# Feature matrix: every column whose name contains "pixel", as a NumPy array.
features = list(filter(lambda name: "pixel" in name, df.columns))
data = df[features].values
print(df.shape)
df.head()

In [None]:
# Optional pre-reduction, currently disabled. To enable it, replace the
# `base_filt = None` line below with these two lines:
#   base_filt = pythd.filter.ScikitLearnFilter(PCA, n_components=200)
#   data = base_filt(data)
base_filt = None

# Filter built from scikit-learn's PCA with 2 components; f_x is its output on `data`.
filt = pythd.filter.ScikitLearnFilter(PCA, n_components=2)
f_x = filt(data)
print(f_x.shape)

In [None]:
# Re-import pythd and the submodules used below so that edits to the package
# are picked up without restarting the kernel.
reload(pythd)
for submodule in ("thd", "complex", "cover", "mapper", "clustering"):
    reload(getattr(pythd, submodule))

# NOTE(review): `clustering` is created here but is not passed to THD below --
# confirm whether THD is meant to receive it.
clustering = pythd.clustering.HierarchicalClustering()
# Interval cover derived from the filter values; the positional arguments 15 and
# 0.4 are presumably the interval count and overlap -- TODO confirm against pythd.
cover = pythd.cover.IntervalCover.EvenlySpacedFromValues(f_x, 15, 0.4)
thd = pythd.thd.THD(data, filt, cover, group_threshold=1000, contract_amount=0.4)
thd.run(verbose=True)
root = thd.get_results()

In [None]:
import igraph

# Colour the THD groups by the per-chip building flag, then draw the group
# hierarchy with a Reingold-Tilford tree layout rooted at vertex 0.
root.color_by_value(df["building"].values, normalize=True)
g = root.as_igraph_graph()
vs = dict(
    margin=40,
    bbox=(350, 350),
    vertex_label=g.vs["name"],
    vertex_label_size=10,
    vertex_size=20,
    vertex_label_dist=1.5,
    vertex_label_angle=0,
    layout=g.layout_reingold_tilford(root=[0]),
)
igraph.plot(g, **vs)

In [None]:
def show_group_image(group, i=0):
    """Plot one source image with the chips belonging to `group` highlighted.

    Parameters
    ----------
    group : object exposing ``rids``
        Row positions into the notebook-level ``df``.
    i : int
        Index into the unique image names found among the group's rows.

    Raises
    ------
    IndexError
        If ``i`` is outside the range of the group's unique images.
    """
    member_rows = df.iloc[list(group.rids), :]
    img_name = member_rows["image"].unique()[i]
    chips = member_rows[member_rows["image"] == img_name]

    img = io.imread(str(DATA_DIR / img_name))
    chip_w, chip_h = CHIP_SHAPE

    # Binary mask covering the footprint of every chip from this image.
    mask = np.zeros(img.shape[:2], dtype=np.float32)
    for left, top in zip(chips["x"].values, chips["y"].values):
        left, top = int(left), int(top)
        mask[top:(top + chip_h), left:(left + chip_w)] = 1

    overlay = pythd.image.overlay_mask(img, mask, image_alpha=0.6)
    pythd.image.plot_images((overlay,))

# Look up one group by its name and list the source images that contribute
# rows to it. The name "3.2.1" depends on the THD run above.
group = root.get_group_by_name("3.2.1")
images = df.iloc[list(group.rids)]["image"].unique()
print(images)

In [None]:
# Show the group's chips on up to the first 7 images it touches. Bounding the
# range by len(images) avoids an IndexError when the group spans fewer images.
MAX_GROUP_IMAGES = 7
for image_index in range(min(len(images), MAX_GROUP_IMAGES)):
    show_group_image(group, image_index)