# Imports

In [261]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from scipy.interpolate import interp1d
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.manifold import MDS
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

import plotly.graph_objects as go

from sklearn.cluster import AffinityPropagation
from sklearn.cluster import AgglomerativeClustering
from sklearn.manifold import MDS

import alphashape
import shapely

# Constants

In [2]:
# Keys of the per-polygon specification dictionaries.
POLY, SMOOTH_POLY = 'poly', 'smooth_poly'
INNER_POINTS, Z_DIST = 'inner_points', 'z_dist'

# Column names of the sampled-points DataFrame.
X_COL, Y_COL, Z_COL = 'X', 'Y', 'Z'
POLY_COL = 'Poly'
FEAT_COLS = [X_COL, Y_COL, Z_COL]

# Number of corner-cutting passes and number of points sampled per polygon.
SMOOTH_ITER = 3
RANDOM_POINTS = 300

In [3]:
# Set seed
# Seeds NumPy's global RNG, which the np.random.* sampling calls in this notebook draw from.
np.random.seed(0)

# Functions

In [4]:
def vis_2d(_df, _x, _y, _color, _algo):
    """Show a 2D Plotly Express scatter plot of ``_df``.

    ``_x`` and ``_y`` name the columns used for the axes, ``_color`` names the
    column used to colour the markers, and ``_algo`` is prefixed to the title.
    """
    plot_title = f'{_algo}: 2D Visualization'
    px.scatter(_df, x=_x, y=_y, color=_color).update_layout(title=plot_title).show()

def vis_3d(_df, _x, _y, _z, _color, _algo):
    """Show a 3D Plotly Express scatter plot of ``_df``.

    ``_x``, ``_y`` and ``_z`` name the columns used for the axes, ``_color``
    names the column used to colour the markers, and ``_algo`` is prefixed to
    the title.
    """
    plot_title = f'{_algo}: 3D Visualization'
    px.scatter_3d(_df, x=_x, y=_y, z=_z, color=_color).update_layout(title=plot_title).show()

In [164]:
def dim_reduction_2d(_df, algo, feat_cols):
    """Reduce ``_df[feat_cols]`` to two dimensions with ``algo`` and plot the result.

    Supported values of ``algo`` are 'pca', 't-sne', 'mds' and 'lda'; anything
    else raises ``Exception``. LDA is fitted against the labels in
    ``_df[POLY_COL]``; the other algorithms only see the features.

    The embedding is written back onto ``_df`` as the columns
    ``f'{algo}-one'`` and ``f'{algo}-two'`` (the frame is modified in place)
    and then shown with ``vis_2d`` coloured by ``POLY_COL``.
    """
    # Factories are wrapped in lambdas so that only the requested reducer is built.
    reducer_factories = (
        ('pca', lambda: PCA(n_components=2)),
        ('t-sne', lambda: TSNE(n_components=2, init='pca', random_state=0)),
        ('mds', lambda: MDS(n_components=2)),
        ('lda', lambda: LinearDiscriminantAnalysis(n_components=2)),
    )
    for known_algo, make_reducer in reducer_factories:
        if algo == known_algo:
            reducer = make_reducer()
            break
    else:
        raise Exception(f'Unsupported algo {algo}')

    if algo == 'lda':
        # LDA is supervised: it also needs the polygon labels.
        embedding = reducer.fit_transform(_df[feat_cols].values, _df[POLY_COL].values)
    else:
        embedding = reducer.fit_transform(_df[feat_cols].values)

    first_col, second_col = f'{algo}-one', f'{algo}-two'
    _df[first_col] = embedding[:, 0]
    _df[second_col] = embedding[:, 1]
    vis_2d(_df, first_col, second_col, POLY_COL, algo)

In [5]:
def smooth_poly_Chaikins_corner_cutting_iter(poly, iter=1):
    """Apply Chaikin's corner cutting to ``poly`` ``iter`` times in a row.

    Each pass calls ``smooth_poly_Chaikins_corner_cutting`` with the closing
    point appended. With ``iter=0`` a shallow copy (``poly[:]``) of the input
    is returned unchanged.
    """
    smoothed = poly[:]
    for _ in range(iter):
        smoothed = smooth_poly_Chaikins_corner_cutting(smoothed, True)
    return smoothed

def smooth_poly_Chaikins_corner_cutting(poly, append_first_point):
    """
    Run one pass of Chaikin's corner cutting over a 2D polyline.

    poly is a sequence of [x, y] points (list of lists), for example:
    poly1 = [
    [3,3],
    [4,4],
    [5,4],
    [5,7],
    [6,8],
    [7,5],
    [6,3],
    [5,2],
    [4,2],
    [3,3]
    ]
    Based on https://stackoverflow.com/questions/27642237/smoothing-a-2-d-figure
    Every segment P(i) -> P(i+1) is replaced by the two points
    Q(i) = (3/4)P(i) + (1/4)P(i+1)
    R(i) = (1/4)P(i) + (3/4)P(i+1)

    If append_first_point is true, a copy of the first generated point is
    appended so that the result is a closed ring again.

    Returns a new list of [x, y] lists. An input with fewer than two points
    has no segments and yields an empty list instead of raising IndexError.
    """
    new_poly = []
    for i in range(len(poly) - 1):
        start, end = poly[i], poly[i + 1]
        q_i = [0.75 * start[0] + 0.25 * end[0], 0.75 * start[1] + 0.25 * end[1]]
        r_i = [0.25 * start[0] + 0.75 * end[0], 0.25 * start[1] + 0.75 * end[1]]
        new_poly.extend([q_i, r_i])
    # Close the ring; guarded because there is nothing to close when no segment was cut.
    if append_first_point and new_poly:
        new_poly.append(list(new_poly[0]))
    return new_poly

In [131]:
def random_points_inside_polygon_and_3d(poly, number_of_points, loc, scale, num_outliers=0):
    """
    Sample random 3D points whose (x, y) part lies inside a 2D polygon.

    Points are drawn by rejection sampling: x and y are uniform over the
    polygon's bounding box and kept only if the polygon contains them; z is
    drawn from a normal distribution with mean ``loc`` and standard deviation
    ``scale``.

    ``num_outliers`` extra points are appended after the inner points. Their
    x and y are drawn from the bounding box shifted by three times its width
    and height respectively; their z uses the same normal distribution.

    Returns a list of ``[x, y, z]`` lists: ``number_of_points`` inner points
    followed by ``num_outliers`` outliers.

    Raises ValueError if inner points are requested from a polygon with no
    positive area, because the rejection loop could never accept a point.
    """
    # create shapely objects
    _poly = Polygon(poly)
    if number_of_points > 0 and not _poly.area > 0:
        raise ValueError('Cannot sample points inside a polygon with zero area')
    # get bounding box of polygon
    minx, miny, maxx, maxy = _poly.bounds
    # generate random points within the bounding box
    random_points = []
    while len(random_points) < number_of_points:
        # x, y and z are always drawn in this order, even for rejected
        # candidates, so the random stream matches the original implementation.
        x = np.random.uniform(low=minx, high=maxx)
        y = np.random.uniform(low=miny, high=maxy)
        z = np.random.normal(loc=loc, scale=scale)
        if _poly.contains(Point(x, y)):
            random_points.append([x, y, z])
    outliers = []
    x_shift = (maxx - minx) * 3
    y_shift = (maxy - miny) * 3
    while len(outliers) < num_outliers:
        x = np.random.uniform(low=minx, high=maxx) + x_shift
        y = np.random.uniform(low=miny, high=maxy) + y_shift
        z = np.random.normal(loc=loc, scale=scale)
        outliers.append([x, y, z])
    return random_points + outliers

In [214]:
# Palette of ten CSS rgb() colour strings, built from (r, g, b) triples.
DEFAULT_PLOTLY_COLORS = [
    f'rgb({r}, {g}, {b})'
    for r, g, b in (
        (31, 119, 180), (255, 127, 14),
        (44, 160, 44), (214, 39, 40),
        (148, 103, 189), (140, 86, 75),
        (227, 119, 194), (127, 127, 127),
        (188, 189, 34), (23, 190, 207),
    )
]
def preprocess_polygons(_polys, title, num_outliers=0):
    """Smooth each polygon, sample 3D points inside it, plot them and return them as a frame.

    Parameters
    ----------
    _polys : list of dict
        One dict per polygon. ``POLY`` holds the ring as a list of [x, y]
        points and ``Z_DIST`` holds ``(loc, scale)`` for the normally
        distributed z coordinate. The ``SMOOTH_POLY`` and ``INNER_POINTS``
        entries are filled in place with NumPy arrays.
    title : str
        Prefix of the titles of the 2D and 3D figures.
    num_outliers : int
        Number of outlier points added per polygon (passed through to
        ``random_points_inside_polygon_and_3d``).

    Returns
    -------
    pandas.DataFrame
        Columns ``X_COL``, ``Y_COL``, ``Z_COL`` and ``POLY_COL`` (``'poly<i>'``).
        The per-polygon frames are concatenated without resetting the index,
        so the row index restarts at 0 for every polygon. An empty ``_polys``
        yields an empty frame with these columns.
    """
    for spec in _polys:
        spec[SMOOTH_POLY] = np.array(
            smooth_poly_Chaikins_corner_cutting_iter(spec[POLY], SMOOTH_ITER))
        spec[INNER_POINTS] = np.array(
            random_points_inside_polygon_and_3d(spec[SMOOTH_POLY],
                                                RANDOM_POINTS,
                                                loc=spec[Z_DIST][0],
                                                scale=spec[Z_DIST][1],
                                                num_outliers=num_outliers))

    # Visualize the sampled points in 2D. Trace colours are left to Plotly, so
    # any number of polygons can be plotted.
    fig = go.Figure()
    for i, spec in enumerate(_polys):
        points = spec[INNER_POINTS]
        fig.add_trace(go.Scatter(x=points[:, 0],
                                 y=points[:, 1],
                                 mode='markers',
                                 name=f'poly{i}'))
    fig.update_layout(title=f'{title}: 2D Visualization')
    fig.show()

    # Visualize 3D
    fig = go.Figure()
    for i, spec in enumerate(_polys):
        points = spec[INNER_POINTS]
        fig.add_trace(go.Scatter3d(x=points[:, 0],
                                   y=points[:, 1],
                                   z=points[:, 2],
                                   mode='markers',
                                   name=f'poly{i}'))
    fig.update_layout(title=f'{title}: 3D Visualization')
    fig.show()

    # Create df for dim-reduction
    dfs = []
    for i, spec in enumerate(_polys):
        tmp_df = pd.DataFrame(spec[INNER_POINTS])
        tmp_df.columns = [X_COL, Y_COL, Z_COL]
        tmp_df[POLY_COL] = f'poly{i}'
        dfs.append(tmp_df)
    if not dfs:
        # pd.concat raises on an empty list; return an empty, correctly shaped frame.
        return pd.DataFrame(columns=[X_COL, Y_COL, Z_COL, POLY_COL])
    return pd.concat(dfs)

# 2 Polygons No Overlap

In [96]:
# Two polygon specs. Each ring is closed (first vertex repeated last);
# Z_DIST is the (loc, scale) pair used for the normally distributed z values.
polys = [
    {
        POLY: [
            [3, 3], [3.5, 4], [4, 4], [5, 4], [5, 7], [6, 8],
            [7, 5], [6, 3], [5, 2], [4, 2], [3.5, 2], [3, 3],
        ],
        SMOOTH_POLY: None,
        INNER_POINTS: None,
        Z_DIST: (0, 1),
    },
    {
        POLY: [
            [0, 3], [0, 5], [1, 7], [3, 9], [5, 10], [7, 10], [6, 9], [5, 9],
            [4, 7], [4, 5], [3, 4], [2, 3], [3, 2], [2, 1], [0, 3],
        ],
        SMOOTH_POLY: None,
        INNER_POINTS: None,
        Z_DIST: (0, 1),
    },
]

In [9]:
# Smooth both polygons, sample points inside them (filling `polys` in place), plot the samples
# in 2D and 3D, and collect them into one labelled frame.
df_2_poly = preprocess_polygons(polys, 'Polygons No Overlap')

## PCA

In [10]:
# Fit a 3-component PCA on the X/Y/Z features, store every component as a column
# and plot the first two components coloured by polygon.
pca = PCA(n_components=3)
pca_result = pca.fit_transform(df_2_poly[FEAT_COLS].values)
(df_2_poly['pca-one'],
 df_2_poly['pca-two'],
 df_2_poly['pca-three']) = pca_result.T
print(f'Explained variation per principal component: {pca.explained_variance_ratio_}')
vis_2d(df_2_poly, 'pca-one', 'pca-two', POLY_COL, 'PCA')

Explained variation per principal component: [0.51344872 0.37079827 0.11575301]


## T-SNE

In [11]:
# Embed the X/Y/Z features in 2D with t-SNE (PCA initialisation, fixed random_state) and plot the embedding.
tsne = TSNE(n_components=2, init='pca', random_state=0)
tsne_result = tsne.fit_transform(df_2_poly[FEAT_COLS].values)
df_2_poly['tsne-one'], df_2_poly['tsne-two'] = tsne_result.T
vis_2d(df_2_poly, 'tsne-one', 'tsne-two', POLY_COL, 'T-SNE')

## MDS

In [12]:
# Embed the X/Y/Z features in 2D with MDS and plot the embedding coloured by polygon.
mds = MDS(n_components=2)
mds_result = mds.fit_transform(df_2_poly[FEAT_COLS].values)
df_2_poly['mds-one'], df_2_poly['mds-two'] = mds_result.T
vis_2d(df_2_poly, 'mds-one', 'mds-two', POLY_COL, 'MDS')

# Three Polygons 2 no Overlap, 1 Overlap Both

In [13]:
# Three polygon specs: the two rings used before plus a square from (2, 2) to (6, 6).
# Each ring is closed (first vertex repeated last); all three share Z_DIST (loc, scale) = (0, 1).
polys_3_overlap = [
    {
        POLY: [
            [3, 3], [3.5, 4], [4, 4], [5, 4], [5, 7], [6, 8],
            [7, 5], [6, 3], [5, 2], [4, 2], [3.5, 2], [3, 3],
        ],
        SMOOTH_POLY: None,
        INNER_POINTS: None,
        Z_DIST: (0, 1),
    },
    {
        POLY: [
            [0, 3], [0, 5], [1, 7], [3, 9], [5, 10], [7, 10], [6, 9], [5, 9],
            [4, 7], [4, 5], [3, 4], [2, 3], [3, 2], [2, 1], [0, 3],
        ],
        SMOOTH_POLY: None,
        INNER_POINTS: None,
        Z_DIST: (0, 1),
    },
    {
        POLY: [[2, 6], [6, 6], [6, 2], [2, 2], [2, 6]],
        SMOOTH_POLY: None,
        INNER_POINTS: None,
        Z_DIST: (0, 1),
    },
]

In [14]:
# Smooth the three polygons, sample points inside them (filling `polys_3_overlap` in place),
# plot the samples in 2D and 3D, and collect them into one labelled frame.
df_3_poly = preprocess_polygons(polys_3_overlap, 'Polygons Overlap')

## PCA

In [15]:
# Fit a 3-component PCA on the X/Y/Z features, store every component as a column
# and plot the first two components coloured by polygon.
pca = PCA(n_components=3)
pca_result = pca.fit_transform(df_3_poly[FEAT_COLS].values)
(df_3_poly['pca-one'],
 df_3_poly['pca-two'],
 df_3_poly['pca-three']) = pca_result.T
print(f'Explained variation per principal component: {pca.explained_variance_ratio_}')
vis_2d(df_3_poly, 'pca-one', 'pca-two', POLY_COL, 'PCA')

Explained variation per principal component: [0.48706783 0.37003608 0.14289609]


## T-SNE

In [16]:
# Embed the X/Y/Z features in 2D with t-SNE (PCA initialisation, fixed random_state) and plot the embedding.
tsne = TSNE(n_components=2, init='pca', random_state=0)
tsne_result = tsne.fit_transform(df_3_poly[FEAT_COLS].values)
df_3_poly['tsne-one'], df_3_poly['tsne-two'] = tsne_result.T
vis_2d(df_3_poly, 'tsne-one', 'tsne-two', POLY_COL, 'T-SNE')

## MDS

In [17]:
# Embed the X/Y/Z features in 2D with MDS and plot the embedding coloured by polygon.
mds = MDS(n_components=2)
mds_result = mds.fit_transform(df_3_poly[FEAT_COLS].values)
df_3_poly['mds-one'], df_3_poly['mds-two'] = mds_result.T
vis_2d(df_3_poly, 'mds-one', 'mds-two', POLY_COL, 'MDS')

## LDA

In [18]:
# Supervised projection: fit LDA on the X/Y/Z features against the polygon labels,
# keep both discriminant axes as columns and plot them.
lda = LinearDiscriminantAnalysis(n_components=2)
lda_result = lda.fit_transform(df_3_poly[FEAT_COLS].values, df_3_poly[POLY_COL].values)
df_3_poly['lda-one'], df_3_poly['lda-two'] = lda_result.T
vis_2d(df_3_poly, 'lda-one', 'lda-two', POLY_COL, 'LDA')

# Three Polygons 2 no Overlap, 1 Overlap Both, Z Scale Different in the Overlapped One

In [310]:
# Same three rings as before; only the square gets a different z distribution:
# Z_DIST (loc, scale) is (2, 1) for the square and (0, 1) for the other two.
polys_3_overlap_z_differ = [
    {
        POLY: [
            [3, 3], [3.5, 4], [4, 4], [5, 4], [5, 7], [6, 8],
            [7, 5], [6, 3], [5, 2], [4, 2], [3.5, 2], [3, 3],
        ],
        SMOOTH_POLY: None,
        INNER_POINTS: None,
        Z_DIST: (0, 1),
    },
    {
        POLY: [
            [0, 3], [0, 5], [1, 7], [3, 9], [5, 10], [7, 10], [6, 9], [5, 9],
            [4, 7], [4, 5], [3, 4], [2, 3], [3, 2], [2, 1], [0, 3],
        ],
        SMOOTH_POLY: None,
        INNER_POINTS: None,
        Z_DIST: (0, 1),
    },
    {
        POLY: [[2, 6], [6, 6], [6, 2], [2, 2], [2, 6]],
        SMOOTH_POLY: None,
        INNER_POINTS: None,
        Z_DIST: (2, 1),
    },
]

In [312]:
# Smooth the polygons, sample points inside them (filling `polys_3_overlap_z_differ` in place),
# plot the samples in 2D and 3D, and collect them into one labelled frame.
df_3_poly_z_differ = preprocess_polygons(polys_3_overlap_z_differ, 'Polygons Overlap on 2D but not On 3D')

## PCA
In the PCA projection poly0 and poly1 appear to intersect, although in 3D they do not intersect at all by construction!
Adding poly2 confuses PCA.

In [313]:
# Fit a 3-component PCA on the X/Y/Z features, store every component as a column
# and plot the first two components coloured by polygon.
pca = PCA(n_components=3)
pca_result = pca.fit_transform(df_3_poly_z_differ[FEAT_COLS].values)
(df_3_poly_z_differ['pca-one'],
 df_3_poly_z_differ['pca-two'],
 df_3_poly_z_differ['pca-three']) = pca_result.T
print(f'Explained variation per principal component: {pca.explained_variance_ratio_}')
vis_2d(df_3_poly_z_differ, 'pca-one', 'pca-two', POLY_COL, 'PCA')

Explained variation per principal component: [0.46469254 0.31574171 0.21956575]


## T-SNE
T-SNE separates the polygons better, but the relations between them are difficult to interpret.

In [314]:
# Embed the X/Y/Z features in 2D with t-SNE (PCA initialisation, fixed random_state) and plot the embedding.
tsne = TSNE(n_components=2, init='pca', random_state=0)
tsne_result = tsne.fit_transform(df_3_poly_z_differ[FEAT_COLS].values)
df_3_poly_z_differ['tsne-one'], df_3_poly_z_differ['tsne-two'] = tsne_result.T
vis_2d(df_3_poly_z_differ, 'tsne-one', 'tsne-two', POLY_COL, 'T-SNE')

## MDS
MDS is similar to PCA

In [315]:
# Embed the X/Y/Z features in 2D with MDS and plot the embedding coloured by polygon.
mds = MDS(n_components=2)
mds_result = mds.fit_transform(df_3_poly_z_differ[FEAT_COLS].values)
df_3_poly_z_differ['mds-one'], df_3_poly_z_differ['mds-two'] = mds_result.T
vis_2d(df_3_poly_z_differ, 'mds-one', 'mds-two', POLY_COL, 'MDS')

## LDA

In [316]:
# Supervised projection: fit LDA on the X/Y/Z features against the polygon labels.
# The labels come from the same frame as the features (the original cell read them
# from `df_3_poly`, a different frame that merely happens to have the same row layout).
lda = LinearDiscriminantAnalysis(n_components=2)
lda_result = lda.fit_transform(df_3_poly_z_differ[FEAT_COLS].values,
                               df_3_poly_z_differ[POLY_COL].values)
df_3_poly_z_differ['lda-one'] = lda_result[:, 0]
df_3_poly_z_differ['lda-two'] = lda_result[:, 1]
vis_2d(df_3_poly_z_differ, 'lda-one', 'lda-two', POLY_COL, 'LDA')

## Anchors Plot

In [341]:
# Cluster each polygon's samples into 5 groups and embed the cluster centroids ("anchors") in 2D with MDS.
# NOTE(review): find_anchors is defined in a later cell (section "Where PCA, T-SNE, MDS and LDA fails"),
# so this cell fails on a fresh top-to-bottom run unless that cell has been executed first.
anchors_df = find_anchors(df_3_poly_z_differ, FEAT_COLS, POLY_COL, 5)

Finding anchors of poly0
Finding anchors of poly1
Finding anchors of poly2


In [342]:
# Scatter plot of the anchors in their 2D MDS coordinates, coloured by polygon.
vis_2d(anchors_df, 'mds-agg-one', 'mds-agg-two', POLY_COL, 'MDS')

In [343]:
# Draw the anchors together with a smoothed concave hull around each polygon's anchors.
# NOTE(review): anchors_plot is defined in a later cell, which must be executed before this one.
anchors_plot(anchors_df, 'mds-agg-one', 'mds-agg-two', delta=0.3, alpha=0.5)

# Three Polygons 2 no Overlap, 1 Overlap Both, Z Scale Different in all Three

In [303]:
# Same three rings as before, each with its own z distribution:
# Z_DIST (loc, scale) is (0, 1), (3, 1) and (10, 1) respectively.
polys_3_overlap_z_differ_complete = [
    {
        POLY: [
            [3, 3], [3.5, 4], [4, 4], [5, 4], [5, 7], [6, 8],
            [7, 5], [6, 3], [5, 2], [4, 2], [3.5, 2], [3, 3],
        ],
        SMOOTH_POLY: None,
        INNER_POINTS: None,
        Z_DIST: (0, 1),
    },
    {
        POLY: [
            [0, 3], [0, 5], [1, 7], [3, 9], [5, 10], [7, 10], [6, 9], [5, 9],
            [4, 7], [4, 5], [3, 4], [2, 3], [3, 2], [2, 1], [0, 3],
        ],
        SMOOTH_POLY: None,
        INNER_POINTS: None,
        Z_DIST: (3, 1),
    },
    {
        POLY: [[2, 6], [6, 6], [6, 2], [2, 2], [2, 6]],
        SMOOTH_POLY: None,
        INNER_POINTS: None,
        Z_DIST: (10, 1),
    },
]

In [304]:
# Smooth the polygons, sample points inside them (filling `polys_3_overlap_z_differ_complete` in place),
# plot the samples in 2D and 3D, and collect them into one labelled frame.
df_3_poly_z_differ_complete = preprocess_polygons(polys_3_overlap_z_differ_complete, 'Polygons Overlap on 2D but not On 3D')

## PCA

In [27]:
# Fit a 3-component PCA on the X/Y/Z features, store every component as a column
# and plot the first two components coloured by polygon.
pca = PCA(n_components=3)
pca_result = pca.fit_transform(df_3_poly_z_differ_complete[FEAT_COLS].values)
(df_3_poly_z_differ_complete['pca-one'],
 df_3_poly_z_differ_complete['pca-two'],
 df_3_poly_z_differ_complete['pca-three']) = pca_result.T
print(f'Explained variation per principal component: {pca.explained_variance_ratio_}')
vis_2d(df_3_poly_z_differ_complete, 'pca-one', 'pca-two', POLY_COL, 'PCA')

Explained variation per principal component: [0.77641605 0.1295077  0.09407625]


## T-SNE

In [28]:
# Embed the X/Y/Z features in 2D with t-SNE (PCA initialisation, fixed random_state) and plot the embedding.
tsne = TSNE(n_components=2, init='pca', random_state=0)
tsne_result = tsne.fit_transform(df_3_poly_z_differ_complete[FEAT_COLS].values)
df_3_poly_z_differ_complete['tsne-one'], df_3_poly_z_differ_complete['tsne-two'] = tsne_result.T
vis_2d(df_3_poly_z_differ_complete, 'tsne-one', 'tsne-two', POLY_COL, 'T-SNE')

## MDS

In [29]:
# Embed the X/Y/Z features in 2D with MDS and plot the embedding coloured by polygon.
mds = MDS(n_components=2)
mds_result = mds.fit_transform(df_3_poly_z_differ_complete[FEAT_COLS].values)
df_3_poly_z_differ_complete['mds-one'], df_3_poly_z_differ_complete['mds-two'] = mds_result.T
vis_2d(df_3_poly_z_differ_complete, 'mds-one', 'mds-two', POLY_COL, 'MDS')

## LDA

In [30]:
# Supervised projection: fit LDA on the X/Y/Z features against the polygon labels.
# The labels come from the same frame as the features (the original cell read them
# from `df_3_poly`, a different frame that merely happens to have the same row layout).
lda = LinearDiscriminantAnalysis(n_components=2)
lda_result = lda.fit_transform(df_3_poly_z_differ_complete[FEAT_COLS].values,
                               df_3_poly_z_differ_complete[POLY_COL].values)
df_3_poly_z_differ_complete['lda-one'] = lda_result[:, 0]
df_3_poly_z_differ_complete['lda-two'] = lda_result[:, 1]
vis_2d(df_3_poly_z_differ_complete, 'lda-one', 'lda-two', POLY_COL, 'LDA')

# Where PCA, T-SNE, MDS and LDA Fail

In [247]:
# Eight polygon specs. Each ring is closed (first vertex repeated last); Z_DIST is
# the (loc, scale) pair of the z distribution. Specs 0/1 and 3/4 share the same
# ring and differ only in z.
pca_fails_polygons = [
    {
        POLY: [[2, 0], [3, 1], [4, 2], [5, 1], [6, 0], [5, -1], [4, -2], [3, -1], [2, 0]],
        SMOOTH_POLY: None,
        INNER_POINTS: None,
        Z_DIST: (5, 1),
    },
    {
        POLY: [[2, 0], [3, 1], [4, 2], [5, 1], [6, 0], [5, -1], [4, -2], [3, -1], [2, 0]],
        SMOOTH_POLY: None,
        INNER_POINTS: None,
        Z_DIST: (-5, 1),
    },
    {
        POLY: [[12, 0], [13, 1], [14, 2], [15, 1], [16, 0], [15, -1], [14, -2], [13, -1], [12, 0]],
        SMOOTH_POLY: None,
        INNER_POINTS: None,
        Z_DIST: (-5, 1),
    },
    {
        POLY: [[-2, 0], [-3, 1], [-4, 2], [-5, 1], [-6, 0], [-5, -1], [-4, -2], [-3, -1], [-2, 0]],
        SMOOTH_POLY: None,
        INNER_POINTS: None,
        Z_DIST: (5, 1),
    },
    {
        POLY: [[-2, 0], [-3, 1], [-4, 2], [-5, 1], [-6, 0], [-5, -1], [-4, -2], [-3, -1], [-2, 0]],
        SMOOTH_POLY: None,
        INNER_POINTS: None,
        Z_DIST: (-5, 1),
    },
    {
        POLY: [[-12, 0], [-13, 1], [-14, 2], [-15, 1], [-16, 0], [-15, -1], [-14, -2], [-13, -1], [-12, 0]],
        SMOOTH_POLY: None,
        INNER_POINTS: None,
        Z_DIST: (-5, 1),
    },
    {
        POLY: [[0, 5], [-1, 7], [-2, 9], [-1, 11], [0, 20], [1, 11], [2, 9], [1, 7], [0, 5]],
        SMOOTH_POLY: None,
        INNER_POINTS: None,
        Z_DIST: (-5, 1),
    },
    {
        POLY: [[0, -5], [-1, -7], [-2, -9], [-1, -11], [0, -20], [1, -11], [2, -9], [1, -7], [0, -5]],
        SMOOTH_POLY: None,
        INNER_POINTS: None,
        Z_DIST: (-5, 1),
    },
]

In [253]:
# Smooth the eight polygons, sample points inside them (filling `pca_fails_polygons` in place),
# plot the samples in 2D and 3D, and collect them into one labelled frame.
df_pca_fails_polygons = preprocess_polygons(pca_fails_polygons, 'Original Dim')

## PCA

In [249]:
# 2D PCA of the X/Y/Z features; adds 'pca-one'/'pca-two' columns to the frame and plots them.
dim_reduction_2d(df_pca_fails_polygons, 'pca', FEAT_COLS)

## T-SNE

In [250]:
# 2D t-SNE of the X/Y/Z features; adds 't-sne-one'/'t-sne-two' columns to the frame and plots them.
dim_reduction_2d(df_pca_fails_polygons, 't-sne', FEAT_COLS)

## MDS

In [254]:
# 2D MDS of the X/Y/Z features; adds 'mds-one'/'mds-two' columns to the frame and plots them.
dim_reduction_2d(df_pca_fails_polygons, 'mds', FEAT_COLS)

## LDA

In [252]:
# 2D LDA of the X/Y/Z features, supervised by the polygon labels; adds 'lda-one'/'lda-two' columns and plots them.
dim_reduction_2d(df_pca_fails_polygons, 'lda', FEAT_COLS)

## Anchors Plot

In [290]:
def anchors_to_contour(anchors_df_, x, y, delta=0.5):
    """Surround every anchor with four points offset by ``delta`` along the axes.

    Four shifted copies of ``anchors_df_`` are built (``y`` up, ``y`` down,
    ``x`` left, ``x`` right) and concatenated in that order. All other
    columns and the original index are kept; the input frame is not modified.
    """
    # (column to shift, signed offset), listed in concatenation order.
    offsets = ((y, delta), (y, -delta), (x, -delta), (x, delta))
    shifted_frames = []
    for column, shift in offsets:
        moved = anchors_df_.copy()
        moved[column] = moved[column] + shift
        shifted_frames.append(moved)
    return pd.concat(shifted_frames)

# Re-seed NumPy's global RNG for the cells that follow (it was first seeded with 0 near the top of the notebook).
np.random.seed(31415)
# NOTE(review): mid-notebook import; no other use of `random` is visible in this notebook, so this seed
# presumably targets a dependency that draws from Python's global `random` module — TODO confirm.
import random
random.seed(9001)

def find_anchors(df_, feat_cols_, label_col, n_clusters):
    """Compute per-label cluster centroids ("anchors") and embed them in 2D with MDS.

    For every distinct value of ``label_col`` the rows with that label are
    split into ``n_clusters`` groups with agglomerative clustering on
    ``feat_cols_``. The mean of the feature columns of each (label, cluster)
    group is one anchor; all anchors are then embedded together with MDS.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Input frame; it is copied, the caller's frame is not modified.
    feat_cols_ : list of str
        Feature columns used for clustering, averaging and MDS.
    label_col : str
        Column holding the group label.
    n_clusters : int
        Number of clusters, and therefore anchors, per label.

    Returns
    -------
    pandas.DataFrame
        One row per anchor with the columns 'mds-agg-one', 'mds-agg-two' and
        ``label_col``.
    """
    feature_columns = list(feat_cols_)

    # Agglomerative clustering for each label
    df_ = df_.copy()
    df_['cluster'] = None
    for label in sorted(df_[label_col].unique()):
        print(f'Finding anchors of {label}')
        in_label = df_[label_col] == label
        ag = AgglomerativeClustering(n_clusters=n_clusters)
        df_.loc[in_label, 'cluster'] = ag.fit_predict(df_.loc[in_label, feature_columns].values)

    # Find centroids of clusters to be anchors. Only the feature columns are
    # averaged: they are all that is used below, and averaging every column
    # fails on non-numeric columns in recent pandas versions.
    anchors_df_ = df_.groupby([label_col, 'cluster'])[feature_columns].mean()

    # MDS
    mds = MDS(n_components=2)
    mds_result = mds.fit_transform(anchors_df_[feature_columns].values)
    anchors_df_['mds-agg-one'] = mds_result[:, 0]
    anchors_df_['mds-agg-two'] = mds_result[:, 1]
    anchors_df_ = anchors_df_.reset_index()
    return anchors_df_[['mds-agg-one', 'mds-agg-two', label_col]]

# Palette of ten CSS rgb() colour strings, built from (r, g, b) triples.
# This repeats the values of the DEFAULT_PLOTLY_COLORS list defined earlier in the notebook.
DEFAULT_PLOTLY_COLORS = [
    f'rgb({r}, {g}, {b})'
    for r, g, b in (
        (31, 119, 180), (255, 127, 14),
        (44, 160, 44), (214, 39, 40),
        (148, 103, 189), (140, 86, 75),
        (227, 119, 194), (127, 127, 127),
        (188, 189, 34), (23, 190, 207),
    )
]

def get_concave_hull(points, alpha):
    """Return smoothed outlines of the alpha shape (concave hull) of ``points``.

    ``points`` is a NumPy array of 2D coordinates (it is converted with
    ``.tolist()``) and ``alpha`` is passed through to ``alphashape.alphashape``.

    Returns a list of NumPy arrays, one per polygon of the alpha shape. Each
    array holds the polygon's exterior ring after 3 passes of Chaikin's
    corner cutting. Parts of the result that are not non-empty polygons are
    skipped, so the list may be empty.
    """
    polygon_type = shapely.geometry.polygon.Polygon
    alpha_shape = alphashape.alphashape(points.tolist(), alpha)
    if isinstance(alpha_shape, polygon_type):
        parts = [alpha_shape]
    else:
        # Multi-part results expose their members through `.geoms`; iterating
        # the geometry itself is not supported by Shapely 2.0. Single-part
        # non-polygon results have no `.geoms` and contribute nothing.
        parts = list(getattr(alpha_shape, 'geoms', []))

    smooth_shapes = []
    for shape in parts:
        if not isinstance(shape, polygon_type) or shape.is_empty:
            continue
        x, y = shape.exterior.coords.xy
        smooth_shape = np.array(smooth_poly_Chaikins_corner_cutting_iter(list(zip(x, y)), 3))
        smooth_shapes.append(smooth_shape)
    return smooth_shapes

def anchors_plot(anchors_agg_df_, x, y, delta, alpha, label_col=None):
    """Plot anchors and a filled, smoothed concave hull around each label's anchors.

    Parameters
    ----------
    anchors_agg_df_ : pandas.DataFrame
        Anchors with coordinate columns ``x`` and ``y`` and a label column.
    x, y : str
        Names of the coordinate columns.
    delta : float
        Offset passed to ``anchors_to_contour`` to pad every anchor before
        the hull is computed.
    alpha : float
        Alpha parameter passed to ``get_concave_hull``.
    label_col : str, optional
        Name of the label column; defaults to ``POLY_COL``.

    Labels are processed in sorted order and coloured from
    ``DEFAULT_PLOTLY_COLORS``; the palette is reused cyclically when there
    are more labels than colours.
    """
    if label_col is None:
        label_col = POLY_COL
    palette = DEFAULT_PLOTLY_COLORS
    contours_df = anchors_to_contour(anchors_agg_df_, x, y, delta)
    fig = go.Figure()
    for label_idx, label in enumerate(sorted(anchors_agg_df_[label_col].unique())):
        points = contours_df[contours_df[label_col] == label][[x, y]].values
        concave_hulls = get_concave_hull(points, alpha)

        anchors_tmp = anchors_agg_df_[anchors_agg_df_[label_col] == label][[x, y]].values
        # Modulo indexing instead of a one-shot iterator: never runs out of colours.
        c = palette[label_idx % len(palette)]
        fig.add_trace(go.Scatter(x=anchors_tmp[:, 0], y=anchors_tmp[:, 1],
                                 mode='markers',
                                 marker_color=c,
                                 name=f'poly_{label}'))
        for concave_hull in concave_hulls:
            fig.add_trace(go.Scatter(x=concave_hull[:, 0],
                                     y=concave_hull[:, 1],
                                     fill='toself',
                                     marker_color=c,
                                     name=f'poly_{label}'))
    fig.show()

        

In [275]:
# Cluster each polygon's samples into 5 groups and embed the cluster centroids ("anchors") in 2D with MDS.
anchors_df = find_anchors(df_pca_fails_polygons, FEAT_COLS, POLY_COL, 5)

Finding anchors of poly0
Finding anchors of poly1
Finding anchors of poly2
Finding anchors of poly3
Finding anchors of poly4
Finding anchors of poly5
Finding anchors of poly6
Finding anchors of poly7


In [276]:
# Scatter plot of the anchors in their 2D MDS coordinates, coloured by polygon.
vis_2d(anchors_df, 'mds-agg-one', 'mds-agg-two', POLY_COL, 'MDS')

In [277]:
# Draw the anchors together with a smoothed concave hull around each polygon's anchors.
anchors_plot(anchors_df, 'mds-agg-one', 'mds-agg-two', delta=0.3, alpha=0.5)

# Where MDS Fails

In [344]:
# Four polygon specs: the two irregular rings with Z_DIST (loc, scale) = (0, 1), plus the
# square from (2, 2) to (6, 6) twice, once with z around 10 and once with z around -10.
polys_mds_fail = [
    {
        POLY: [
            [3, 3], [3.5, 4], [4, 4], [5, 4], [5, 7], [6, 8],
            [7, 5], [6, 3], [5, 2], [4, 2], [3.5, 2], [3, 3],
        ],
        SMOOTH_POLY: None,
        INNER_POINTS: None,
        Z_DIST: (0, 1),
    },
    {
        POLY: [
            [0, 3], [0, 5], [1, 7], [3, 9], [5, 10], [7, 10], [6, 9], [5, 9],
            [4, 7], [4, 5], [3, 4], [2, 3], [3, 2], [2, 1], [0, 3],
        ],
        SMOOTH_POLY: None,
        INNER_POINTS: None,
        Z_DIST: (0, 1),
    },
    {
        POLY: [[2, 6], [6, 6], [6, 2], [2, 2], [2, 6]],
        SMOOTH_POLY: None,
        INNER_POINTS: None,
        Z_DIST: (10, 1),
    },
    {
        POLY: [[2, 6], [6, 6], [6, 2], [2, 2], [2, 6]],
        SMOOTH_POLY: None,
        INNER_POINTS: None,
        Z_DIST: (-10, 1),
    },
]

In [345]:
# Smooth the four polygons, sample points inside them (filling `polys_mds_fail` in place),
# plot the samples in 2D and 3D, and collect them into one labelled frame.
dfpolys_mds_fail = preprocess_polygons(polys_mds_fail, 'polys_mds_fail')

## PCA

In [237]:
# 2D PCA of the X/Y/Z features; adds 'pca-one'/'pca-two' columns to the frame and plots them.
dim_reduction_2d(dfpolys_mds_fail, 'pca', FEAT_COLS)

## T-SNE

In [238]:
# 2D t-SNE of the X/Y/Z features; adds 't-sne-one'/'t-sne-two' columns to the frame and plots them.
dim_reduction_2d(dfpolys_mds_fail, 't-sne', FEAT_COLS)

## MDS

In [257]:
# 2D MDS of the X/Y/Z features; adds 'mds-one'/'mds-two' columns to the frame and plots them.
dim_reduction_2d(dfpolys_mds_fail, 'mds', FEAT_COLS)

## LDA

In [240]:
# 2D LDA of the X/Y/Z features, supervised by the polygon labels; adds 'lda-one'/'lda-two' columns and plots them.
dim_reduction_2d(dfpolys_mds_fail, 'lda', FEAT_COLS)

## Anchors Plot

In [300]:
# Cluster each polygon's samples into 5 groups and embed the cluster centroids ("anchors") in 2D with MDS.
anchors_df2 = find_anchors(dfpolys_mds_fail, FEAT_COLS, POLY_COL, 5)

Finding anchors of poly0
Finding anchors of poly1
Finding anchors of poly2
Finding anchors of poly3


In [301]:
# Scatter plot of the anchors in their 2D MDS coordinates, coloured by polygon.
vis_2d(anchors_df2, 'mds-agg-one', 'mds-agg-two', POLY_COL, 'MDS')

In [302]:
# Draw the anchors together with a smoothed concave hull around each polygon's anchors.
anchors_plot(anchors_df2, 'mds-agg-one', 'mds-agg-two', delta=0.3, alpha=0.5)