#### This Jupyter notebook is adapted from https://pysal.org/spopt/notebooks/skater.html. See that page for a detailed explanation of the method.
#### Warning: Running this notebook on a local computer will take an extremely long time. Run it on a high-performance computing server instead.

In [1]:
import geopandas
import libpysal
import matplotlib.pyplot as plt
import numpy
import pandas
import shapely
from sklearn.metrics import pairwise as skm
import spopt
import warnings

In [None]:
# This function is adapted from the process described in the SKATER notebook linked above.
# The whole process is: 1. Load the shapefile. 2. Use the weight column in the shapefile (the weight is determined in the k-means collection point notebook).
# 3. Set the parameters of the algorithm (e.g. n_clusters). 4. Run the SKATER model and assign the resulting labels to the shapefile.
def get_skater_output(dataset, num_of_cluster, column_name, output_path="result.shp"):
    """
    Cluster the features of a dataset with SKATER and label each feature.

    The dataset is read into a GeoDataFrame, Queen weights are built from it,
    a ``spopt.region.Skater`` model is solved on the single attribute
    ``column_name``, and the resulting labels are stored in a new ``"label"``
    column.

    Args:
        dataset (str): path to the file to cluster (e.g. a shapefile or
            GeoJSON); passed directly to ``geopandas.read_file``.
        num_of_cluster (int): value passed to Skater as ``n_clusters``.
            NOTE: the number of clusters actually produced may be larger than
            this value. Presumably this is caused by ``islands="increase"``,
            which handles disconnected components separately from
            ``n_clusters`` -- confirm against the spopt documentation.
        column_name (str): name of the column used as the clustering
            attribute (the "weight").
        output_path (str or None): where to write the labelled dataset.
            Defaults to ``"result.shp"``. Pass ``None`` to skip writing.

    Returns:
        The GeoDataFrame read from ``dataset`` with the added ``"label"``
        column holding ``model.labels_``.
    """
    parcel = geopandas.read_file(dataset)
    w = libpysal.weights.Queen.from_dataframe(parcel)

    # Settings forwarded to Skater via ``spanning_forest_kwds``.
    spanning_forest_kwds = dict(
        dissimilarity=skm.manhattan_distances,
        affinity=None,
        reduction=numpy.sum,
        center=numpy.mean,
        verbose=2,
    )

    model = spopt.region.Skater(
        parcel,
        w,
        [column_name],  # Skater takes a list of attribute column names
        n_clusters=num_of_cluster,
        trace=False,
        # Previously assigned to a local but never passed to the model; it is
        # now passed explicitly so the intended island handling is visible.
        islands="increase",
        spanning_forest_kwds=spanning_forest_kwds,
    )
    model.solve()

    # Attach the cluster label of every feature to the GeoDataFrame.
    parcel["label"] = model.labels_

    # Write the labelled dataset to disk unless the caller opted out.
    if output_path is not None:
        parcel.to_file(output_path)
    return parcel


In [None]:
# Run SKATER on the Galveston parcel shapefile so that every parcel is assigned
# to a cluster. The input shapefile is generated by the k-means collection
# point notebook (explained there) and is located in the
# galveston_shapefile_parcel folder.
get_skater_output(
    dataset="galveston_parcel_skater.shp",
    num_of_cluster=125,
    column_name="weight",
)