In [None]:
! pip install shapely geopandas pyproj

In [39]:
from pathlib import Path

import geopandas as gpd
import pandas as pd
import shapely
import shapely.geometry.polygon as ply

def add_geo(r, geo):
    """Return a copy of row ``r`` whose ``'geometry'`` entry is ``geo``.

    The input row is left untouched. Assigning into ``r`` itself would make
    every call on the same row hand back the same object, so a caller that
    collects the results for several geometries would end up with N
    references to a single row holding only the last geometry.

    Parameters
    ----------
    r : pandas.Series
        Row to derive the new row from (anything with ``.copy()`` and item
        assignment works).
    geo : object
        Value to store under the ``'geometry'`` label.

    Returns
    -------
    pandas.Series
        A new row equal to ``r`` except for its ``'geometry'`` entry.
    """
    updated = r.copy()
    updated['geometry'] = geo
    return updated

def split_geos(r):
    """Explode one row into a DataFrame with one row per geometry part.

    A row whose geometry has no ``geoms`` attribute (e.g. a plain Polygon) is
    returned unchanged as a single-row DataFrame. For a multi-part geometry,
    one row is produced per part; every other column is repeated as-is.

    Parameters
    ----------
    r : pandas.Series
        Row with a ``geometry`` entry.

    Returns
    -------
    pandas.DataFrame
        One row per geometry part. ``r`` itself is not modified.
    """
    # `.geoms` is the supported way to walk the parts of a multi-part
    # geometry; iterating the geometry object itself was removed in
    # Shapely 2.0.
    parts = getattr(r.geometry, 'geoms', None)
    if parts is None:
        return pd.DataFrame([r])

    rows = []
    for geo in parts:
        # Copy per part: writing into `r` repeatedly would leave every list
        # entry pointing at the same object, i.e. all rows would end up with
        # the last part's geometry.
        row = r.copy()
        row['geometry'] = geo
        rows.append(row)
    return pd.DataFrame(rows)

def bufferz(c, rad, d):
    """Build the cluster row and the summary record for one city/district pair.

    Parameters
    ----------
    c : pandas.Series
        City row; its ``geometry`` and ``osm_id`` entries are read.
    rad : number
        Buffer distance applied to the city geometry.
    d : pandas.Series
        District row; its ``distcode`` entry is read.

    Returns
    -------
    tuple
        ``(row, record)`` where ``row`` is the city row completed with the
        district's entries (city values win where both are present) and with
        ``geometry`` replaced by the buffered city geometry, and ``record`` is
        ``(osm_id, distcode, original city geometry, rad / 1000)``.
    """
    # City values take precedence; the district only fills labels the city
    # lacks or holds as null.
    cluster_row = c.combine_first(d)

    centre = c.geometry
    cluster_row['geometry'] = centre.buffer(rad)

    # rad / 1000: kilometres if `rad` is in metres.
    record = (c.osm_id, d.distcode, centre, rad / 1000)
    return cluster_row, record

def make_clusters(cities, dists, buffer_margin):
    """Build a circular cluster around every city that lies within a district.

    Each district row is paired with every city whose geometry is within the
    district geometry. The circle radius is the distance from the city to the
    district's exterior ring minus ``buffer_margin``.

    Parameters
    ----------
    cities : geopandas.GeoDataFrame
        City rows; ``osm_id`` and the geometry are used. Expected to be in
        the same CRS as ``dists`` (distances are computed in that CRS).
    dists : geopandas.GeoDataFrame
        District rows with polygon geometries (``.exterior`` is used) and a
        ``distcode`` column.
    buffer_margin : number
        Amount subtracted from the city-to-border distance, in CRS units.

    Returns
    -------
    (geopandas.GeoDataFrame, pandas.DataFrame)
        The cluster polygons reprojected to EPSG:4326, and a plain table with
        columns ``id``, ``distcode``, ``rad``, ``lng``, ``lat`` (one row per
        cluster; ``rad`` is the radius divided by 1000).
    """
    pairs = [
        (city, district)
        for _, district in dists.iterrows()
        for _, city in cities[cities.within(district.geometry)].iterrows()
    ]

    summary_columns = ['id', 'distcode', 'rad', 'lng', 'lat']
    if not pairs:
        # No city inside any district: building a GeoDataFrame from an empty
        # row list fails because there is no 'geometry' column, so return
        # empty results of the usual shape instead.
        return (gpd.GeoDataFrame(geometry=[], crs=4326),
                pd.DataFrame(columns=summary_columns))

    # The buffered geometries live in the CRS of `cities`; label them with it
    # rather than assuming EPSG:3857 (kept as fallback when no CRS is set).
    source_crs = cities.crs if cities.crs is not None else 3857

    # NOTE(review): the radius is <= 0 for a city closer than `buffer_margin`
    # to the district border, which presumably yields an empty buffer.
    # Confirm whether such rows should be dropped.
    built = [
        bufferz(
            city,
            district.geometry.exterior.distance(city.geometry) - buffer_margin,
            district,
        )
        for city, district in pairs
    ]
    rows = [row for row, _ in built]
    records = [record for _, record in built]

    cities_df = pd.DataFrame(records, columns=['id', 'distcode', 'geometry', 'rad'])
    cities_df = gpd.GeoDataFrame(cities_df, geometry='geometry', crs=source_crs).to_crs(4326)
    # First coordinate of each (reprojected) city geometry: (lng, lat).
    cities_df['lng'] = cities_df.geometry.map(lambda g: g.coords[0][0])
    cities_df['lat'] = cities_df.geometry.map(lambda g: g.coords[0][1])
    cities_df = pd.DataFrame(cities_df).drop(columns=['geometry'])

    gdf = (
        gpd.GeoDataFrame(pd.DataFrame(rows), geometry='geometry', crs=source_crs)
        .to_crs(4326)
        .reset_index(drop=True)
    )
    return gdf, cities_df

In [None]:
# Capital district of each state (excluded from the clustering below):
# Uttar Pradesh: Lucknow
# Jharkhand: Ranchi
# Chhattisgarh: Raipur
# Odisha: Khordha

In [67]:
cities = gpd.read_file('India_MNM/Geography/Cities_India/cities_towns_suburbs.shp')
districts = gpd.read_file('India_MNM/Geography/Districts+Demographics_India/Demographics_of_India.shp')

# States to keep, matched against the `statename` column of the districts
# file (spellings presumably follow that file — TODO confirm).
states = ['Jharkhand', 'Chhatisgarh', 'Orissa', 'Uttar Pradesh']
# Districts removed from the selection, matched against `distname`.
capitols = ['Lucknow', 'Ranchi', 'Raipur', 'Khordha']

# Split geos to make one row per polygon
dists = pd.concat([split_geos(r) for _, r in districts.iterrows()]).reset_index(drop=True)
# Label the rows with the CRS the shapefile declares; EPSG:4326 is only the
# fallback for a file that carries no CRS information.
dists = gpd.GeoDataFrame(
    dists,
    geometry='geometry',
    crs=districts.crs if districts.crs is not None else 4326,
)
dists = dists[dists.statename.isin(states) & ~dists.distname.isin(capitols)]

# Project to meter distance projection
dists = dists.to_crs(3857)
cities = cities.to_crs(3857)

In [68]:
# Build the clusters with a buffer margin of 2000, expressed in the units of
# the projected CRS (EPSG:3857) that `cities` and `dists` were converted to.
# dd: cluster polygons (GeoDataFrame); df: per-city summary table (DataFrame).
dd, df = make_clusters(cities, dists, 2000)

In [None]:
# Display the cluster GeoDataFrame (first value returned by make_clusters).
dd

In [None]:
# Display the per-city summary table (second value returned by make_clusters).
df

In [66]:
# to_csv does not create missing directories, so make sure the target folder
# exists before writing (a no-op when it is already there).
Path('output').mkdir(parents=True, exist_ok=True)
df.to_csv('output/cities.csv', index=False)

In [71]:
# Make sure the target folder exists before writing the shapefile (a no-op
# when it is already there).
Path('output').mkdir(parents=True, exist_ok=True)
dd.to_file('output/clusters.shp')