In [1]:
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point
import transbigdata as tbd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Load the Shenzhen district (TAZ) polygons.
# Forward slashes are used because "\s" and "\S" inside a normal string literal
# are invalid escape sequences (Python emits a warning for them); forward
# slashes are also what the other paths in this notebook use.
districts = gpd.read_file('UrbanEVSupplemental/shenzhen_districts/Shenzhen.shp')
print(districts.columns)
print(districts.shape)
districts.head(2)

  districts = gpd.read_file('UrbanEVSupplemental\shenzhen_districts\Shenzhen.shp')


Index(['OBJECTID', 'TAZID', 'AREA', 'X', 'Y', 'ZONE', 'LENG_ROAD',
       'SHAPE_Leng', 'SHAPE_Area', 'geometry'],
      dtype='object')
(491, 10)


Unnamed: 0,OBJECTID,TAZID,AREA,X,Y,ZONE,LENG_ROAD,SHAPE_Leng,SHAPE_Area,geometry
0,1,1054.0,11.49,114.33353,22.783052,781.0,10.135,25856.846986,13581804.0,"POLYGON ((12727715 2609219, 12727838 2609098, ..."
1,2,1052.0,17.07,114.282465,22.789111,775.0,6.368,30117.841314,20191527.5,"POLYGON ((12721609 2609078, 12721630 2608932, ..."


In [3]:
# Per-zone attribute table (one row per TAZID).
zone = pd.read_csv('zone-information.csv')
# Column index and shape, one per line.
print(zone.columns, zone.shape, sep='\n')
zone.head(2)

Index(['TAZID', 'longitude', 'latitude', 'charge_count', 'area', 'perimeter'], dtype='object')
(275, 6)


Unnamed: 0,TAZID,longitude,latitude,charge_count,area,perimeter
0,1066,113.910301,22.521287,40,1577892.982,5119.3013
1,1068,113.91868,22.517837,271,1405268.834,5460.2638


In [4]:
# Cast the join key to int on both sides so the merge keys share one dtype
# (districts' TAZID is displayed as a float, e.g. 1054.0).
zone = zone.assign(TAZID=zone['TAZID'].astype(int))
districts = districts.assign(TAZID=districts['TAZID'].astype(int))

# Left merge keeps every row of `zone`; only the zone id, the zone area and the
# district polygon are carried forward.
zone_with_polygons = zone.merge(districts, on='TAZID', how='left')
zone_geo = gpd.GeoDataFrame(
    zone_with_polygons[['TAZID', 'area', 'geometry']],
    geometry='geometry',
)
print(zone_geo.shape, type(zone_geo), sep='\n')
zone_geo.head(2)

(275, 3)
<class 'geopandas.geodataframe.GeoDataFrame'>


Unnamed: 0,TAZID,area,geometry
0,1066,1577892.982,"POLYGON ((12680558 2573496, 12680096 2573463, ..."
1,1068,1405268.834,"POLYGON ((12681747 2573552, 12681409 2572621, ..."


In [5]:
# Label the polygons as EPSG:3857 only when no CRS is attached yet.
# Without the guard, re-running this cell would call set_crs(3857) on a frame
# that is already in EPSG:4326, which raises instead of being a no-op.
if zone_geo.crs is None:
    zone_geo = zone_geo.set_crs(epsg=3857)
# Reproject to longitude/latitude (EPSG:4326); a no-op if already there.
zone_geo = zone_geo.to_crs(epsg=4326)
zone_geo.head(2)

Unnamed: 0,TAZID,area,geometry
0,1066,1577892.982,"POLYGON ((113.91139 22.51521, 113.90724 22.514..."
1,1068,1405268.834,"POLYGON ((113.92207 22.51568, 113.91904 22.507..."


## Landuse

In [6]:
# Land-use polygons from the Guangdong shapefile bundle.
landuse = gpd.read_file(
    'guangdong_latest_shapefile/guangdong_latest_shapefile/gis_osm_landuse_a_free_1.shp'
)
# Column index and shape, one per line.
print(landuse.columns, landuse.shape, sep='\n')
landuse.head(2)

Index(['osm_id', 'code', 'fclass', 'name', 'geometry'], dtype='object')
(121223, 5)


Unnamed: 0,osm_id,code,fclass,name,geometry
0,4182605,7202,park,維多利亞公園 Victoria Park,"POLYGON ((114.18594 22.28351, 114.18621 22.283..."
1,4977404,7202,park,九龍公園 Kowloon Park,"POLYGON ((114.16828 22.30104, 114.16854 22.301..."


In [7]:
# Read the station table and keep only the id, coordinates and zone id.
# The explicit copy detaches the subset from the raw frame so nothing below
# writes into a slice.
station_info = pd.read_csv('UrbanEVDataset/20220901-20230228_station-raw/station_information.csv')
station_info = station_info[['station_id', 'longitude', 'latitude', 'TAZID']].copy()

# Convert to a GeoDataFrame in one step: point geometry built from the
# longitude/latitude columns, labelled as EPSG:4326.
station = gpd.GeoDataFrame(
    station_info,
    geometry=gpd.points_from_xy(station_info['longitude'], station_info['latitude']),
    crs='EPSG:4326',
)
print(type(station))
print(station.shape)
station.head(2)

<class 'geopandas.geodataframe.GeoDataFrame'>
(1682, 5)


Unnamed: 0,station_id,longitude,latitude,TAZID,geometry
0,1001,113.784724,22.714121,559,POINT (113.78472 22.71412)
1,1002,113.785002,22.7259,558,POINT (113.785 22.7259)


In [8]:
# Step 1: match each station point to the land-use polygon that contains it.
# A left join keeps stations that fall inside no polygon (their fclass is NaN);
# when a station matches several polygons only its first joined row is kept.
matched_all_point = (
    gpd.sjoin(station, landuse, how='left', predicate='within')
    .drop_duplicates(subset=['station_id'])
    .loc[:, ['station_id', 'longitude', 'latitude', 'TAZID', 'fclass', 'geometry']]
)
matched_all_point.head(2)

Unnamed: 0,station_id,longitude,latitude,TAZID,fclass,geometry
0,1001,113.784724,22.714121,559,,POINT (113.78472 22.71412)
1,1002,113.785002,22.7259,558,industrial,POINT (113.785 22.7259)


In [9]:
# Step 2: stations that fell inside no land-use polygon inherit the class of
# the nearest polygon.

# Split on the SAME criterion (fclass present / absent) so the two subsets are
# exact complements and no station can be lost between them.
has_fclass = matched_all_point['fclass'].notna()
matched_in_point = matched_all_point[has_fclass]
unmatched_point = matched_all_point[~has_fclass]

# Run the nearest-neighbour search in a projected CRS (EPSG:32650, WGS 84 /
# UTM zone 50N, the same CRS used for road lengths in this notebook).
# In EPSG:4326 distances are measured in degrees, which geopandas warns gives
# unreliable nearest matches.
METRIC_CRS = 'EPSG:32650'
nearest_landuse = gpd.sjoin_nearest(
    unmatched_point[['station_id', 'geometry']].to_crs(METRIC_CRS),
    landuse[['fclass', 'geometry']].to_crs(METRIC_CRS),
    how='left',
).drop_duplicates(subset=['station_id'])  # equidistant ties: keep the first row

# The left join preserves the station index, so the nearest class can be
# written back by index alignment; the original EPSG:4326 geometry and the
# column order stay untouched.
matched_out_point = unmatched_point.assign(fclass=nearest_landuse['fclass'])

# Stack both subsets and renumber the rows 0..n-1.
matched_point = pd.concat([matched_in_point, matched_out_point], axis=0, ignore_index=True)

print(matched_all_point.shape)
print(matched_in_point.shape)
print(matched_out_point.shape)
print(matched_point.shape)

(1682, 6)
(716, 6)
(966, 6)
(1682, 6)





In [18]:
# Preview the first two rows of the station table with its land-use class.
matched_point.head(2)

Unnamed: 0,station_id,longitude,latitude,TAZID,fclass,geometry
0,1002,113.785002,22.7259,558,industrial,POINT (113.785 22.7259)
1,1009,113.794798,22.716747,595,industrial,POINT (113.7948 22.71675)


In [23]:
from scipy.stats import entropy

def calculate_shannon_entropy(group):
    """Shannon entropy (natural logarithm) of the ``fclass`` values in ``group``.

    ``group`` must expose an ``fclass`` column. Each distinct class contributes
    its count divided by the number of rows in ``group``; the resulting shares
    are handed to ``scipy.stats.entropy``.
    """
    n_rows = len(group)
    class_shares = group['fclass'].value_counts().div(n_rows)
    # base=np.e selects the natural logarithm.
    return entropy(class_shares, base=np.e)

# One entropy value per zone. The land-use column is selected explicitly so
# that apply() does not operate on the grouping column 'TAZID' (behaviour that
# recent pandas versions warn about as deprecated).
landuse_shannon = (
    matched_point.groupby('TAZID')[['fclass']]
    .apply(calculate_shannon_entropy)
    .reset_index()
)
landuse_shannon.columns = ['TAZID', 'shannon']
landuse_shannon.head(2)

  landuse_shannon = matched_point.groupby('TAZID').apply(calculate_shannon_entropy).reset_index()


Unnamed: 0,TAZID,shannon
0,102,0.0
1,104,0.636514


In [24]:
# Attach the entropy to every zone listed in `zone`; the left join keeps zones
# that have no entry in landuse_shannon (their 'shannon' is NaN).
result_landuse = (
    zone.merge(landuse_shannon, on='TAZID', how='left')
    .loc[:, ['TAZID', 'shannon']]
)
result_landuse.head(2)

Unnamed: 0,TAZID,shannon
0,1066,0.37677
1,1068,1.205698


In [25]:
# Write the per-zone land-use entropy without the row index.
result_landuse.to_csv('landuse.csv', index=False)

## Road network

In [13]:
roads = gpd.read_file('guangdong_latest_shapefile/guangdong_latest_shapefile/gis_osm_roads_free_1.shp')
# TODO: decide whether only specific road types should be kept, and whether
# footways and similar classes should be included.
roads = roads[['osm_id', 'geometry']]
# Assign back to `roads` (not a separate name) so the CRS label actually
# applies to the frame used by the following cells.
roads = roads.set_crs(epsg=4326)
print(roads.shape)
roads.head(2)

(842879, 2)


Unnamed: 0,osm_id,geometry
0,4195000,"LINESTRING (114.18562 22.28266, 114.1856 22.28..."
1,4338869,"LINESTRING (114.16215 22.24868, 114.16294 22.2..."


In [14]:
# Preview the zone polygons that the road network is intersected with below.
zone_geo.head(2)

Unnamed: 0,TAZID,area,geometry
0,1066,1577892.982,"POLYGON ((113.91139 22.51521, 113.90724 22.514..."
1,1068,1405268.834,"POLYGON ((113.92207 22.51568, 113.91904 22.507..."


In [15]:
def calc_road_length_by_region(roads_gdf, regions_gdf, region_id_col, target_crs="EPSG:32650"):
    """Sum the length of road geometry that falls inside each region.

    Parameters
    ----------
    roads_gdf : GeoDataFrame
        Road geometries; must have a CRS so it can be reprojected.
    regions_gdf : GeoDataFrame
        Region polygons; must have a CRS and contain ``region_id_col``.
    region_id_col : str
        Column of ``regions_gdf`` that identifies a region.
    target_crs : str, default "EPSG:32650"
        Projected CRS both layers are converted to before measuring. Lengths
        are expressed in this CRS's units (metres for the default,
        WGS 84 / UTM zone 50N).

    Returns
    -------
    GeoDataFrame
        ``regions_gdf`` in its original CRS with an added ``length`` column.
        Because of the left merge, regions that intersect no road get NaN.
    """
    # Reproject to the target projected CRS. Only the geometry (plus the region
    # id) is passed on, so attribute columns with the same name in both layers
    # cannot be suffixed by the overlay and break the groupby below.
    roads_proj = roads_gdf[[roads_gdf.geometry.name]].to_crs(target_crs)
    regions_proj = regions_gdf[[region_id_col, regions_gdf.geometry.name]].to_crs(target_crs)

    # Cut the roads along the region boundaries.
    pieces = gpd.overlay(roads_proj, regions_proj, how="intersection")

    # Length of every clipped piece, in target_crs units.
    pieces["length"] = pieces.geometry.length

    # Aggregate per region.
    length_by_region = pieces.groupby(region_id_col)["length"].sum().reset_index()

    # Merge back onto the original regions (original CRS is kept).
    return regions_gdf.merge(length_by_region, on=region_id_col, how="left")

In [16]:
# Total road length per zone, then density = length / area * 1e3.
# NOTE(review): if 'length' is in metres and 'area' in square metres, the 1e3
# factor yields km of road per square km; confirm the unit of zone 'area'.
roads_matched = calc_road_length_by_region(roads, zone_geo, "TAZID")
roads_matched = roads_matched.assign(
    road_density=lambda frame: (frame['length'] / frame['area']) * 1e3
)
roads_matched.head(2)

Unnamed: 0,TAZID,area,geometry,length,road_density
0,1066,1577892.982,"POLYGON ((113.91139 22.51521, 113.90724 22.514...",42436.418621,26.894358
1,1068,1405268.834,"POLYGON ((113.92207 22.51568, 113.91904 22.507...",31207.977606,22.207834


In [17]:
# Write the per-zone road density without the row index.
road_density_table = roads_matched[['TAZID', 'road_density']]
road_density_table.to_csv('road_density.csv', index=False)