In [1]:
import pandas as pd
import geopandas as gpd
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import numpy as np

In [2]:
# Per-zone attributes (zone id, centroid coordinates, area).
# Paths in this notebook are relative; the original note recorded
# /gpfsnyu/home/yw9871/New data/ — presumably the data root on the cluster.
zone_columns = ['TAZID', 'longitude', 'latitude', 'area']
zone_information = pd.read_csv('Original_data/zone-information.csv').loc[:, zone_columns]
zone_information.head(2)

Unnamed: 0,TAZID,longitude,latitude,area
0,1066,113.910301,22.521287,1577892.982
1,1068,113.91868,22.517837,1405268.834


In [3]:
# Zone polygons. TAZID is cast to int so it can be joined with the CSV tables.
zones_raw = gpd.read_file('Original_data/Shenzhen/Shenzhen.shp')
zones_raw['TAZID'] = zones_raw['TAZID'].astype(int)

# Declare the stored coordinates as EPSG:3857, then reproject to EPSG:4326
# (lon/lat) so the polygons share a CRS with the point layers built later.
districts = (
    zones_raw[['TAZID', 'geometry']]
    .set_crs(epsg=3857)
    .to_crs(epsg=4326)
)
districts.head(2)

Unnamed: 0,TAZID,geometry
0,1054,"POLYGON ((114.33501 22.81134, 114.33611 22.810..."
1,1052,"POLYGON ((114.28016 22.81017, 114.28035 22.808..."


In [4]:
# Per-zone land-use diversity table (columns: TAZID, shannon).
landuse_path = 'Results/landuse.csv'
landuse_diversity = pd.read_csv(landuse_path)
landuse_diversity.head(2)

Unnamed: 0,TAZID,shannon
0,1066,0.37677
1,1068,1.205698


In [5]:
# Per-zone freeway distance; keep only the zone id and the distance column.
freeway_distance = pd.read_csv('Results/freeway_distance.csv').loc[:, ['TAZID', 'distance']]
freeway_distance.head(2)

Unnamed: 0,TAZID,distance
0,1066,2.669259
1,1068,3.092942


In [6]:
# Per-zone road density; keep only the zone id and the density column.
road_density = pd.read_csv('Results/road_density.csv').loc[:, ['TAZID', 'road_density']]
road_density.head(2)

Unnamed: 0,TAZID,road_density
0,1066,26.894358
1,1068,22.207834


In [7]:
# POI table -> point layer in EPSG:4326, built from its longitude/latitude columns.
poi = pd.read_csv('Results/poi.csv')
poi = gpd.GeoDataFrame(
    poi,
    geometry=gpd.points_from_xy(poi['longitude'], poi['latitude']),
    crs='EPSG:4326',
)
poi.head(2)

Unnamed: 0,primary_types,longitude,latitude,geometry
0,lifestyle services,114.576628,22.494887,POINT (114.57663 22.49489)
1,lifestyle services,114.579614,22.492192,POINT (114.57961 22.49219)


In [8]:
#test = pd.merge(landuse_diversity,zone_geo,on='TAZID',how='left')
#test['geometry'] = gpd.points_from_xy(test['longitude'],test['latitude'])
#test = gpd.GeoDataFrame(test)
#test = test.set_crs('EPSG:4326')
#test.to_file("test.geojson", driver='GeoJSON')

In [9]:
# Attach to every POI the zone polygon it lies within (left join: POIs
# outside all zones are kept with a missing TAZID).
poi_matched = gpd.sjoin(poi, districts, how='left', predicate='within')

# Keep only POIs that matched a zone. Filtering on TAZID alone, rather than
# requiring every column to be non-null, avoids discarding a matched POI just
# because an unrelated attribute happens to be missing.
poi_num = poi_matched.dropna(subset=['TAZID'])

# Count POIs per zone (count() tallies non-null primary_types values).
poi_num = poi_num.groupby(by='TAZID')['primary_types'].count().reset_index()
# TAZID may have been upcast to float by the left join; cast back to int so
# it merges cleanly with the other per-zone tables.
poi_num['TAZID'] = poi_num['TAZID'].astype(int)
poi_num.columns = ['TAZID','poi']
print(poi_num.shape)
poi_num.head(2)

(490, 2)


Unnamed: 0,TAZID,poi
0,102,940
1,104,3088


In [10]:
# POI count per unit of zone area, for every zone in zone_information.
poi_density = pd.merge(zone_information, poi_num, how='left', on='TAZID')
# A zone with no matched POI has no row in poi_num, so the left merge leaves
# its count as NaN. Treat that as zero POIs (the metro and bus density cells
# in this notebook zero-fill the same way) instead of propagating NaN.
poi_density['poi'] = poi_density['poi'].fillna(0)
# NOTE(review): unlike the metro/bus densities, no 1e6 factor is applied here,
# so this is a count per raw unit of `area` — confirm the unit mismatch is intended.
poi_density['poi_density'] = poi_density['poi']/poi_density['area']
poi_density = poi_density[['TAZID','poi_density']]
poi_density.head(2)

Unnamed: 0,TAZID,poi_density
0,1066,0.001597
1,1068,0.002475


In [11]:
#poi_num.to_csv('/gpfsnyu/home/yw9871/New data/Ready_to_model/poi_num.csv',index=None)

In [12]:
# Per-zone GDP and EV-volume weights; the file's columns are relabelled by position.
gdp_column_names = ['TAZID', 'gdp_weight', 'evvolume_weight']
gdp_volume = pd.read_csv('Results/gdp_volume.csv')
gdp_volume.columns = gdp_column_names
gdp_volume.head(2)

Unnamed: 0,TAZID,gdp_weight,evvolume_weight
0,1066,0.248592,0.127608
1,1068,0.248592,0.127608


In [13]:
# Metro point records (name, location, id, longitude, latitude).
metro_path = 'Results/metro.csv'
metro = pd.read_csv(metro_path)
print(metro.shape)
metro.head(2)

(2114, 5)


Unnamed: 0,name,location,id,longitude,latitude
0,会展城地铁站A口,"113.768906,22.711854",BX10034188,113.768906,22.711854
1,会展城(地铁站),"113.769777,22.712135",BV10861136,113.769777,22.712135


In [14]:
# Turn the metro table into an EPSG:4326 point layer and attach to each point
# the zone polygon it lies within (left join: unmatched points keep NaN TAZID).
metro = gpd.GeoDataFrame(metro, geometry=gpd.points_from_xy(metro['longitude'], metro['latitude']), crs='EPSG:4326')
metro_matched = gpd.sjoin(metro, districts, how='left', predicate='within')

# Keep only points that matched a zone. Filtering on TAZID alone, rather than
# requiring every column to be non-null, avoids discarding a matched point
# just because e.g. its name or location text is missing.
metro_num = metro_matched.dropna(subset=['TAZID'])

# Count metro records per zone (count() tallies non-null id values).
metro_num = metro_num.groupby(by='TAZID')['id'].count().reset_index()
# TAZID may have been upcast to float by the left join; cast back to int so
# it merges cleanly with the other per-zone tables.
metro_num['TAZID'] = metro_num['TAZID'].astype(int)
metro_num.columns = ['TAZID','metro_num']
print(metro_num.shape)
metro_num.head(2)

(277, 2)


Unnamed: 0,TAZID,metro_num
0,102,7
1,104,1


In [15]:
# Metro records per unit area for every zone in zone_information.
metro_density = (
    zone_information
    .merge(metro_num, how='left', on='TAZID')
    # zones without a matched metro record get 0 (this fills every NaN in the frame)
    .fillna(0)
    # 1e6 rescales the area unit — presumably m^2 -> km^2; confirm against the source data
    .assign(metro_density=lambda zones: zones['metro_num'] * 1e6 / zones['area'])
    .loc[:, ['TAZID', 'metro_density']]
)
print(metro_density.shape)
metro_density.head(2)

(275, 2)


Unnamed: 0,TAZID,metro_density
0,1066,0.0
1,1068,9.962507


In [16]:
# Bus stop records (name, location, id, longitude, latitude).
bus_path = 'Results/bus.csv'
bus = pd.read_csv(bus_path)
print(bus.shape)
bus.head(2)

(6589, 5)


Unnamed: 0,name,location,id,longitude,latitude
0,冰雪世界南公交首末站(公交站),"113.769627,22.709732",BV09447897,113.769627,22.709732
1,前海冰雪世界(公交站),"113.770256,22.716250",BV09446847,113.770256,22.71625


In [17]:
# Turn the bus table into an EPSG:4326 point layer and attach to each stop
# the zone polygon it lies within (left join: unmatched stops keep NaN TAZID).
bus = gpd.GeoDataFrame(bus, geometry=gpd.points_from_xy(bus['longitude'], bus['latitude']), crs='EPSG:4326')
bus_matched = gpd.sjoin(bus, districts, how='left', predicate='within')

# Keep only stops that matched a zone. Filtering on TAZID alone, rather than
# requiring every column to be non-null, avoids discarding a matched stop
# just because e.g. its name or location text is missing.
bus_num = bus_matched.dropna(subset=['TAZID'])

# Count bus stops per zone (count() tallies non-null id values).
bus_num = bus_num.groupby(by='TAZID')['id'].count().reset_index()
# TAZID may have been upcast to float by the left join; cast back to int so
# it merges cleanly with the other per-zone tables.
bus_num['TAZID'] = bus_num['TAZID'].astype(int)
bus_num.columns = ['TAZID','bus_num']
print(bus_num.shape)
bus_num.head(2)

(473, 2)


Unnamed: 0,TAZID,bus_num
0,102,10
1,104,7


In [18]:
# Bus stops per unit area for every zone in zone_information.
bus_density = (
    zone_information
    .merge(bus_num, how='left', on='TAZID')
    # zones without a matched bus stop get 0 (this fills every NaN in the frame)
    .fillna(0)
    # 1e6 rescales the area unit — presumably m^2 -> km^2; confirm against the source data
    .assign(bus_density=lambda zones: zones['bus_num'] * 1e6 / zones['area'])
    .loc[:, ['TAZID', 'bus_density']]
)
print(bus_density.shape)
bus_density.head(2)

(275, 2)


Unnamed: 0,TAZID,bus_density
0,1066,6.337565
1,1068,11.385722


In [19]:
# 2022 snapshot: density plus the zone area that is carried into the final table.
pop2022 = gpd.read_file('Results/shenzhen_population_by_district_2022.geojson')[['TAZID', 'density', 'area']]
pop2022 = pop2022.rename(columns={'density': '2022density'})

# 2023 snapshot: density only.
pop2023 = gpd.read_file('Results/shenzhen_population_by_district_2023.geojson')[['TAZID', 'density']]
pop2023 = pop2023.rename(columns={'density': '2023density'})

# Average the two years per zone (NaN in either year yields NaN).
pop_density = pop2022.merge(pop2023, how='left', on='TAZID')
pop_density['pop_density'] = pop_density['2022density'].add(pop_density['2023density']).div(2)
pop_density = pop_density.loc[:, ['TAZID', 'pop_density', 'area']]
pop_density.head(2)

Unnamed: 0,TAZID,pop_density,area
0,1054,9633.690481,11.490872
1,1052,3774.90569,17.081751


In [20]:
# Assemble the static feature table: start from land-use diversity and
# left-join every other per-zone table on TAZID, in this order.
static_fea = landuse_diversity
for feature_table in (
    freeway_distance,
    road_density,
    gdp_volume,
    poi_density,
    metro_density,
    bus_density,
    pop_density,
):
    static_fea = pd.merge(static_fea, feature_table, on='TAZID', how='left')

# Fix the column order of the final table.
feature_columns = [
    'TAZID', 'shannon', 'distance', 'road_density', 'gdp_weight', 'evvolume_weight',
    'poi_density', 'metro_density', 'bus_density', 'pop_density', 'area',
]
static_fea = static_fea[feature_columns]
print(static_fea.shape)
static_fea.head(2)

(275, 11)


Unnamed: 0,TAZID,shannon,distance,road_density,gdp_weight,evvolume_weight,poi_density,metro_density,bus_density,pop_density,area
0,1066,0.37677,2.669259,26.894358,0.248592,0.127608,0.001597,0.0,6.337565,121798.782264,1.577898
1,1068,1.205698,3.092942,22.207834,0.248592,0.127608,0.002475,9.962507,11.385722,137842.084316,1.405271


In [21]:
# Occupancy table; first CSV column is the row index, first row is the header.
occ = pd.read_csv('Ready_to_model/occupancy.csv', header=0, index_col=0)
n_rows, n_cols = occ.shape
print(n_rows, n_cols)

4344 275


In [22]:
# Index the features by TAZID as a string (presumably occ's column labels are
# strings read from the CSV header), then reorder the rows to follow the
# column order of occ. .loc raises KeyError if any occ column has no feature row.
zone_order = occ.columns
static_fea = static_fea.set_index(static_fea['TAZID'].astype(str)).loc[zone_order]
static_fea.head(2)

Unnamed: 0,TAZID,shannon,distance,road_density,gdp_weight,evvolume_weight,poi_density,metro_density,bus_density,pop_density,area
102,102,0.0,4.511284,24.8933,0.168073,0.169951,0.001316,9.798167,13.997382,136251.194834,0.714416
104,104,0.636514,4.190836,23.86439,0.081431,0.130426,0.002635,0.853436,5.974055,142263.309091,1.171736


In [23]:
#price_scaler = MinMaxScaler(feature_range=(0, 1))
#static_fea_standard = price_scaler.fit_transform(static_fea)
#static_fea_standard = pd.DataFrame(static_fea_standard)
#print(static_fea_standard.describe())
#static_fea_standard.head(2)

In [24]:
# Write the ordered feature table; the string row index is not written
# (the TAZID column already carries the zone id).
static_fea.to_csv('Ready_to_model/static_fea.csv', index=False)