In [1]:
import pandas as pd
import geopandas as gpd
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import numpy as np

In [2]:
# Load the zone table and keep only the zone id plus its longitude/latitude
# columns, in that order.
zone_information = pd.read_csv(
    '/gpfsnyu/home/yw9871/New data/Original_data/zone-information.csv'
).loc[:, ['TAZID', 'longitude', 'latitude']]
zone_information.head(n=2)

Unnamed: 0,TAZID,longitude,latitude
0,1066,113.910301,22.521287
1,1068,113.91868,22.517837


In [3]:
# Load the zone polygons, cast the zone id to int so it can be joined against
# the integer TAZID of the CSV tables, and keep only the id and the geometry.
districts = gpd.read_file('/gpfsnyu/home/yw9871/New data/Original_data/Shenzhen/Shenzhen.shp')
districts = (
    districts
    .assign(TAZID=districts['TAZID'].astype(int))
    [['TAZID', 'geometry']]
    # set_crs only labels the layer as EPSG:3857; it does not move coordinates.
    # NOTE(review): this assumes the shapefile really is in EPSG:3857 - confirm.
    .set_crs(epsg=3857)
    # Reproject to EPSG:4326, the CRS assigned to the POI points further down.
    .to_crs(epsg=4326)
)
districts.head(n=2)

Unnamed: 0,TAZID,geometry
0,1054,"POLYGON ((114.33501 22.81134, 114.33611 22.810..."
1,1052,"POLYGON ((114.28016 22.81017, 114.28035 22.808..."


In [4]:
# Per-zone land-use table: one `shannon` value per TAZID
# (presumably a Shannon diversity index - confirm with the script that wrote it).
landuse_diversity = pd.read_csv(
    '/gpfsnyu/home/yw9871/New data/Results/landuse.csv'
)
landuse_diversity.head(n=2)

Unnamed: 0,TAZID,shannon
0,1066,0.37677
1,1068,1.205698


In [5]:
# Per-zone `distance` value from the freeway-distance results file; only the
# zone id and the distance column are kept.
freeway_distance = pd.read_csv(
    '/gpfsnyu/home/yw9871/New data/Results/freeway_distance.csv'
).loc[:, ['TAZID', 'distance']]
freeway_distance.head(n=2)

Unnamed: 0,TAZID,distance
0,1066,2.669259
1,1068,3.092942


In [6]:
# Per-zone road density; only the zone id and the density column are kept.
road_density = pd.read_csv(
    '/gpfsnyu/home/yw9871/New data/Results/road_density.csv'
).loc[:, ['TAZID', 'road_density']]
road_density.head(n=2)

Unnamed: 0,TAZID,road_density
0,1066,26.894358
1,1068,22.207834


In [7]:
# Load the POI records and turn them into a point layer in EPSG:4326, built
# from the longitude/latitude columns, so they can be spatially joined against
# the zone polygons.
poi = pd.read_csv('/gpfsnyu/home/yw9871/New data/Results/poi.csv')
poi = gpd.GeoDataFrame(
    poi,
    geometry=gpd.points_from_xy(poi['longitude'], poi['latitude']),
    crs='EPSG:4326',
)
poi.head(n=2)

Unnamed: 0,primary_types,longitude,latitude,geometry
0,lifestyle services,114.576628,22.494887,POINT (114.57663 22.49489)
1,lifestyle services,114.579614,22.492192,POINT (114.57961 22.49219)


In [8]:
# Disabled scratch export: joins the land-use table to zone coordinates and
# writes the result as a GeoJSON point layer.
# NOTE(review): `zone_geo` is not defined in any visible cell; the table loaded
# above with TAZID/longitude/latitude is `zone_information` - confirm before
# re-enabling.
#test = pd.merge(landuse_diversity,zone_geo,on='TAZID',how='left')
#test['geometry'] = gpd.points_from_xy(test['longitude'],test['latitude'])
#test = gpd.GeoDataFrame(test)
#test = test.set_crs('EPSG:4326')
#test.to_file("test.geojson", driver='GeoJSON')

In [9]:
# Tag every POI with the zone polygon it lies within. The left join keeps
# unmatched POIs, with missing values in the columns taken from `districts`.
poi_matched = gpd.sjoin(poi, districts, how='left', predicate='within')

# Count POIs per zone. Rows with any missing value (in particular POIs that
# matched no zone) are dropped first; `count` tallies non-null `primary_types`.
poi_num = (
    poi_matched
    .dropna()
    .groupby(by='TAZID')['primary_types']
    .count()
    .reset_index()
)
# Cast the zone id to int to match the other feature tables.
poi_num['TAZID'] = poi_num['TAZID'].astype(int)
poi_num.columns = ['TAZID', 'poi']
print(poi_num.shape)
poi_num.head(n=2)

(490, 2)


Unnamed: 0,TAZID,poi
0,102,940
1,104,3088


In [10]:
# Disabled: optional export of the per-zone POI counts as a standalone CSV.
#poi_num.to_csv('/gpfsnyu/home/yw9871/New data/Ready_to_model/poi_num.csv',index=None)

In [11]:
# Per-zone `gdp_weight` and `volume_weight` columns, keyed on TAZID.
gdp_volume = pd.read_csv(
    '/gpfsnyu/home/yw9871/New data/Results/gdp_volume.csv'
)
gdp_volume.head(n=2)

Unnamed: 0,TAZID,gdp_weight,volume_weight
0,1066,0.248592,0.127608
1,1068,0.248592,0.127608


In [12]:
# Assemble the static per-zone feature table. The land-use table is the base,
# so it decides which zones appear; every other table is left-joined onto it
# by TAZID.
static_fea = (
    landuse_diversity
    .merge(freeway_distance, on='TAZID', how='left')
    .merge(road_density, on='TAZID', how='left')
    .merge(poi_num, on='TAZID', how='left')
    .merge(gdp_volume, on='TAZID', how='left')
    # Fix the column order explicitly (this also drops any extra columns the
    # source tables may carry).
    .loc[:, ['TAZID', 'shannon', 'distance', 'road_density', 'poi',
             'gdp_weight', 'volume_weight']]
    # `poi_num` only has rows for zones that contain at least one POI, so a
    # zone without POIs comes out of the left join as NaN. Its real count is
    # 0; filling it here stops NaN from propagating through the scaler into
    # the model input, and keeps the column integer-typed.
    .assign(poi=lambda fea: fea['poi'].fillna(0).astype('int64'))
)
print(static_fea.shape)
static_fea.head(2)

(275, 7)


Unnamed: 0,TAZID,shannon,distance,road_density,poi,gdp_weight,volume_weight
0,1066,0.37677,2.669259,26.894358,2520,0.248592,0.127608
1,1068,1.205698,3.092942,22.207834,3478,0.248592,0.127608


In [13]:
# Occupancy matrix: the first CSV column becomes the row index and the header
# row supplies the column labels, which are used below to order the static
# features.
occ = pd.read_csv(
    '/gpfsnyu/home/yw9871/New data/Ready_to_model/occupancy.csv',
    header=0,
    index_col=0,
)
# Prints "<n_rows> <n_columns>".
print(*occ.shape)

4344 275


In [14]:
# Index the feature table by the zone id as a string (the occupancy column
# labels are used as lookup keys), then reorder the rows to follow the
# occupancy columns. `.loc` raises KeyError if an occupancy column has no
# matching zone. The TAZID column itself is kept in the frame.
static_fea = (
    static_fea
    .set_index(static_fea['TAZID'].astype(str))
    .loc[occ.columns]
)
static_fea.head(n=2)

Unnamed: 0,TAZID,shannon,distance,road_density,poi,gdp_weight,volume_weight
102,102,0.0,4.511284,24.8933,940,0.168073,0.169951
104,104,0.636514,4.190836,23.86439,3088,0.081431,0.130426


In [15]:
# Min-max scale every column of the static feature table to [0, 1].
# NOTE(review): `static_fea` still contains the TAZID column, so the zone id
# is scaled as if it were a feature (column 0 of the result) - confirm that
# downstream code expects this before changing it.
# NOTE(review): wrapping the scaler output in a bare DataFrame replaces the
# column names and the zone index with 0..n-1 positions.
price_scaler = MinMaxScaler(feature_range=(0, 1))
static_fea_standard = pd.DataFrame(price_scaler.fit_transform(static_fea))
print(static_fea_standard.describe())
static_fea_standard.head(n=2)

                0           1           2           3           4           5  \
count  275.000000  275.000000  275.000000  275.000000  275.000000  275.000000   
mean     0.589889    0.285984    0.155463    0.310792    0.248462    0.546690   
std      0.289531    0.272011    0.097047    0.182213    0.186983    0.267548   
min      0.000000    0.000000    0.000000    0.000000    0.000000    0.000000   
25%      0.397292    0.000000    0.093958    0.180560    0.107198    0.325990   
50%      0.592904    0.309824    0.147791    0.263792    0.204375    0.569120   
75%      0.842670    0.429162    0.201688    0.416458    0.362174    0.659208   
max      1.000000    1.000000    1.000000    1.000000    1.000000    1.000000   

                6  
count  275.000000  
mean     0.593997  
std      0.205043  
min      0.000000  
25%      0.452042  
50%      0.528469  
75%      0.688814  
max      1.000000  


Unnamed: 0,0,1,2,3,4,5,6
0,0.0,0.0,0.192966,0.465721,0.132392,0.659208,1.0
1,0.001867,0.394097,0.178397,0.445132,0.435568,0.2925,0.709527


In [16]:
# Persist the scaled feature matrix for the modelling step, without the row
# index. The spelling of the file name is kept as is, because any existing
# reader of this file would be looking for this exact path.
static_fea_standard.to_csv(
    '/gpfsnyu/home/yw9871/New data/Ready_to_model/static_fea_stanard.csv',
    index=None,
)