In [1]:
import geopandas as gpd

# Read the Tokyo GeoJSON file (path relative to the working directory)
# into a GeoDataFrame that the following cells build on.
tokyo_gdf = gpd.read_file(filename="tokyo.geojson")

In [3]:
# Print every column name as a bullet to get an overview of the available fields.
for column_name in tokyo_gdf.columns:
    print(f"- {column_name}")

- KEY_CODE
- 2023_building_count
- 2023_office_count
- 2023_commercial_count
- 2023_hotel_count
- 2023_house_count
- 2023_apartment_count
- 2023_logistics_count
- 2023_office_total_use_area
- 2023_commercial_total_use_area
- 2023_hotel_total_use_area
- 2023_house_total_use_area
- 2023_apartment_total_use_area
- 2023_logistics_total_use_area
- 2023_total_use_area
- 1996_building_count
- 2001_building_count
- 2006_building_count
- 2011_building_count
- 2016_building_count
- 2023_old__building_count
- 1996_office_count
- 2001_office_count
- 2006_office_count
- 2011_office_count
- 2016_office_count
- 2023_old__office_count
- 1996_commercial_count
- 2001_commercial_count
- 2006_commercial_count
- 2011_commercial_count
- 2016_commercial_count
- 2023_old__commercial_count
- 1996_hotel_count
- 2001_hotel_count
- 2006_hotel_count
- 2011_hotel_count
- 2016_hotel_count
- 2023_old__hotel_count
- 1996_house_count
- 2001_house_count
- 2006_house_count
- 2011_house_count
- 2016_house_count
- 2023_old

In [4]:
# Year prefixes of the "<year>_*_total_use_area" columns processed below.
years = [1996, 2001, 2006, 2011, 2016, 2023]

# For every year, add "<year>_housing_total_use_area" = house area + apartment area.
for year in years:
    house_col = f"{year}_house_total_use_area"
    apt_col = f"{year}_apartment_total_use_area"
    total_col = f"{year}_housing_total_use_area"
    # Row-wise sum that skips NaN; min_count=1 leaves the total as NaN when
    # both inputs are missing, so "no data" is not reported as an area of 0.
    tokyo_gdf[total_col] = tokyo_gdf[[house_col, apt_col]].sum(axis=1, min_count=1)

In [5]:
# Collect the derived housing columns, identified by their common name suffix.
housing_suffix = '_housing_total_use_area'
new_fields = [name for name in tokyo_gdf.columns if name.endswith(housing_suffix)]

# Print a header followed by one bullet per derived column.
print("\n".join(["New fields:"] + [f"- {name}" for name in new_fields]))

# Preview the first rows of the derived columns.
tokyo_gdf[new_fields].head()

New fields:
- 1996_housing_total_use_area
- 2001_housing_total_use_area
- 2006_housing_total_use_area
- 2011_housing_total_use_area
- 2016_housing_total_use_area
- 2023_housing_total_use_area


Unnamed: 0,1996_housing_total_use_area,2001_housing_total_use_area,2006_housing_total_use_area,2011_housing_total_use_area,2016_housing_total_use_area,2023_housing_total_use_area
0,,,,,,
1,,,,,,
2,87612.71812,101087.06284,114823.492054,134833.81841,154795.00879,123005.503
3,124567.82399,159349.128352,232998.659129,241418.05294,239981.84673,169668.4005
4,40731.61528,46865.45114,55806.765863,58108.711,62743.28499,50536.397


In [8]:
# For every year, add "<year>_other_total_use_area" =
# commercial area + logistics area + hotel area.
for year in years:
    # Column names for each use category and for the combined result.
    commercial_col = f"{year}_commercial_total_use_area"
    logistics_col = f"{year}_logistics_total_use_area"
    hotel_col = f"{year}_hotel_total_use_area"
    other_col = f"{year}_other_total_use_area"

    # Row-wise sum that skips NaN; min_count=1 leaves the total as NaN when
    # all three inputs are missing, so "no data" is not reported as an area of 0.
    tokyo_gdf[other_col] = tokyo_gdf[[commercial_col, logistics_col, hotel_col]].sum(
        axis=1, min_count=1
    )

In [9]:
# Keep 'KEY_CODE' plus every column whose name contains one of these keywords.
keep_keywords = ('building_count', 'housing', 'office', 'other')
cols_to_keep = ['KEY_CODE'] + [
    name
    for name in tokyo_gdf.columns
    if any(keyword in name for keyword in keep_keywords)
]

# Work on an independent copy so later changes do not touch tokyo_gdf.
tokyo_slim = tokyo_gdf[cols_to_keep].copy()
tokyo_slim.head()

Unnamed: 0,KEY_CODE,2023_building_count,2023_office_count,2023_office_total_use_area,1996_building_count,2001_building_count,2006_building_count,2011_building_count,2016_building_count,2023_old__building_count,...,2006_housing_total_use_area,2011_housing_total_use_area,2016_housing_total_use_area,2023_housing_total_use_area,1996_other_total_use_area,2001_other_total_use_area,2006_other_total_use_area,2011_other_total_use_area,2016_other_total_use_area,2023_other_total_use_area
0,533925552,2.0,,,,,,,1.0,1.0,...,,,,,,,,,,
1,533925554,,,,,,,,,,...,,,,,,,,,,
2,533925561,664.0,16.0,13706.932,529.0,473.0,488.0,492.0,584.0,547.0,...,114823.492054,134833.81841,154795.00879,123005.503,19409.38366,17153.774595,20110.995828,20715.53687,2629.8945,2816.67
3,533925562,1071.0,15.0,2347.47,967.0,939.0,905.0,901.0,1008.0,880.0,...,232998.659129,241418.05294,239981.84673,169668.4005,4493.87552,3109.528341,4700.985586,4246.00769,4070.09452,3208.36
4,533925563,557.0,1.0,157.12,485.0,485.0,479.0,486.0,529.0,466.0,...,55806.765863,58108.711,62743.28499,50536.397,2001.391,1824.047786,92.17831,129.16658,129.25711,112.95


In [10]:
# Re-attach the geometry of the full dataset to the slimmed-down table.
tokyo_slim_gdf = gpd.GeoDataFrame(data=tokyo_slim, geometry=tokyo_gdf.geometry)

# Write the result to a GeoJSON file in the working directory.
tokyo_slim_gdf.to_file(filename="tokyo_slim.geojson", driver="GeoJSON")