In [1]:
import pandas as pd
import geopandas as gpd
from shapely import geometry

In [2]:
# Load the building-level Stata table from the harvard_data directory.
buildings = pd.read_stata(
    filepath_or_buffer="../data/harvard_data/BUILDING_LV.dta",
)

In [3]:
# Read the three construction/engineering Stata tables in one pass; the
# generator is consumed left to right, so the files are read in listed order.
extraspace, world, engineering = (
    pd.read_stata(stata_path)
    for stata_path in (
        "../data/harvard_data/CONSTR_EXTRASPACE.dta",
        "../data/harvard_data/CONSTR_WORLD.dta",
        "../data/harvard_data/ENGINEERING_DATA.dta",
    )
)

In [4]:
# Read the grid tables (long and wide layouts) and the PIN table; the
# generator is consumed left to right, so the files are read in listed order.
grid_long, grid_wide, pin = (
    pd.read_stata(stata_path)
    for stata_path in (
        "../data/harvard_data/GRID_LONG.dta",
        "../data/harvard_data/GRID_WIDE.dta",
        "../data/harvard_data/PIN_USING.dta",
    )
)

In [5]:
# Preview the first five rows of the PIN table.
pin.head(n=5)

Unnamed: 0,pin_12,pin,yrbuilt,cc,lLV,lFSI
0,13313070000000.0,1331307000.0,1925.0,1920.0,-0.510826,-1.403821
1,13311240000000.0,1331124000.0,1995.0,1990.0,2.174752,0.151158
2,13311240000000.0,1331124000.0,1997.0,1990.0,2.174752,0.140062
3,13311240000000.0,1331124000.0,1997.0,1990.0,2.174752,0.159561
4,13311240000000.0,1331124000.0,1995.0,1990.0,2.174752,0.147677


In [6]:
# Inspect the building x-coordinate column on its own.
buildings.loc[:, 'x_build_coord']

0       222.370804
1       222.083099
2       222.089325
3       223.207932
4       223.228317
           ...    
1732    222.416153
1733    222.405655
1734    221.336533
1735    221.442520
1736    221.468307
Name: x_build_coord, Length: 1737, dtype: float32

In [7]:
# List every column of the wide grid table together with its dtype.
grid_wide.dtypes

grid_id         int32
latitude      float32
longitude     float32
x_coord       float32
y_coord       float32
lv1913        float32
lv1926        float32
lv1932        float32
lv1939        float32
lv1949        float32
lv1961        float32
lv1971        float32
lv1981        float32
lv1990        float32
lv2000        float32
lv2009        float32
lv1873        float32
lv1892        float32
llv1913       float32
llv1926       float32
llv1932       float32
llv1939       float32
llv1949       float32
llv1961       float32
llv1971       float32
llv1981       float32
llv1990       float32
llv1873       float64
llv1892       float64
llv2000       float64
llv2009       float64
nllv1913      float32
nllv1926      float32
nllv1932      float32
nllv1939      float32
nllv1949      float32
nllv1961      float32
nllv1971      float32
nllv1981      float32
nllv1990      float32
nllv1873      float64
nllv1892      float64
nllv2000      float64
nllv2009      float64
dist_cbd      float32
ldist_cbd 

In [8]:
def create_grid_square(row, half_size=165):
    """Return the square polygon centred on a row's ``x_coord``/``y_coord``.

    Parameters
    ----------
    row : object exposing ``x_coord`` and ``y_coord`` attributes
        For example a row handed over by ``DataFrame.apply(..., axis=1)``.
    half_size : number, default 165
        Half the side length of the square, in the same units as the
        coordinates. The default reproduces the previously hard-coded
        330 x 330 cell.

    Returns
    -------
    shapely.geometry.Polygon
        Axis-aligned square with corners at ``(x +/- half_size, y +/- half_size)``.
    """
    # East is the larger x and west the smaller one; the earlier version had
    # these two labels swapped, which made the corner list misleading to read.
    east = row.x_coord + half_size
    west = row.x_coord - half_size
    north = row.y_coord + half_size
    south = row.y_coord - half_size

    # Same vertex sequence as before (NE -> NW -> SW -> SE), so the exterior
    # ring keeps its counter-clockwise orientation.
    return geometry.Polygon(
        [[east, north], [west, north], [west, south], [east, south]]
    )

# Build one polygon per grid row by passing each row to create_grid_square.
geometries = grid_wide.apply(func=create_grid_square, axis="columns")

In [9]:
# Combine the grid attributes with the cell polygons and declare the
# coordinates as ESRI:102671 (NAD 1983 StatePlane Illinois East, US feet).
grid_gdf = gpd.GeoDataFrame(
    data=grid_wide,
    crs="ESRI:102671",
    geometry=geometries,
)

In [10]:
# Columns to remove before export: the distance measures plus every
# llv<year> / nllv<year> column (all llv columns first, then all nllv).
survey_years = (1873, 1892, 1913, 1926, 1932, 1939, 1949, 1961, 1971, 1981, 1990, 2000, 2009)
columns_to_drop = ['dist_cbd', 'ldist_cbd', 'dist_river', 'dist_lm', "ldist_lm"] + [
    f"{stem}{yr}"
    for stem in ("llv", 'nllv')
    for yr in survey_years
]
        

In [11]:
# Rebind to the frame returned by drop() instead of mutating in place:
# inplace=True gives no performance benefit and hides state changes between
# cells. A missing column still raises KeyError, exactly as before.
grid_gdf = grid_gdf.drop(columns=columns_to_drop)

In [12]:
# GeoJSON (RFC 7946) coordinates must be WGS 84 longitude/latitude, and many
# GeoJSON consumers ignore an embedded CRS. Reproject from the frame's
# projected CRS before writing so the file is readable everywhere.
# NOTE(review): readers that relied on projected coordinates in this file
# must now call .to_crs(...) after loading.
grid_gdf.to_crs("EPSG:4326").to_file(
    "../data/olcott_land_values.geojson", driver="GeoJSON"
)