In [3]:
import pandas as pd
import geopandas as gpd
import pyogrio
from data_pipeline.etl.sources.census.etl import CensusETL
from data_pipeline.etl.sources.geocorr_alternatives.etl import GeoCorrAlternativesETL

import geopandas as gpd
import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
from data_pipeline.etl.sources.geo_utils import (
    add_tracts_for_geometries,
    get_tract_geojson,
)
from data_pipeline.score import field_names
from data_pipeline.utils import get_module_logger, unzip_file_from_url

# Logger for this notebook, created with the project's get_module_logger
# helper. It is not used by any of the cells visible in this section.
logger = get_module_logger(__name__)


In [18]:

# Locations of the Census cartographic-boundary ZCTA files. The notebook reads
# local copies (presumably the unzipped archives -- confirm); the published
# sources they were taken from are:
#   2020 ZCTAs: https://www2.census.gov/geo/tiger/GENZ2020/shp/cb_2020_us_zcta520_500k.zip
#   2010 ZCTAs: https://www2.census.gov/geo/tiger/GENZ2019/shp/cb_2019_us_zcta510_500k.zip
# Judging by its file name, the 2019 release carries the 2010-vintage (zcta510)
# boundaries, which is why it backs the "2010" path.
# NOTE(review): these paths point into one user's ~/Downloads directory, so the
# notebook only runs on a machine where the files were placed there.
ZCTA_2020_SHAPEFILE_PATH = "~/Downloads/cb_2020_us_zcta520_500k"
ZCTA_2010_SHAPEFILE_PATH = "~/Downloads/cb_2019_us_zcta510_500k"

# Name of the ZCTA code column that comes from the 2010-vintage file.
ZCTA_2010_FIELD = "ZCTA5CE10"

# Output column: area of a 2020/2010 overlap divided by the area of the whole
# 2020 ZCTA.
PERCENT_OF_2020_in_2010_FIELD = "percent of 2020 in 2010"

In [11]:
# Load the 2020 ZCTA boundaries and rename the ZCTA code column to the
# project's standard zip-code field name. errors="raise" makes the rename fail
# loudly if the expected input column is not present in the file.
zcta_2020_gdf = gpd.read_file(filename=ZCTA_2020_SHAPEFILE_PATH).rename(
    columns={GeoCorrAlternativesETL.ZIP_CODE_INPUT_FIELD: field_names.ZIP_CODE},
    errors="raise",
)

# Load the 2010 ZCTA boundaries; its columns are left under their original names.
zcta_2010_gdf = gpd.read_file(filename=ZCTA_2010_SHAPEFILE_PATH)

In [14]:
# Reproject both vintages to the CRS defined by
# GeoCorrAlternativesETL.CRS_INTEGER so that the areas computed afterwards share
# one unit (presumably a projected CRS -- confirm the constant's value).
target_crs = GeoCorrAlternativesETL.CRS_INTEGER
zcta_2020_gdf = zcta_2020_gdf.to_crs(crs=target_crs)
zcta_2010_gdf = zcta_2010_gdf.to_crs(crs=target_crs)

In [15]:
# Record each 2020 ZCTA's full area as a plain column before the overlay: the
# overlay replaces the geometry with the intersection pieces, and this column
# is the denominator of the overlap share computed afterwards.
zcta_2020_gdf["zcta_2020_area"] = zcta_2020_gdf.geometry.area

In [16]:
# Intersect the 2020 ZCTAs with the 2010 ZCTAs: one output row per overlapping
# (2020 ZCTA, 2010 ZCTA) pair, carrying the columns of both inputs.
# keep_geom_type=False keeps every resulting geometry type, not just polygons
# (the displayed result contains GEOMETRYCOLLECTION rows).
joined_gdf = gpd.overlay(
    df1=zcta_2020_gdf,
    df2=zcta_2010_gdf,
    how="intersection",
    keep_geom_type=False,
)

# Area of each newly created overlap geometry.
overlap_area = joined_gdf.area
joined_gdf[GeoCorrAlternativesETL.AREA_JOINED_FIELD] = overlap_area

# Share of each 2020 ZCTA's own area that falls inside the matched 2010 ZCTA
# (overlap area divided by the full 2020 ZCTA area recorded before the overlay).
joined_gdf[PERCENT_OF_2020_in_2010_FIELD] = (
    overlap_area / joined_gdf["zcta_2020_area"]
)

In [21]:
# Distribution of the overlap shares, bucketed to one decimal place.
rounded_share = joined_gdf[PERCENT_OF_2020_in_2010_FIELD].round(decimals=1)
rounded_share.value_counts()

0.0    164183
1.0     17832
0.9      9812
0.1      8771
0.8      2787
0.2      2029
0.7      1454
0.3      1146
0.6       988
0.4       894
0.5       800
Name: percent of 2020 in 2010, dtype: int64

In [17]:
# Display the overlay result for inspection (the rendered table is truncated
# to the first and last rows).
joined_gdf

Unnamed: 0,Zip code,AFFGEOID20,GEOID20,NAME20,LSAD20,ALAND20,AWATER20,zcta_2020_area,ZCTA5CE10,AFFGEOID10,GEOID10,ALAND10,AWATER10,geometry,area_joined,percent of 2020 in 2010
0,35768,860Z200US35768,35768,35768,Z5,446231990,3736014,6.688056e+08,35776,8600000US35776,35776,234072461,1041223,GEOMETRYCOLLECTION (POLYGON ((-9597648.456 411...,2.193417e+06,0.003280
1,35769,860Z200US35769,35769,35769,Z5,163279214,57835709,3.270629e+08,35776,8600000US35776,35776,234072461,1041223,GEOMETRYCOLLECTION (POLYGON ((-9596251.174 410...,4.423302e+03,0.000014
2,35776,860Z200US35776,35776,35776,Z5,268376689,1277083,3.994217e+08,35776,8600000US35776,35776,234072461,1041223,GEOMETRYCOLLECTION (POLYGON ((-9609657.158 411...,3.377086e+08,0.845494
3,35774,860Z200US35774,35774,35774,Z5,36139337,362969,5.424829e+07,35776,8600000US35776,35776,234072461,1041223,GEOMETRYCOLLECTION (POLYGON ((-9592577.408 413...,3.181710e+06,0.058651
4,35747,860Z200US35747,35747,35747,Z5,195112094,9300885,3.016341e+08,35776,8600000US35776,35776,234072461,1041223,GEOMETRYCOLLECTION (POLYGON ((-9608245.738 410...,5.117346e+06,0.016965
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
210691,72046,860Z200US72046,72046,72046,Z5,603015221,20383513,9.214782e+08,72037,8600000US72037,72037,388714,0,"MULTIPOLYGON (((-10226266.281 4102252.880, -10...",5.753217e+05,0.000624
210692,13156,860Z200US13156,13156,13156,Z5,76214823,6028269,1.521320e+08,13064,8600000US13064,13064,461830,2948339,"MULTIPOLYGON (((-8538756.977 5360283.214, -853...",6.455585e+06,0.042434
210693,06850,860Z200US06850,06850,06850,Z5,17563836,206664,3.140506e+07,06856,8600000US06856,06856,9568,0,"POLYGON ((-8173142.581 5028860.482, -8173156.2...",1.245927e+04,0.000397
210694,99632,860Z200US99632,99632,99632,Z5,65153947,0,2.965687e+08,99632,8600000US99632,99632,65153947,0,"POLYGON ((-18231773.509 8888791.228, -18230845...",2.959838e+08,0.998028
