# Generate FUDS test data

Creating the fixture data for the Formerly Used Defense Sites (FUDS) is fairly involved. This notebook walks through creating the data and then eyeballing it so you can check your test results. If the FUDS source data is updated and you want to generate new sample data for your tests, run this notebook.

In [1]:
import json
import os
import sys

# Add this project to the path *before* importing from `data_pipeline`, so the
# path tweak can actually help the import below resolve.
module_path = os.path.abspath(os.path.join("../.."))
if module_path not in sys.path:
    sys.path.append(module_path)

from data_pipeline.etl.sources.geo_utils import add_tracts_for_geometries

In [2]:
import geopandas as gpd
import pandas as pd
from data_pipeline.etl.sources.census.etl import CensusETL
from data_pipeline.etl.sources.us_army_fuds.etl import USArmyFUDS

In [3]:
%load_ext lab_black

# Load the source data and census tract data

In [4]:
# Instantiate the FUDS ETL and run its extract step to fetch the source data
# (the log output under this cell shows the download it performs).
etl = USArmyFUDS()
etl.extract()

2022-08-10 17:57:23,542 [data_pipeline.etl.sources.us_army_fuds.etl] INFO     Starting data download.
2022-08-10 17:57:23,542 [data_pipeline.utils] INFO     Downloading https://opendata.arcgis.com/api/v3/datasets/3f8354667d5b4b1b8ad7a6e00c3cf3b1_1/downloads/data?format=geojson&spatialRefId=4326&where=1%3D1


In [5]:
# Read the file downloaded by the extract step.
# NOTE(review): `lowmemory` looks like a carry-over of pandas' `low_memory`
# option; presumably geopandas just forwards it to the underlying reader —
# confirm it has any effect here.
df = gpd.read_file(etl.DOWNLOAD_FILE_NAME, lowmemory=False)

In [6]:
# Load the census tracts from the path the census ETL exposes as
# `NATIONAL_TRACT_JSON_PATH`.
census_tracts = gpd.read_file(CensusETL.NATIONAL_TRACT_JSON_PATH)

In [7]:
# Key the tracts by GEOID10 so they can be looked up by tract ID with `.loc`.
census_tracts = census_tracts.set_index("GEOID10")

# Generate the test data

In [8]:
# Also keep the download as plain parsed JSON: the fixture written later in
# this notebook is assembled from these original feature records.
with open(etl.DOWNLOAD_FILE_NAME) as source_file:
    raw_fuds_geojson = json.loads(source_file.read())

In [None]:
# Attach tract information to each FUDS record; the result is joined on its
# `GEOID10_TRACT` column by the merge further down.
tract_df = add_tracts_for_geometries(df)

In [None]:
# Tract IDs from the example test fixture. Every column is read as `object`
# so the GEOIDs are kept as strings rather than parsed as numbers.
# NOTE(review): presumably this is to preserve leading zeros — confirm.
example_geoids = pd.read_csv(
    "../tests/sources/example/data/extract.csv", dtype="object"
)

In [None]:
# Left-join the example tract IDs onto the FUDS-with-tracts frame. The
# `_merge` indicator column records whether each example tract matched a
# FUDS record ("both") or not ("left_only").
merged_exaple_data = example_geoids[["GEOID10_TRACT"]].merge(
    tract_df,
    how="left",
    on="GEOID10_TRACT",
    indicator=True,
)

In [None]:
# Show the example tracts that have no FUDS record in the source data.
is_unmatched = merged_exaple_data["_merge"] == "left_only"
merged_exaple_data.loc[is_unmatched]

In [None]:
# Compute a centroid for every example tract that has no FUDS record,
# skipping tracts whose GEOID starts with "06".
original_crs = census_tracts.crs

unmatched_rows = merged_exaple_data[(merged_exaple_data["_merge"] == "left_only")]
unmatched_geoids = unmatched_rows.query(
    'not GEOID10_TRACT.str.startswith("06")'
).GEOID10_TRACT

# Centroids are taken in EPSG:3395 and then converted back to the tracts' own
# CRS.
# NOTE(review): presumably this avoids computing centroids in a geographic
# CRS — confirm.
projected_tracts = census_tracts.to_crs(epsg=3395)
tract_centroids = projected_tracts.loc[unmatched_geoids].centroid

# Dict keyed by the `census_tracts` index label of each selected tract.
points = tract_centroids.to_crs(original_crs).to_dict()

In [None]:
# OBJECTIDs of the real FUDS records that matched one of the example tracts.
matched_rows = merged_exaple_data[merged_exaple_data["_merge"] == "both"]
object_ids_to_keep = set(matched_rows.OBJECTID.astype("int"))

# Keep only the raw GeoJSON features belonging to those records.
features = [
    candidate
    for candidate in raw_fuds_geojson["features"]
    if candidate["properties"]["OBJECTID"] in object_ids_to_keep
]

In [None]:
def make_fake_feature(
    state: str, has_projects: bool, is_eligible: bool, latitude: float, longitude: float
):
    """Build a synthetic FUDS GeoJSON point feature.

    Used for tracts where the source data has no FUDS record. Each call
    increments the counter stored on the function itself
    (``make_fake_feature._object_id``) and uses the new value as the
    feature's ``OBJECTID``.

    Args:
        state: Value stored in the ``STATE`` property.
        has_projects: Drives the ``HASPROJECTS`` ("Yes"/"No") and ``STATUS``
            properties.
        is_eligible: Drives the ``ELIGIBILITY`` property
            ("Eligible"/"Ineligible").
        latitude: Stored in ``LATITUDE`` and as the second point coordinate.
        longitude: Stored in ``LONGITUDE`` and as the first point coordinate.

    Returns:
        A dict shaped like a GeoJSON ``Feature`` with a ``Point`` geometry.
    """
    # Advance the function-level counter first so every feature gets a new ID.
    object_id = make_fake_feature._object_id + 1
    make_fake_feature._object_id = object_id

    eligibility = "Eligible" if is_eligible else "Ineligible"
    if has_projects:
        has_projects_label = "Yes"
        status = "Properties with projects"
    else:
        has_projects_label = "No"
        status = "Properties without projects"

    # Key order is kept stable because it determines the serialized output.
    properties = {
        "OBJECTID": object_id,
        "CENTROIDLAT": None,
        "CENTROIDLONG": None,
        "CLOSESTCITY": None,
        "CONGRESSIONALDISTRICT": "15",
        "COUNTY": None,
        "CURRENTOWNER": None,
        "DODFUDSPROPERTYIDPK": " ",
        "ELIGIBILITY": eligibility,
        "EMSMGMTACTIONPLANLINK": "https://fudsportal.usace.army.mil/ems/inventory/map?id=54113",
        "EPAREGION": "06",
        "FEATUREDESCRIPTION": None,
        "FEATURENAME": "NEIL, ET AL, PROPERTIES",
        "FUDSINSTALLATIONID": None,
        "FUDSUNIQUEPROPERTYNUMBER": "K06TX1120",
        "HASPROJECTS": has_projects_label,
        "LATITUDE": latitude,
        "LONGITUDE": longitude,
        "MEDIAID": None,
        "METADATAID": None,
        "NOFURTHERACTION": None,
        "PROJECTREQUIRED": "No",
        "SDSID": None,
        "SITEELIGIBILITY": None,
        "STATE": state,
        "STATUS": status,
        "STATUSCODE": "Not on the NPL",
        "USACEDISTRICT": "swf",
        "FISCALYEAR": "2019",
        "PROPERTY_HISTORY": None,
        "USACEDIVISION": "swd",
    }

    # GeoJSON coordinates are ordered [x, y], i.e. [longitude, latitude].
    geometry = {
        "type": "Point",
        "coordinates": [longitude, latitude],
    }

    return {"type": "Feature", "properties": properties, "geometry": geometry}


# Starting value of the counter; the first generated feature gets the next ID.
make_fake_feature._object_id = 50

In [None]:
# Create FUDS in CA for each tract that doesn't have a FUDS: one fake record
# per (has_projects, is_eligible) combination listed below.
# NOTE(review): `points` is built from tracts whose GEOID does NOT start with
# "06", yet every fake record is labelled "CA" — confirm this is intended.
flag_combinations = [(True, True), (True, False), (False, False)]
for point in points.values():
    for has_projects, is_eligible in flag_combinations:
        # The point's y is passed as the latitude and its x as the longitude.
        features.append(
            make_fake_feature("CA", has_projects, is_eligible, point.y, point.x)
        )

In [None]:
# Same top-level GeoJSON entries as the download, with the feature list
# swapped for the selected real features plus the fake ones.
test_fuds_geojson = {**raw_fuds_geojson, "features": features}

In [None]:
# Serialize the trimmed-down GeoJSON into the tests' data directory.
with open("../tests/sources/us_army_fuds/data/fuds.geojson", "w") as fixture_file:
    fixture_file.write(json.dumps(test_fuds_geojson))

# Eyeball the data to check the results of the tests

In [None]:
# Read the fixture file back from the path it was written to.
test_frame = gpd.read_file("../tests/sources/us_army_fuds/data/fuds.geojson")

In [None]:
# Attach tracts to the fixture records. Two names are bound to the same
# frame: `test_frame_with_tracts` is narrowed in a later cell, while the
# `_full` name keeps every column.
test_frame_with_tracts_full = add_tracts_for_geometries(test_frame)
test_frame_with_tracts = test_frame_with_tracts_full

## Pre-compute the (long, lat) → tract mapping for use in a mock in the tests

In [None]:
# Index by (tract, record) and keep only the eligibility and has-projects
# columns.
indexed_by_tract = test_frame_with_tracts.set_index(["GEOID10_TRACT", "OBJECTID"])
test_frame_with_tracts = indexed_by_tract[["ELIGIBILITY", "HASPROJECTS"]]

In [None]:
# One row per distinct (tract, point) pair.
tracts = test_frame_with_tracts_full[["GEOID10_TRACT", "geometry"]].drop_duplicates()

# Key each row by its point coordinates. The tuple is (x, y), i.e.
# (longitude, latitude), so the column is named accordingly. Computing it on
# the deduplicated rows avoids re-aligning a full-length series by index.
tracts["long_lat"] = tracts["geometry"].apply(lambda point: (point.x, point.y))

# Display the {(long, lat): tract GEOID} mapping.
tracts.set_index("long_lat")["GEOID10_TRACT"].to_dict()

## Look at the sample data itself

In [None]:
# Display the narrowed frame in its current row order.
test_frame_with_tracts

In [None]:
# Same frame, sorted by its (GEOID10_TRACT, OBJECTID) index.
test_frame_with_tracts.sort_index()