### dependencies


In [1]:
import pandas as pd
import geopandas as gpd
import warnings
import topojson as tp
import requests
import os
from parcllabs import ParclLabsClient
from dotenv import load_dotenv
from requests.exceptions import RequestException
import datetime

# Metro-area counties: five-digit county FIPS code -> county name.
# All 14 keys carry the '47' state prefix (mapped to 'TN' in fips_dict below).
county_dict = {
    '47015': 'Cannon',
    '47021': 'Cheatham',
    '47037': 'Davidson',
    '47043': 'Dickson',
    '47081': 'Hickman',
    '47111': 'Macon',
    '47119': 'Maury',
    '47147': 'Robertson',
    '47149': 'Rutherford',
    '47159': 'Smith',
    '47165': 'Sumner',
    '47169': 'Trousdale',
    '47187': 'Williamson',
    '47189': 'Wilson'
}

# Two-digit state FIPS code -> two-letter postal abbreviation.
# Covers the 50 states plus DC; no other codes are listed, so any other
# prefix maps to NaN when used with Series.map.
fips_dict = {
    '01': 'AL',
    '02': 'AK',
    '04': 'AZ',
    '05': 'AR',
    '06': 'CA',
    '08': 'CO',
    '09': 'CT',
    '10': 'DE',
    '11': 'DC',
    '12': 'FL',
    '13': 'GA',
    '15': 'HI',
    '16': 'ID',
    '17': 'IL',
    '18': 'IN',
    '19': 'IA',
    '20': 'KS',
    '21': 'KY',
    '22': 'LA',
    '23': 'ME',
    '24': 'MD',
    '25': 'MA',
    '26': 'MI',
    '27': 'MN',
    '28': 'MS',
    '29': 'MO',
    '30': 'MT',
    '31': 'NE',
    '32': 'NV',
    '33': 'NH',
    '34': 'NJ',
    '35': 'NM',
    '36': 'NY',
    '37': 'NC',
    '38': 'ND',
    '39': 'OH',
    '40': 'OK',
    '41': 'OR',
    '42': 'PA',
    '44': 'RI',
    '45': 'SC',
    '46': 'SD',
    '47': 'TN',
    '48': 'TX',
    '49': 'UT',
    '50': 'VT',
    '51': 'VA',
    '53': 'WA',
    '54': 'WV',
    '55': 'WI',
    '56': 'WY'
}

url = 'https://transition.fcc.gov/oet/info/maps/census/fips/fips.txt'

# Fetch the FIPS listing. requests applies no timeout by default, so set one
# explicitly: a stalled connection should fail fast instead of hanging the kernel.
response = requests.get(url, timeout=30)
response.raise_for_status()  # Check that the request was successful

# Keep only the text that follows the separator row with this exact dash pattern
table = response.text.split('------------    --------------\n')[1]

# Strip leading/trailing whitespace and split by newline
lines = table.strip().split('\n')

# One row per line, then split each line on its first run of whitespace
# into the code ('FIPS') and the remaining text ('County_name')
df = pd.DataFrame(lines, columns=['Data'])
df[['FIPS', 'County_name']] = df['Data'].str.split(n=1, expand=True)

# Drop the original 'Data' column
df = df.drop(columns=['Data'])

# A blank line inside the table parses to a missing FIPS; drop those rows so
# the boolean filter below never has to negate NaN
df = df.dropna(subset=['FIPS'])

# Drop rows where 'FIPS' ends with '000' (presumably the state-level rows --
# confirm against the file). Copy so the column assignments below operate on
# an independent frame rather than a slice.
df = df[~df['FIPS'].str.endswith('000')].copy()

# Map the first 2 digits of 'FIPS' to the state abbreviation using fips_dict
df['State'] = df['FIPS'].str[:2].map(fips_dict)

df['county_state'] = df['County_name'] + ', ' + df['State']

# Five-digit FIPS code -> '<County_name>, <State>' lookup
nationwide_FIPSdict = dict(zip(df['FIPS'], df['county_state']))


def get_dates(months_ago):
    """Return today's date and the date roughly `months_ago` months earlier.

    A month is approximated as 30.44 days, so the earlier date is a
    day-count offset from today rather than a calendar-month offset.

    Args:
      months_ago: The number of months to go back.

    Returns:
      A tuple `(today, earlier)` of "YYYY-MM-DD" strings: today's date first,
      then the date `months_ago` months before it.
    """
    date_format = "%Y-%m-%d"
    average_days_per_month = 30.44

    today = datetime.date.today()
    lookback = datetime.timedelta(days=months_ago * average_days_per_month)
    earlier = today - lookback

    return today.strftime(date_format), earlier.strftime(date_format)

### simplify tracts, derive counties


In [None]:
# ignore the warnings that come with simplifying geographically
# (this silences RuntimeWarning for the rest of the kernel session)
warnings.filterwarnings("ignore", category=RuntimeWarning)

# simplify tracts --------------------------------------
tracts = gpd.read_file('tract_outlines.gpkg')

# Five-digit county FIPS = state code + county code
tracts['FIPS'] = tracts['STATEFP'] + tracts['COUNTYFP']

# Keep only the needed columns; copy so the assignment below works on an
# independent frame rather than a slice of the file just read
tracts = tracts[[
    'FIPS',
    'GEOID',
    'geometry'
]].copy()

# FIPS codes missing from nationwide_FIPSdict map to NaN here
tracts['county_name'] = tracts['FIPS'].map(nationwide_FIPSdict)

# toposimplify tolerance handed to topojson (units presumably follow the
# layer's CRS -- TODO confirm)
toposimplify_tracts = 0.001
tracts_simp = tp.Topology(tracts, toposimplify=toposimplify_tracts).to_gdf()
tracts_simp.to_file('tracts_simp.gpkg')

# create the counties by dissolving the tracts on the FIPS column
# NOTE(review): this dissolves the original `tracts`, not `tracts_simp`, even
# though the result is exported as 'counties_simp.gpkg' -- confirm intended.
counties = tracts.dissolve(by='FIPS').reset_index()
counties = counties.drop(columns='GEOID')
counties['county_name'] = counties['FIPS'].map(nationwide_FIPSdict)

# Strip the ' County, <state>' tail. Names are NaN for FIPS codes absent from
# nationwide_FIPSdict; pass those through instead of calling .split on a float.
counties['county_stripped'] = counties['county_name'].apply(
    lambda name: name.split(' County,')[0] if isinstance(name, str) else name)
counties = counties[[
    'FIPS',
    'county_name',
    'county_stripped',
    'geometry'
]]

# export the county layer
counties.to_file('counties_simp.gpkg')
print('export complete!')

### Convert STDB Excel files to CSV


In [None]:
# Need to open each Excel file downloaded from STDB, make a small change, and save
# Then run this script
def convert_excel_to_csv(directory, output_directory):
    """Convert STDB "Color-coded maps" workbooks in `directory` to CSV files.

    Every file whose name starts with "Color-coded maps" and ends with
    ".xlsx" is read with openpyxl, its "Census Tract" column is cast to
    string and renamed to "GEOID", and the result is written to
    `output_directory` under the same base name with a ".csv" extension.

    Args:
      directory: Folder that holds the downloaded .xlsx files.
      output_directory: Folder that receives the CSV files. It is created
        (including parents) if it does not exist yet.

    Returns:
      None. One "Converted ..." line is printed per file written.

    Raises:
      KeyError: If a matching workbook has no "Census Tract" column.
    """
    # Create the target folder up front so to_csv cannot fail on a missing path
    os.makedirs(output_directory, exist_ok=True)

    # Sorted so files are processed (and logged) in a stable order
    for filename in sorted(os.listdir(directory)):
        is_stdb_workbook = (
            filename.startswith("Color-coded maps")
            and filename.endswith(".xlsx")
        )
        if not is_stdb_workbook:
            continue

        # Read the Excel file into a DataFrame
        excel_path = os.path.join(directory, filename)
        df = pd.read_excel(excel_path, engine='openpyxl')

        # Ensure the "Census Tract" column is of type object (string),
        # then expose it under the name "GEOID"
        df['Census Tract'] = df['Census Tract'].astype(str)
        df = df.rename(columns={'Census Tract': 'GEOID'})

        # Swap only the trailing extension; str.replace would also rewrite
        # any ".xlsx" that appears earlier in the file name
        csv_filename = os.path.splitext(filename)[0] + ".csv"
        csv_path = os.path.join(output_directory, csv_filename)

        # Save the DataFrame to a CSV file
        df.to_csv(csv_path, index=False)
        print(f"Converted {filename} to {csv_filename}")


# Convert the matching workbooks found in 'Data/' and write the CSVs to 'Data/CSV/'
convert_excel_to_csv('Data/', 'Data/CSV/')

In [None]:
# Load the exported county layer from 'Data/counties_simp.gpkg' and display it
# as the cell output for a visual check
gdf = gpd.read_file('Data/counties_simp.gpkg')

gdf

### Generate a county total net dataframe


In [None]:
# Inflow totals are keyed by destination county and outflow totals by origin
# county. Give both frames a shared '<FIPS>-<year>' key so each county-year
# row of one can be paired with the same county-year row of the other.
countyTotal_inflow = pd.read_csv('Data/inflow_CountyTotal.csv')
countyTotal_inflow['merge_ID'] = (
    countyTotal_inflow['destination_FIPS'].astype(str)
    + '-'
    + countyTotal_inflow['year'].astype(str)
)

countyTotal_outflow = pd.read_csv('Data/outflow_CountyTotal.csv')
countyTotal_outflow['merge_ID'] = (
    countyTotal_outflow['origin_FIPS'].astype(str)
    + '-'
    + countyTotal_outflow['year'].astype(str)
)

# Default (inner) merge: only county-years present in both files survive
df_merged = countyTotal_inflow.merge(countyTotal_outflow, on='merge_ID')

# Take the county identity and year from the inflow side ('year' exists in
# both inputs, so the merge suffixed the inflow copy as 'year_x')
netflow_columns = [
    'year',
    'FIPS',
    'county_name',
    'people_inflow',
    'agi_inflow',
    'agi_capita_inflow',
    'people_outflow',
    'agi_outflow',
    'agi_capita_outflow',
]
df_merged = (
    df_merged
    .rename(columns={
        'destination_FIPS': 'FIPS',
        'year_x': 'year',
        'destination_county': 'county_name',
    })
    .loc[:, netflow_columns]
    .sort_values(by='year')
)

# Net flow = inflow minus outflow, for people and for AGI
df_merged['people_net'] = df_merged['people_inflow'] - df_merged['people_outflow']
df_merged['agi_net'] = df_merged['agi_inflow'] - df_merged['agi_outflow']
df_merged.to_csv('Data/netflow_CountyTotal.csv', index=False)

df_merged

#### Create a Metro migration total "series" to run in parallel with selected county


In [None]:
# Sum every flow measure across all counties, one row per year
summed_columns = [
    'people_net',
    'agi_net',
    'people_inflow',
    'agi_inflow',
    'people_outflow',
    'agi_outflow',
]
metro_data = (
    df_merged
    .groupby('year')
    .agg({column: 'sum' for column in summed_columns})
    .reset_index()
)

# Label the aggregate rows as a pseudo-county called 'Metro'.
# The per-capita columns are filled with 0 rather than computed.
metro_data = metro_data.assign(
    FIPS='n/a',
    county_name='Metro',
    agi_capita_inflow=0,
    agi_capita_outflow=0,
)

# Put the columns in the same order as the county-level frame
metro_data = metro_data[[
    'year',
    'FIPS',
    'county_name',
    'people_inflow',
    'agi_inflow',
    'agi_capita_inflow',
    'people_outflow',
    'agi_outflow',
    'agi_capita_outflow',
    'people_net',
    'agi_net',
]]

# Append the metro-wide rows after the county rows
df_final = pd.concat([df_merged, metro_data], ignore_index=True)

# Keep only the text before ' County' in each name ('Metro' has no such
# suffix and is left as is)
county_name_parts = df_final['county_name'].str.split(' County', expand=True)
df_final['county_name'] = county_name_parts[0]


df_final.to_csv('Data/netflow_MetroTotal.csv', index=False)
df_final.tail(3)

In [None]:
# get county-level demographic KPIs
import glob

# glob returns matches in arbitrary order, so sort them to make "the first
# file" the same file on every run and every machine
csv_files = sorted(glob.glob('County_CSV/*.csv'))
if not csv_files:
    # Fail with a message that names the pattern instead of a bare IndexError
    raise FileNotFoundError("No CSV files found matching 'County_CSV/*.csv'")

# Only the first file is loaded; the merge pipeline below is disabled
df_master = pd.read_csv(csv_files[0])

# NOTE(review): the commented-out block below is the disabled pipeline that
# merges the remaining files and writes 'County_CSV/countyKPI.csv'.
# for file in csv_files[1:]:
#     df_temp = pd.read_csv(file)
#     df_master = df_master.merge(
#         df_temp,
#         on='County',
#         how='left',
#         suffixes=('', '_' + file.split('/')[-1].split('.')[0])
#     )

# df_master['county_name'] = df_master['County'].apply(
#     lambda x: x.split(' County')[0])

# df_master = df_master[df_master['county_name'].isin(county_dict.values())]

# df_master['2024 Population 55+'] = df_master['2024 Population Age 55-59'].str.replace(',', '').astype(int) + \
#     df_master['2024 Population Age 60-64'].str.replace(',', '').astype(int) + \
#     df_master['2024 Senior Population'].str.replace(',', '').astype(int)
# df_master['2024 Population 55+'] = df_master['2024 Population 55+'].apply(
#     lambda x: f'{x:,.0f}')

# df_master = df_master.drop(columns=[
#     'County',
#     '2024 Population Age 60-64',
#     '2024 Population Age 55-59',
#     '2024 Senior Population'
# ])

# cols = list(df_master)
# cols.insert(0, cols.pop(cols.index('county_name')))
# df_master = df_master.loc[:, cols]
# df_master.to_csv('County_CSV/countyKPI.csv', index=False)
# df_master.head(4)
df_master

In [None]:
# Build a tract-level 55+ population table by adding the 55-59 and 60-64
# brackets to the senior population.
# NOTE(review): the cell that follows rebuilds df_senior_master and writes the
# same output file -- confirm which of the two is meant to be kept.
df_senior = pd.read_csv(
    'CSV/Color-coded maps - 2024 Senior Population.csv',
    dtype={'GEOID': 'str'},
)

# The two age-bracket files key their rows on 'Census Tract'; rename it on
# load so all three frames share 'GEOID'.
# (Disabled step, previously commented out for each frame: reformat GEOID via
# float -> '{:.2f}' with the '.' removed.)
tract_key_rename = {'Census Tract': 'GEOID'}
df_55 = pd.read_csv(
    'CSV/Color-coded maps - 2024 Population Age 55-59.csv',
    dtype={'Census Tract': 'str'},
).rename(columns=tract_key_rename)
df_60 = pd.read_csv(
    'CSV/Color-coded maps - 2024 Population Age 60-64.csv',
    dtype={'Census Tract': 'str'},
).rename(columns=tract_key_rename)

# Left merges: every senior-population row is kept, matched or not
df_senior_master = (
    df_senior
    .merge(df_55, how='left', on='GEOID')
    .merge(df_60, how='left', on='GEOID')
)

# 55+ total = senior population + ages 55-59 + ages 60-64
df_senior_master['senior_total'] = (
    df_senior_master['2024 Senior Population']
    + df_senior_master['2024 Population Age 55-59']
    + df_senior_master['2024 Population Age 60-64']
)

# Publish the total under the original senior-population column name
df_senior_master = df_senior_master[['GEOID', 'senior_total']].rename(
    columns={'senior_total': '2024 Senior Population'})
df_senior_master.to_csv(
    'CSV/Color-coded maps - 2024 Senior Population_55.csv', index=False)

In [None]:
# Rebuild the tract-level 55+ table, this time rewriting the senior file's
# GEOID as a number with exactly two decimals before merging.
# NOTE(review): only df_senior's GEOID is reformatted; df_55 / df_60 keys are
# used as read. Confirm those files already use the two-decimal form,
# otherwise the left merges below leave their columns as NaN.
df_senior = pd.read_csv(
    'CSV/Color-coded maps - 2024 Senior Population.csv',
    dtype={'GEOID': 'str'},
)
df_senior['GEOID'] = df_senior['GEOID'].astype(float).map('{:.2f}'.format)

# The age-bracket files name their key 'Census Tract'; align it to 'GEOID'
bracket_read_options = {'dtype': {'Census Tract': 'str'}}
df_55 = pd.read_csv(
    'CSV/Color-coded maps - 2024 Population Age 55-59.csv',
    **bracket_read_options,
).rename(columns={'Census Tract': 'GEOID'})
df_60 = pd.read_csv(
    'CSV/Color-coded maps - 2024 Population Age 60-64.csv',
    **bracket_read_options,
).rename(columns={'Census Tract': 'GEOID'})

# Attach each bracket to the senior rows, keeping every senior row
df_senior_master = df_senior
for bracket_df in (df_55, df_60):
    df_senior_master = df_senior_master.merge(
        bracket_df, how='left', on='GEOID')

# 55+ total = senior population + ages 55-59 + ages 60-64
senior_total = (
    df_senior_master['2024 Senior Population']
    + df_senior_master['2024 Population Age 55-59']
    + df_senior_master['2024 Population Age 60-64']
)

# Keep the key and the total, published under the senior-population name
df_senior_master = (
    df_senior_master
    .assign(senior_total=senior_total)
    .loc[:, ['GEOID', 'senior_total']]
    .rename(columns={'senior_total': '2024 Senior Population'})
)
df_senior_master.to_csv(
    'CSV/Color-coded maps - 2024 Senior Population_55.csv', index=False)

df_senior_master

### ParclLabs home sales data


In [6]:
# Step 1: read the Parcl Labs API key from the .env file and build the client
load_dotenv('../Unused/.env')
api_key = os.environ.get('PARCL_LABS_API_KEY')
client = ParclLabsClient(api_key=api_key)
print('API key loaded - Step 1 complete!')

# Step 2: build the list of Parcl ids, one per metro county.
# Each county FIPS code is looked up as a market geoid and the 'parcl_id' of
# the first returned row is kept.
metro_county_ids = [
    client.search.markets.retrieve(geoid=county_fips)['parcl_id'].values[0]
    for county_fips in county_dict
]
print(f'{len(metro_county_ids)} metro county IDs acquired - Step 2 complete!')

# Step 3: collect single-family homes built in 2020 or later for every metro
# county, one search per county, keeping only the columns used downstream
print('Searching for single-family homes...')
assessor_columns = [
    'parcl_property_id',
    'address',
    'city',
    'latitude',
    'longitude',
    'square_footage',
    'year_built',
]
assessor_df = [
    client.property.search.retrieve(
        parcl_ids=[county_id],
        property_type='SINGLE_FAMILY',
        year_built_min=2020,
    )[assessor_columns]
    for county_id in metro_county_ids
]

# Stack the per-county results into one frame with a fresh index
assessor_combined = pd.concat(assessor_df, ignore_index=True)
print('Metro assessor file created - Step 3 complete!')

# Step 4: get sales events for the IDs in the assessor file ----------------------
months_to_go_back = 18
# get_dates returns (today, earlier date), so today becomes the end of the
# search window and the earlier date its start
end_date, start_date = get_dates(months_to_go_back)

parcl_property_ids = assessor_combined['parcl_property_id'].unique()

print(f'Searching for sales events since {start_date}...')
sales = client.property.events.retrieve(
    parcl_property_ids=parcl_property_ids,
    event_type='SALE',
    start_date=start_date,
    end_date=end_date,
)
print('Finished property events search - Step 4 complete!')

# Keep events with a positive price (drops zero-dollar transfers) whose
# event_name is 'SOLD' (the original note calls the rest non arms length),
# and only the two columns needed downstream
has_price = sales['price'] > 0
is_sold = sales['event_name'] == 'SOLD'
sales = sales.loc[has_price & is_sold, ['parcl_property_id', 'price']]

# Attach the assessor attributes (location, size, year built) to each sale
sales_with_assessor = sales.merge(
    assessor_combined,
    how='left',
    on='parcl_property_id',
)

# Step 5: turn each sale into a point and spatially join it to tracts
# NOTE(review): the points are declared as EPSG:4269 -- confirm this matches
# the CRS of the tract and county layers used in the joins.
sale_points = gpd.points_from_xy(
    sales_with_assessor['longitude'],
    sales_with_assessor['latitude'],
)
sales_geo = gpd.GeoDataFrame(
    sales_with_assessor,
    geometry=sale_points,
    crs="EPSG:4269",
)

# tracts first: keep the sales that fall within a tract polygon
tracts = gpd.read_file('tracts_simp.gpkg')
sales_tract = gpd.sjoin(sales_geo, tracts, predicate='within')
print('Spatially joined with tracts - Step 5 complete!')

# Per tract: number of sales, median price, median square footage
tract_metrics = {
    'total_sales': ('parcl_property_id', 'count'),
    'median_price': ('price', 'median'),
    'median_SF': ('square_footage', 'median'),
}
tract_agg = sales_tract.groupby('GEOID').agg(**tract_metrics).reset_index()
tract_agg.to_csv('Parcl_Recorder/tract_aggregation.csv', index=False)
print('Tract aggregation created & exported - Step 6 complete!')

# then counties: keep the sales that fall within a county polygon
counties = gpd.read_file('counties_simp.gpkg')
sales_county = gpd.sjoin(sales_geo, counties, predicate='within')
print('Spatially joined with counties - Step 7 complete!')

# Per county: number of sales, median price, median square footage
county_agg = sales_county.groupby('county_stripped').agg(
    total_sales=('parcl_property_id', 'count'),
    median_price=('price', 'median'),
    median_SF=('square_footage', 'median')
).reset_index().rename(columns={'county_stripped': 'county'})

# Same module-level lists as before, kept for any later cell that reads them
new_total_sales = county_agg['total_sales'].to_list()
new_median_price = county_agg['median_price'].to_list()
new_median_SF = county_agg['median_SF'].to_list()

countyKPI = pd.read_csv('County_CSV/countyKPI.csv')
kpi_columns = ['total_sales', 'median_price', 'median_SF']

# Align the new values to the KPI rows by county name instead of by row
# position: positional assignment silently attaches values to the wrong county
# when the two tables are ordered differently, and fails outright when a
# county has no sales.
# NOTE(review): assumes countyKPI.csv carries the bare county name in
# 'county_name' -- confirm against the file.
can_align_by_name = (
    'county_name' in countyKPI.columns
    and countyKPI['county_name'].isin(county_agg['county']).any()
)
if can_align_by_name:
    agg_by_county = county_agg.set_index('county')
    for kpi_column in kpi_columns:
        countyKPI[kpi_column] = countyKPI['county_name'].map(
            agg_by_county[kpi_column])
else:
    # Positional fallback (the previous behavior), guarded by a length check
    # so a mismatch is reported clearly
    if len(county_agg) != len(countyKPI):
        raise ValueError(
            f'Cannot align sales data with countyKPI.csv: {len(county_agg)} '
            f'aggregated counties vs {len(countyKPI)} KPI rows and no '
            f'matching county_name column'
        )
    for kpi_column in kpi_columns:
        countyKPI[kpi_column] = county_agg[kpi_column].to_list()

# Format for display. A county without any matched sale has no count, so it
# is reported as 0 sales; its medians stay missing and are formatted by the
# same expressions as before.
countyKPI['total_sales'] = countyKPI['total_sales'].fillna(0).astype(
    int).apply(lambda x: f"{int(x):,}")
countyKPI['median_price'] = countyKPI['median_price'].astype(
    str).apply(lambda x: f"${float(x):,.0f}")
countyKPI['median_SF'] = countyKPI['median_SF'].astype(
    str).apply(lambda x: f"{float(x):,.0f}")

countyKPI.to_csv('County_CSV/countyKPI.csv', index=False)
print('County KPI CSV updated with new sales data - Step 8 complete! And script is done!')

API key loaded - Step 1 complete!
14 metro county IDs acquired - Step 2 complete!
Searching for single-family homes...
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 0.4s (2.34/s) 
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 0.5s (1.99/s) 
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 1.2s (0.80/s) 
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 0.5s (1.99/s) 
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 0.4s (2.72/s) 
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 0.4s (2.36/s) 
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 1.3s (0.76/s) 
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 0.7s (1.44/s) 
Processing Parcl IDs |████████████████████████████████████████| 1/1 [100%] in 1.5s (0.65/s) 
Processing Parcl IDs |██████████████████████