### dependencies


In [3]:
import pandas as pd
import geopandas as gpd
import warnings
import topojson as tp
import requests
import os

# set metro county dictionary
county_dict = {
    '47015': 'Cannon',
    '47021': 'Cheatham',
    '47037': 'Davidson',
    '47043': 'Dickson',
    '47081': 'Hickman',
    '47111': 'Macon',
    '47119': 'Maury',
    '47147': 'Robertson',
    '47149': 'Rutherford',
    '47159': 'Smith',
    '47165': 'Sumner',
    '47169': 'Trousdale',
    '47187': 'Williamson',
    '47189': 'Wilson'
}

# nationwide FIPS codes
fips_dict = {
    '01': 'AL',
    '02': 'AK',
    '04': 'AZ',
    '05': 'AR',
    '06': 'CA',
    '08': 'CO',
    '09': 'CT',
    '10': 'DE',
    '11': 'DC',
    '12': 'FL',
    '13': 'GA',
    '15': 'HI',
    '16': 'ID',
    '17': 'IL',
    '18': 'IN',
    '19': 'IA',
    '20': 'KS',
    '21': 'KY',
    '22': 'LA',
    '23': 'ME',
    '24': 'MD',
    '25': 'MA',
    '26': 'MI',
    '27': 'MN',
    '28': 'MS',
    '29': 'MO',
    '30': 'MT',
    '31': 'NE',
    '32': 'NV',
    '33': 'NH',
    '34': 'NJ',
    '35': 'NM',
    '36': 'NY',
    '37': 'NC',
    '38': 'ND',
    '39': 'OH',
    '40': 'OK',
    '41': 'OR',
    '42': 'PA',
    '44': 'RI',
    '45': 'SC',
    '46': 'SD',
    '47': 'TN',
    '48': 'TX',
    '49': 'UT',
    '50': 'VT',
    '51': 'VA',
    '53': 'WA',
    '54': 'WV',
    '55': 'WI',
    '56': 'WY'
}

url = 'https://transition.fcc.gov/oet/info/maps/census/fips/fips.txt'

# Fetch the content from the URL
response = requests.get(url)
response.raise_for_status()  # Check that the request was successful

table = response.text.split('------------    --------------\n')[1]

# Strip leading/trailing whitespace and split by newline
lines = table.strip().split('\n')

# Create a DataFrame from the list of lines
df = pd.DataFrame(lines, columns=['Data'])

# Split the 'Data' column on the first space
df[['FIPS', 'County_name']] = df['Data'].str.split(n=1, expand=True)

# Drop the original 'Data' column
df = df.drop(columns=['Data'])

# Drop rows where 'FIPS' ends with '000'
df = df[~df['FIPS'].str.endswith('000')]

# Extract the first 2 digits from 'FIPS' column
df['State_code'] = df['FIPS'].str[:2]

# Map 'State_code' to 'State' using fips_dict
df['State'] = df['State_code'].map(fips_dict)

# Drop the 'State_code' column if not needed
df = df.drop(columns=['State_code'])

df['county_state'] = df['County_name'] + ', ' + df['State']

# Create dictionary using zip and to_dict
nationwide_FIPSdict = dict(zip(df['FIPS'], df['county_state']))

### simplify tracts, derive counties


In [21]:
# ignore the warnings that come with simplifying geographically
warnings.filterwarnings("ignore", category=RuntimeWarning)

# simplify tracts --------------------------------------
tracts = gpd.read_file('tract_outlines.gpkg')
tracts['FIPS'] = tracts['STATEFP'] + tracts['COUNTYFP']
tracts = tracts[[
    'FIPS',
    'GEOID',
    'geometry'
]]

tracts['county_name'] = tracts['FIPS'].map(nationwide_FIPSdict)

toposimplify_tracts = 0.001
tracts_simp = tp.Topology(tracts, toposimplify=toposimplify_tracts).to_gdf()
tracts_simp.to_file('tracts_simp.gpkg')

# create the counties by dissolving the tracts on the FIPS column
counties = tracts.dissolve(by='FIPS').reset_index()
counties = counties.drop(columns='GEOID')
counties['county_name'] = counties['FIPS'].map(nationwide_FIPSdict)
counties['county_stripped'] = counties['county_name'].apply(
    lambda x: x.split(' County,')[0])
counties = counties[[
    'FIPS',
    'county_name',
    'county_stripped',
    'geometry'
]]

# export simplified geometry
counties.to_file('counties_simp.gpkg')
print('export complete!')

export complete!


In [20]:

counties

Unnamed: 0,FIPS,county_name,geometry,county_stripped
0,47015,"Cannon County, TN","POLYGON ((-85.93284 35.79853, -85.93680 35.793...",Cannon
1,47021,"Cheatham County, TN","POLYGON ((-87.18227 36.05112, -87.18198 36.052...",Cheatham
2,47037,"Davidson County, TN","MULTIPOLYGON (((-87.05341 36.04973, -87.05316 ...",Davidson
3,47043,"Dickson County, TN","MULTIPOLYGON (((-87.31521 35.96994, -87.31602 ...",Dickson
4,47081,"Hickman County, TN","POLYGON ((-87.26503 35.71857, -87.26354 35.717...",Hickman
5,47111,"Macon County, TN","POLYGON ((-85.92066 36.62600, -85.91950 36.625...",Macon
6,47119,"Maury County, TN","POLYGON ((-87.09449 35.44209, -87.09445 35.442...",Maury
7,47147,"Robertson County, TN","POLYGON ((-86.89896 36.38997, -86.89913 36.389...",Robertson
8,47149,"Rutherford County, TN","POLYGON ((-86.69973 35.72561, -86.69961 35.726...",Rutherford
9,47159,"Smith County, TN","POLYGON ((-85.84352 36.28592, -85.84317 36.285...",Smith


### Convert STDB Excel files to CSV


In [4]:
# Need to open each Excel file downloaded from STDB, make a small change, and save
# Then run this script
def convert_excel_to_csv(directory, output_directory):
    for filename in os.listdir(directory):
        if filename.startswith("Color-coded maps") and filename.endswith(".xlsx"):
            # Construct the full path to the Excel file
            excel_path = os.path.join(directory, filename)

            # Read the Excel file into a DataFrame
            df = pd.read_excel(excel_path, engine='openpyxl')

            # Ensure the "Census Tract" column is of type object (string)
            df['Census Tract'] = df['Census Tract'].astype(str)

            # Rename the "Census Tract" column to "GEOID"
            df.rename(columns={'Census Tract': 'GEOID'}, inplace=True)

            # Construct the full path for the output CSV file
            csv_filename = filename.replace(".xlsx", ".csv")
            csv_path = os.path.join(output_directory, csv_filename)

            # Save the DataFrame to a CSV file
            df.to_csv(csv_path, index=False)
            print(f"Converted {filename} to {csv_filename}")


convert_excel_to_csv('Data/', 'Data/CSV/')

Converted Color-coded maps - 2024-2029 Growth Rate Population.xlsx to Color-coded maps - 2024-2029 Growth Rate Population.csv
Converted Color-coded maps - 2024 Senior Population.xlsx to Color-coded maps - 2024 Senior Population.csv
Converted Color-coded maps - 2024 Population Density.xlsx to Color-coded maps - 2024 Population Density.csv
Converted Color-coded maps - 2029 Total Population.xlsx to Color-coded maps - 2029 Total Population.csv
Converted Color-coded maps - 2024-2029 Growth Rate Owner Occ HUs.xlsx to Color-coded maps - 2024-2029 Growth Rate Owner Occ HUs.csv
Converted Color-coded maps - 2024 Total Population.xlsx to Color-coded maps - 2024 Total Population.csv
Converted Color-coded maps - 2024 Median Household Income.xlsx to Color-coded maps - 2024 Median Household Income.csv


In [54]:
gdf = gpd.read_file('Data/counties_simp.gpkg')

gdf

Unnamed: 0,FIPS,county_name,county_stripped,geometry
0,47015,"Cannon County, TN",Cannon,"POLYGON ((-85.93284 35.79853, -85.93680 35.793..."
1,47021,"Cheatham County, TN",Cheatham,"POLYGON ((-87.18227 36.05112, -87.18198 36.052..."
2,47037,"Davidson County, TN",Davidson,"MULTIPOLYGON (((-87.05341 36.04973, -87.05316 ..."
3,47043,"Dickson County, TN",Dickson,"MULTIPOLYGON (((-87.31521 35.96994, -87.31602 ..."
4,47081,"Hickman County, TN",Hickman,"POLYGON ((-87.26503 35.71857, -87.26354 35.717..."
5,47111,"Macon County, TN",Macon,"POLYGON ((-85.92066 36.62600, -85.91950 36.625..."
6,47119,"Maury County, TN",Maury,"POLYGON ((-87.09449 35.44209, -87.09445 35.442..."
7,47147,"Robertson County, TN",Robertson,"POLYGON ((-86.89896 36.38997, -86.89913 36.389..."
8,47149,"Rutherford County, TN",Rutherford,"POLYGON ((-86.69973 35.72561, -86.69961 35.726..."
9,47159,"Smith County, TN",Smith,"POLYGON ((-85.84352 36.28592, -85.84317 36.285..."


### Generate a county total net dataframe


In [5]:
countyTotal_inflow = pd.read_csv('Data/inflow_CountyTotal.csv')
countyTotal_inflow['merge_ID'] = countyTotal_inflow['destination_FIPS'].astype(
    str) + '-' + countyTotal_inflow['year'].astype(str)

countyTotal_outflow = pd.read_csv('Data/outflow_CountyTotal.csv')
countyTotal_outflow['merge_ID'] = countyTotal_outflow['origin_FIPS'].astype(
    str) + '-' + countyTotal_outflow['year'].astype(str)

df_merged = pd.merge(
    countyTotal_inflow,
    countyTotal_outflow,
    on='merge_ID'
)

df_merged = df_merged.rename(columns={
    'destination_FIPS': 'FIPS',
    'year_x': 'year',
    'destination_county': 'county_name'
})

df_merged = df_merged[[
    'year',
    'FIPS',
    'county_name',
    'people_inflow',
    'agi_inflow',
    'agi_capita_inflow',
    'people_outflow',
    'agi_outflow',
    'agi_capita_outflow'
]]

df_merged = df_merged.sort_values(by='year')

df_merged['people_net'] = df_merged['people_inflow'] - \
    df_merged['people_outflow']
df_merged['agi_net'] = df_merged['agi_inflow'] - df_merged['agi_outflow']
df_merged.to_csv('Data/netflow_CountyTotal.csv', index=False)

df_merged

Unnamed: 0,year,FIPS,county_name,people_inflow,agi_inflow,agi_capita_inflow,people_outflow,agi_outflow,agi_capita_outflow,people_net,agi_net
0,2018,47015,"Cannon County, TN",909,21629000,23794.279428,764,15686000,20531.413613,145,5943000
12,2018,47187,"Williamson County, TN",19780,1159148000,58602.022245,14101,788870000,55944.259272,5679,370278000
11,2018,47169,"Trousdale County, TN",651,13203000,20281.105991,608,10821000,17797.697368,43,2382000
10,2018,47165,"Sumner County, TN",13181,396465000,30078.522115,10242,299666000,29258.543253,2939,96799000
9,2018,47159,"Smith County, TN",1154,25101000,21751.299827,974,19315000,19830.595483,180,5786000
...,...,...,...,...,...,...,...,...,...,...,...
23,2022,47159,"Smith County, TN",1150,34229000,29764.347826,996,32966000,33098.393574,154,1263000
24,2022,47165,"Sumner County, TN",14287,665833000,46604.115630,11171,422170000,37791.603258,3116,243663000
25,2022,47169,"Trousdale County, TN",920,24839000,26998.913043,780,17235000,22096.153846,140,7604000
27,2022,47189,"Wilson County, TN",13626,697706000,51204.021723,8873,349104000,39344.528344,4753,348602000


#### Create a Metro migration total "series" to run in parallel with selected county


In [22]:
# Aggregate migration for each year
metro_data = df_merged.groupby('year').agg({
    'people_net': 'sum',
    'agi_net': 'sum',
    'people_inflow': 'sum',
    'agi_inflow': 'sum',
    'people_outflow': 'sum',
    'agi_outflow': 'sum'
}).reset_index()
metro_data['FIPS'] = 'n/a'
metro_data['county_name'] = 'Metro'
metro_data['agi_capita_inflow'] = 0
metro_data['agi_capita_outflow'] = 0

metro_data = metro_data[[
    'year',
    'FIPS',
    'county_name',
    'people_inflow',
    'agi_inflow',
    'agi_capita_inflow',
    'people_outflow',
    'agi_outflow',
    'agi_capita_outflow',
    'people_net',
    'agi_net'
]]

# Concatenate the metrowide with the original, filtered data
df_final = pd.concat([df_merged, metro_data], ignore_index=True)

df_final['county_name'] = df_final['county_name'].str.split(
    ' County', expand=True)[0]


df_final.to_csv('Data/netflow_MetroTotal.csv', index=False)
df_final.tail(3)

Unnamed: 0,year,FIPS,county_name,people_inflow,agi_inflow,agi_capita_inflow,people_outflow,agi_outflow,agi_capita_outflow,people_net,agi_net
72,2020,,Metro,142289,5991095000,0.0,130531,4824513000,0.0,11758,1166582000
73,2021,,Metro,144050,6923170000,0.0,128463,4869063000,0.0,15587,2054107000
74,2022,,Metro,142160,8074699000,0.0,126223,6014103000,0.0,15937,2060596000
