In [1]:
# Install haversine package for calculating distance between geometric coordinates
!pip install haversine



In [10]:
# Import dependencies
import pandas as pd
import haversine as hs
import numpy as np
from haversine import Unit

# Load the county midpoint table (latitude/longitude per county); GEOID is read as a string.
county_locs = pd.read_csv(
    '../cleaned_data/corrected_midpoints.csv',
    dtype={'GEOID': str},
)

# Load the fossil fuel power plant table (locations, capacity and emissions); FIPS is read as a string.
plant_locs = pd.read_csv(
    '../cleaned_data/cleaned_fossil_fuel_plants.csv',
    dtype={'FIPS': str},
)

In [11]:
# Preview the first five rows of the county midpoint table.
county_locs.head(n=5)

Unnamed: 0,latitude,longitude,GEOID,County_State
0,32.53492,-86.642749,1001,"Autauga County, Alabama"
1,30.66097,-87.74984,1003,"Baldwin County, Alabama"
2,31.869603,-85.393197,1005,"Barbour County, Alabama"
3,32.998644,-87.126439,1007,"Bibb County, Alabama"
4,33.980867,-86.567371,1009,"Blount County, Alabama"


In [4]:
# Drop the redundant net_generation_MWh column.
# errors="ignore" keeps this cell re-runnable: once the column is gone, running the
# cell again is a no-op instead of raising KeyError.
plant_locs = plant_locs.drop(columns=["net_generation_MWh"], errors="ignore")
plant_locs.head()

Unnamed: 0,plant_name,FIPS,plant_county,plant_state,latitude,longitude,fuel_type,nameplate_capacity_MW,NOx_tons,SO2_tons,CO2_tons,CH4_lbs,N2O_lbs,PM2.5_tons
0,Sand Point,2013,Aleutians East,Alaska,55.339722,-160.497222,Oil,4.0,46.29,4.195,2362.834,191.366,38.273,4.476344
1,Barry,1097,Mobile,Alabama,31.0069,-88.0103,Gas,2841.5,2361.074,3494.672,8290059.787,1136505.719,159420.742,643.155672
2,Gadsden,1055,Etowah,Alabama,34.0128,-85.9708,Gas,138.0,136.388,2.957,131425.54,3801.827,380.183,6.70727
3,Gorgas,1127,Walker,Alabama,33.644344,-87.196486,Coal,1416.7,490.525,142.687,598479.725,117468.333,17090.854,89.247027
4,Copper Station,48141,El Paso,Texas,31.7569,-106.375,Gas,86.9,85.83,0.2,41036.777,1517.994,151.799,2.317093


In [5]:
# Define a function that takes in a location, calculates the haversine distance (in miles)
# from that location to every fossil fuel power plant, and returns all of the data for the
# closest plants (5 by default), nearest first.

def closest_plants(county_lat,county_lng,n_closest=5):
    """Return the rows of ``plant_locs`` for the plants nearest to a location.

    Parameters
    ----------
    county_lat, county_lng : float
        Latitude and longitude of the reference location.
    n_closest : int, default 5
        Maximum number of plants to return.

    Returns
    -------
    pandas.DataFrame
        Up to ``n_closest`` rows of ``plant_locs``, ordered from nearest to
        farthest, including a ``dist_from_county`` column holding the distance
        in miles rounded to a whole number.

    Notes
    -----
    Side effect: the ``dist_from_county`` column of the module-level
    ``plant_locs`` frame is (re)written on every call.
    """
    county_point = (county_lat, county_lng)

    # Compute every distance first and assign the column once, instead of
    # writing one cell at a time with .loc inside an iterrows loop.
    exact_miles = np.array(
        [
            hs.haversine(county_point, plant_point, unit=Unit.MILES)
            for plant_point in zip(plant_locs["latitude"], plant_locs["longitude"])
        ],
        dtype=float,
    )
    plant_locs["dist_from_county"] = np.round(exact_miles)

    # Rank on the exact distances rather than the rounded ones: after rounding,
    # many plants tie on the same whole mile and the order among them would be
    # arbitrary, so a farther plant could displace a nearer one at the cutoff.
    nearest_positions = np.argsort(exact_miles, kind="stable")[:n_closest]
    return plant_locs.iloc[nearest_positions]

In [8]:
# Iterate through all counties in the list, calculating the closest fossil fuel plants for each one
# and storing the data for each plant in that county's row under rank-numbered columns
# (fuel_type1 ... dist_from_county5, where rank 1 is the nearest plant).

# Plant attributes copied onto the county row for every rank. They are selected by
# name so the result does not depend on the column order of plant_locs.
PLANT_FIELDS = [
    "fuel_type",
    "nameplate_capacity_MW",
    "NOx_tons",
    "SO2_tons",
    "CO2_tons",
    "CH4_lbs",
    "N2O_lbs",
    "PM2.5_tons",
    "dist_from_county",
]

for index, row in county_locs.iterrows():
    closest5 = closest_plants(row["latitude"], row["longitude"])
    # Walk only the rows that were actually returned, so a plant table with
    # fewer than 5 entries does not raise IndexError.
    for rank in range(1, len(closest5) + 1):
        numbered_cols = [f"{field}{rank}" for field in PLANT_FIELDS]
        county_locs.loc[index, numbered_cols] = closest5.iloc[rank - 1][PLANT_FIELDS].to_list()
county_locs.head()

Unnamed: 0,latitude,longitude,GEOID,County,fuel_type1,nameplate_capacity_MW1,NOx_tons1,SO2_tons1,CO2_tons1,CH4_lbs1,...,dist_from_county4,fuel_type5,nameplate_capacity_MW5,NOx_tons5,SO2_tons5,CO2_tons5,CH4_lbs5,N2O_lbs5,PM2.5_tons5,dist_from_county5
0,32.53492,-86.642749,1001,Autauga County,Gas,939.4,50.521,3.849,762545.203,28447.358,...,13.0,Gas,104.2,367.256,2.531,0.0,16303.841,9009.438,1.220169,15.0
1,30.66097,-87.74984,1003,Baldwin County,Gas,50.0,450.864,4.58,167490.328,6318.013,...,22.0,Gas,317.4,35.096,2.719,538661.787,16687.381,1668.738,72.588035,24.0
2,31.869603,-85.393197,1005,Barbour County,Biomass,120.5,312.818,0.59,0.0,134642.958,...,51.0,Biomass,101.2,350.852,888.835,62961.826,80929.295,16646.007,16.746958,52.0
3,32.998644,-87.126439,1007,Bibb County,Biomass,13.0,16.113,2.219,0.011,12526.086,...,42.0,Gas,2034.0,3462.81,1149.051,5283997.438,768776.092,108753.017,215.882167,42.0
4,33.980867,-86.567371,1009,Blount County,Other Fossil,3.8,2.197,0.009,1006.565,37.975,...,43.0,Gas,748.0,60.07,1.062,209853.617,8101.532,831.828,13.755226,44.0


In [9]:
# Write the enriched county table to disk, leaving the DataFrame index out of the file.
county_locs.to_csv(
    '../cleaned_data/closest_ff_plant_in_each_county.csv',
    index=False,
)