In [1]:
# Install the haversine package, used to compute great-circle distances between
# geographic coordinates.
# %pip (rather than !pip) installs into the environment of the running kernel.
# The version is pinned to the one this notebook was last run with.
%pip install haversine==2.8.0

Collecting haversine
  Using cached haversine-2.8.0-py2.py3-none-any.whl (7.7 kB)
Installing collected packages: haversine
Successfully installed haversine-2.8.0


In [2]:
# Import dependencies
import pandas as pd
import haversine as hs
from haversine import Unit

# County midpoint locations. GEOID is read as text rather than as a number.
county_locs = pd.read_csv(
    '../cleaned_data/corrected_midpoints.csv',
    dtype={'GEOID': str},
)

# Operating nuclear power plants: location and generating capacity.
nuke_locs = pd.read_csv(
    '../cleaned_data/operating_reactors.csv',
)

In [3]:
# Preview the first rows of the county midpoint table.
county_locs.head()

Unnamed: 0,latitude,longitude,GEOID,County_State
0,32.53492,-86.642749,1001,"Autauga County, Alabama"
1,30.66097,-87.74984,1003,"Baldwin County, Alabama"
2,31.869603,-85.393197,1005,"Barbour County, Alabama"
3,32.998644,-87.126439,1007,"Bibb County, Alabama"
4,33.980867,-86.567371,1009,"Blount County, Alabama"


In [4]:
# Preview the first rows of the nuclear plant table.
nuke_locs.head()

Unnamed: 0,plant_name,latitude,longitude,capacity_Mwe
0,Arkansas Nuclear One,35.311,-93.2351,1845.0
1,Beaver Valley Power Station,40.6219,-80.4336,1846.8
2,Braidwood Station,41.2435,-88.2286,2449.8
3,Browns Ferry Nuclear Plant,34.7042,-87.1189,3567.5
4,Brunswick Steam Electric Plant,33.9597,-78.0114,2003.2


In [5]:
def closest_plant(county_lat,county_lng):
    """Find the nuclear plant in ``nuke_locs`` nearest to the given point.

    The haversine distance in miles, rounded to the nearest whole mile, is
    computed from the point to every row of the module-level ``nuke_locs``
    frame, and the row with the smallest distance is selected. If several
    plants tie after rounding, the first one in ``nuke_locs`` row order wins.

    Side effect: the distances are stored in ``nuke_locs["dist_from_county"]``,
    overwriting the values left by any previous call. The column is kept on
    purpose because a later cell drops it from the merged plant table.

    Parameters
    ----------
    county_lat : float
        Latitude of the point, in decimal degrees.
    county_lng : float
        Longitude of the point, in decimal degrees.

    Returns
    -------
    tuple
        ``(name, distance, capacity)`` of the nearest plant, in that order:
        its ``plant_name``, the rounded distance in miles, and its
        ``capacity_Mwe`` value.

    Raises
    ------
    ValueError
        If ``nuke_locs`` has no rows.
    """
    if nuke_locs.empty:
        raise ValueError("nuke_locs is empty; cannot find a closest plant")

    origin = (county_lat, county_lng)

    # Assign the whole column at once instead of writing one cell per plant.
    # float() keeps the column as floats, as the row-by-row version produced.
    nuke_locs["dist_from_county"] = [
        float(round(hs.haversine(origin, (plant_lat, plant_lng), unit=Unit.MILES)))
        for plant_lat, plant_lng in zip(nuke_locs["latitude"], nuke_locs["longitude"])
    ]

    # Position of the first minimum, which matches taking the first row among
    # those equal to the minimum distance.
    nearest_pos = nuke_locs["dist_from_county"].to_numpy().argmin()
    nearest = nuke_locs.iloc[nearest_pos]

    # Look fields up by column name rather than by position, so the result does
    # not depend on the column order of nuke_locs.
    name = nearest["plant_name"]
    distance = nearest["dist_from_county"]
    capacity = nearest["capacity_Mwe"]
    return name, distance, capacity

In [6]:
# For every county midpoint, look up the closest nuclear plant and store the
# plant name, distance (miles) and capacity in that county's row.
# The results are collected first and then assigned as whole columns, which
# avoids enlarging the frame one cell at a time inside the loop.
nearest_plants = pd.DataFrame(
    [
        closest_plant(county_lat, county_lng)
        for county_lat, county_lng in zip(county_locs["latitude"], county_locs["longitude"])
    ],
    columns=["closest_plant", "distance", "plant_capacity"],
    index=county_locs.index,
)
for column in nearest_plants.columns:
    county_locs[column] = nearest_plants[column]
county_locs.head()

Unnamed: 0,latitude,longitude,GEOID,County_State,closest_plant,distance,plant_capacity
0,32.53492,-86.642749,1001,"Autauga County, Alabama",Joseph M. Farley Nuclear Plant,128.0,1776.4
1,30.66097,-87.74984,1003,"Baldwin County, Alabama",Joseph M. Farley Nuclear Plant,161.0,1776.4
2,31.869603,-85.393197,1005,"Barbour County, Alabama",Joseph M. Farley Nuclear Plant,48.0,1776.4
3,32.998644,-87.126439,1007,"Bibb County, Alabama",Browns Ferry Nuclear Plant,118.0,3567.5
4,33.980867,-86.567371,1009,"Blount County, Alabama",Browns Ferry Nuclear Plant,59.0,3567.5


In [None]:
# Export the county table to a csv file.
# index=False leaves the DataFrame's row index out of the file.
county_locs.to_csv('../cleaned_data/closest_nuc_plant_in_each_county.csv',index=False)

In [7]:
# Order the counties from nearest to farthest from their closest plant.
sorted_df = county_locs.sort_values(by='distance', ascending=True)
sorted_df.head()

Unnamed: 0,latitude,longitude,GEOID,County_State,closest_plant,distance,plant_capacity
903,38.236838,-95.734112,20031,"Coffey County, Kansas",Wolf Creek Generating Station,2.0,1267.7
2104,41.593901,-83.019978,39123,"Ottawa County, Ohio",Davis-Besse Nuclear Power Station,3.0,925.2
2683,28.785651,-96.003981,48321,"Matagorda County, Texas",South Texas Project,3.0,2708.6
665,42.042566,-89.320727,17141,"Ogle County, Illinois",Byron Station,3.0,2449.8
1310,42.285106,-86.306415,26159,"Van Buren County, Michigan",Palisade Nuclear Station,3.0,805.0


In [15]:
# Keep one row per plant: the first occurrence in the distance-sorted frame,
# i.e. the county whose midpoint is nearest to that plant.
# This county is used as the plant's home county. It is a nearest-midpoint
# heuristic, so it is worth spot-checking plants that sit near a county border.
unique_df = sorted_df.drop_duplicates(subset='closest_plant', keep='first')
unique_df.head(100)

Unnamed: 0,latitude,longitude,GEOID,County_State,closest_plant,distance,plant_capacity
903,38.236838,-95.734112,20031,"Coffey County, Kansas",Wolf Creek Generating Station,2.0,1267.7
2104,41.593901,-83.019978,39123,"Ottawa County, Ohio",Davis-Besse Nuclear Power Station,3.0,925.2
2683,28.785651,-96.003981,48321,"Matagorda County, Texas",South Texas Project,3.0,2708.6
665,42.042566,-89.320727,17141,"Ogle County, Illinois",Byron Station,3.0,2449.8
1310,42.285106,-86.306415,26159,"Van Buren County, Michigan",Palisade Nuclear Station,3.0,805.0
614,40.174629,-88.904089,17039,"De Witt County, Illinois",Clinton Power Station,4.0,1138.3
2735,32.222288,-97.774335,48425,"Somervell County, Texas",Comanche Peak Nuclear Power Plant,5.0,2430.0
2460,35.180876,-85.164757,47065,"Hamilton County, Tennessee",Sequoyah Nuclear Plant,5.0,2441.0
2488,35.512826,-84.813387,47121,"Meigs County, Tennessee",Watts Bar Nuclear Plant,6.0,2539.8
1241,41.995778,-86.685421,26021,"Berrien County, Michigan",Donald C. Cook Nuclear Plant,6.0,2285.3


In [17]:
# Drop the county coordinates and the distance, and rename the plant column to
# 'plant_name' so it matches the key used when merging with nuke_locs.
cleaned_df = (
    unique_df
    .drop(columns=['latitude', 'longitude', 'distance'])
    .rename(columns={'closest_plant': 'plant_name'})
)
cleaned_df.head(100)

Unnamed: 0,GEOID,County_State,plant_name,plant_capacity
903,20031,"Coffey County, Kansas",Wolf Creek Generating Station,1267.7
2104,39123,"Ottawa County, Ohio",Davis-Besse Nuclear Power Station,925.2
2683,48321,"Matagorda County, Texas",South Texas Project,2708.6
665,17141,"Ogle County, Illinois",Byron Station,2449.8
1310,26159,"Van Buren County, Michigan",Palisade Nuclear Station,805.0
614,17039,"De Witt County, Illinois",Clinton Power Station,1138.3
2735,48425,"Somervell County, Texas",Comanche Peak Nuclear Power Plant,2430.0
2460,47065,"Hamilton County, Tennessee",Sequoyah Nuclear Plant,2441.0
2488,47121,"Meigs County, Tennessee",Watts Bar Nuclear Plant,2539.8
1241,26021,"Berrien County, Michigan",Donald C. Cook Nuclear Plant,2285.3


In [23]:
# Attach each plant's county (GEOID and County_State) to the plant table.
# A left merge keeps every plant, even one with no matching row in cleaned_df.
# 'dist_from_county' only exists on nuke_locs after closest_plant() has run, so
# errors='ignore' keeps this cell from raising a KeyError when it is absent.
# NOTE(review): the saved output shows FIPS as floats without leading zeros
# (e.g. 5115.0) - confirm whether consumers expect 5-digit zero-padded strings.
nuke_locs_w_fips = (
    nuke_locs
    .merge(cleaned_df, how='left', on='plant_name')
    .drop(columns=['dist_from_county', 'plant_capacity'], errors='ignore')
    .rename(columns={'GEOID': 'FIPS'})
)
nuke_locs_w_fips.head(100)

Unnamed: 0,plant_name,latitude,longitude,capacity_Mwe,FIPS,County_State
0,Arkansas Nuclear One,35.311,-93.2351,1845.0,5115.0,"Pope County, Arkansas"
1,Beaver Valley Power Station,40.6219,-80.4336,1846.8,42007.0,"Beaver County, Pennsylvania"
2,Braidwood Station,41.2435,-88.2286,2449.8,17063.0,"Grundy County, Illinois"
3,Browns Ferry Nuclear Plant,34.7042,-87.1189,3567.5,1083.0,"Limestone County, Alabama"
4,Brunswick Steam Electric Plant,33.9597,-78.0114,2003.2,37019.0,"Brunswick County, North Carolina"
5,Byron Station,42.0742,-89.2819,2449.8,17141.0,"Ogle County, Illinois"
6,Callaway Plant,38.758919,-91.778841,1235.8,29027.0,"Callaway County, Missouri"
7,Calvert Cliffs Nuclear Power Plant,38.4344,-76.4417,1850.4,24009.0,"Calvert County, Maryland"
8,Catawba Nuclear Station,35.0514,-81.0694,2410.2,45091.0,"York County, South Carolina"
9,Clinton Power Station,40.1719,-88.8339,1138.3,17039.0,"De Witt County, Illinois"


In [24]:
# Export the plant table with its FIPS and County_State columns to a csv file.
# index=False leaves the DataFrame's row index out of the file.
nuke_locs_w_fips.to_csv('../cleaned_data/nuke_plants_w_fips.csv',index=False)