In [None]:
import pandas as pd
import plotly.graph_objects as go
import matplotlib.pyplot as plt

import requests
import json
from datetime import timedelta

!pip install utm
import utm


# Read and Preprocess Landfill Data

In [None]:
# Load only the location-related columns of the landfill facilities file.
READ_PATH = "../../data/raw/landfill_facilities.csv"
use_cols = [
    'Facility Name',
    'Location Address',
    'City',
    'Zip Code',
    'County',
    'East Coordinate',
    'North Coordinate',
]
landfill_df = pd.read_csv(filepath_or_buffer=READ_PATH, usecols=use_cols)

# Preview the first two facilities.
landfill_df.head(n=2)

In [None]:
# One facility has off coordinates (its East Coordinate is 0) and its zip code
# does not match the Google Maps zip code, so that facility is dropped.
# .copy() detaches the filtered rows from the frame they were selected from, so
# the column assignments below write to a real frame instead of a slice (which
# would raise SettingWithCopyWarning on pandas < 3).
landfill_df = landfill_df[landfill_df['East Coordinate'] != 0].copy()

# Convert the East/North coordinates (UTM, zone 18) to latitude/longitude.
# NOTE(review): New York lies in latitude band 'T' (40-48 deg N), not 'U';
# presumably utm only uses the letter to select the hemisphere, so the result
# should be the same -- confirm against the utm documentation.
landfill_lat, landfill_lon = utm.to_latlon(
    landfill_df['East Coordinate'],
    landfill_df['North Coordinate'],
    18,
    'U',
)
landfill_df['lat'] = landfill_lat
landfill_df['lon'] = landfill_lon
landfill_df.head(2)

In [None]:
# Plot every landfill as a marker on a map.
landfill_trace = go.Scattergeo(
    lon=landfill_df['lon'],
    lat=landfill_df['lat'],
    mode='markers',
)
fig = go.Figure(data=landfill_trace)

# Title the figure and restrict the base map to the USA.
fig.update_layout(title='New York Landfill Locations', geo_scope='usa')

# Zoom the map so that it fits the plotted locations.
fig.update_geos(fitbounds='locations')
fig.show()


# Read and Preprocess Windfarm Data

In [None]:
# Read the USWTDB wind turbine dataset and keep the New York rows.
READ_PATH_FARMS = "../../data/raw/usgs/usgs_data.csv"
useful_columns = ['case_id','t_state','p_name','p_tnum','xlong','ylat']

uswtdb_turbine_df = pd.read_csv(READ_PATH_FARMS, encoding='latin1', usecols=useful_columns)
ny_turbine_df = uswtdb_turbine_df[uswtdb_turbine_df.t_state == 'NY']

# Rows that share a project name are combined into one farm location by
# averaging their longitude and latitude. Selecting the two coordinate columns
# explicitly replaces the in-place drop on a filtered slice (which triggered
# SettingWithCopyWarning) and fixes the column order to (xlong, ylat).
# The result is indexed by p_name.
uswtdb_farm_df = ny_turbine_df.groupby('p_name')[['xlong', 'ylat']].mean()
uswtdb_farm_df.head()

In [None]:
# Read the NYSERDA large-scale renewable projects dataset.
READ_PATH_FARMS_NYSERDA = "../../data/raw/Large-scale_Renewable_Projects_Reported_by_NYSERDA__Beginning_2004_Wind.xlsx"
use_cols = ['Project Name', 'Project Status', 'Renewable Technology', 'Georeference']
nyserda_raw_df = pd.read_excel(READ_PATH_FARMS_NYSERDA, usecols=use_cols)

# Only keep wind farm entries that are under development.
renewable_tech_lst = ['Land Based Wind', 'Offshore Wind']
is_under_development = nyserda_raw_df['Project Status'] == 'Under Development'
is_wind = nyserda_raw_df['Renewable Technology'].isin(renewable_tech_lst)
nyserda_wind_df = nyserda_raw_df[is_under_development & is_wind]


def _parse_georeference(georeference) -> tuple:
    """
    Split a Georeference cell into (xlong, ylat) floats.

    The text between the first '(' and the following ')' of a value
    containing "POINT" is split on whitespace; the first token is taken as the
    longitude and the second as the latitude. Values without "POINT" (for
    example missing cells) or without two tokens give (nan, nan).
    """
    missing = (float('nan'), float('nan'))
    text = str(georeference)
    if "POINT" not in text:
        return missing
    tokens = text.partition('(')[2].partition(')')[0].split()
    if len(tokens) < 2:
        return missing
    return float(tokens[0]), float(tokens[1])


# Clean up the Georeference column into longitude and latitude columns.
# reference: Jillia's data cleaning code
# https://colab.research.google.com/drive/1matK9Fmje3h0lB1Q67AWF10IH1lUg_4W#scrollTo=bmNgfzB4Yy1u
# The frame is built fresh (indexed by project name as p_name) instead of
# mutating the filtered slice in place, and missing values use float('nan')
# so the cell does not depend on numpy being imported.
lon_lat_pairs = [_parse_georeference(cell) for cell in nyserda_wind_df['Georeference']]
nyserda_farm_df = pd.DataFrame(
    lon_lat_pairs,
    columns=['xlong', 'ylat'],
    index=pd.Index(nyserda_wind_df['Project Name'], name='p_name'),
).astype(float)
nyserda_farm_df.head()

In [None]:
# Stack the USWTDB farms and the under-development NYSERDA farms into one frame.
farm_frames = [uswtdb_farm_df, nyserda_farm_df]
farm_df = pd.concat(farm_frames)

farm_count = len(farm_df)
print("There are {} wind farms in New York State".format(farm_count))
farm_df.head(2)

# GHG Radius Modelling
Assumptions:
* All landfills are assumed to accept blades (this may need a more detailed look later). There seem to be a few categories of landfills, and some of them might not accept blades.

In [None]:
def compute_closest_landfill_dist(
    farm_lat: float,
    farm_lon: float,
    landfill_lst: list,
    timeout: float = 30.0) -> float:
    """
    compute the driving distance between a wind farm and its closest landfill
    args:
        - farm_lat: the latitude of the farm
        - farm_lon: the longitude of the farm
        - landfill_lst: a list of tuples describing the locations of landfills
                        (latitude, longitude)
        - timeout: seconds to wait for each routing request before giving up
    return:
        - distance in meters from the farm to the closest landfill;
          float('inf') when landfill_lst is empty or no landfill is routable
    raises:
        - requests.exceptions.RequestException: network failure, timeout or an
          HTTP error status that is not explained by the response body
        - RuntimeError: the routing service answered with an error code other
          than "NoRoute", or with no routes
    """
    min_dist = float('inf')
    # a single session lets all requests for this farm reuse one connection
    with requests.Session() as session:
        for landfill_lat, landfill_lon in landfill_lst:
            # refer the following get distance code to
            # 03.1 Modeling: Driving Distance Between Two Points by Sarosh
            url = f"""http://router.project-osrm.org/route/v1/car/{farm_lon},{farm_lat};{landfill_lon},{landfill_lat}?overview=false"""
            # without a timeout one stalled request would hang the whole run
            response = session.get(url, timeout=timeout)
            try:
                payload = response.json()
            except ValueError:
                # body is not JSON: surface the HTTP error if there is one,
                # otherwise re-raise the decoding error
                response.raise_for_status()
                raise
            code = payload.get("code")
            if code == "NoRoute":
                # this landfill cannot be reached by road from the farm, so it
                # is not a candidate for the closest landfill
                continue
            routes = payload.get("routes")
            if code != "Ok" or not routes:
                response.raise_for_status()
                raise RuntimeError(
                    f"OSRM routing failed for farm ({farm_lat}, {farm_lon}) and "
                    f"landfill ({landfill_lat}, {landfill_lon}): "
                    f"{code} {payload.get('message')}")
            # extract distance in meters from the first returned route
            min_dist = min(min_dist, routes[0]['distance'])
    return min_dist

In [None]:
# Collect every landfill location as a (latitude, longitude) tuple.
landfill_lst = [
    (landfill_lat, landfill_lon)
    for landfill_lat, landfill_lon in zip(landfill_df['lat'], landfill_df['lon'])
]

In [None]:
# NOTE: this cell takes about 70 min to run.
def _closest_landfill_for_farm(farm_row):
    """Distance from one farm row (ylat, xlong) to its closest landfill."""
    return compute_closest_landfill_dist(farm_row.ylat, farm_row.xlong, landfill_lst)


result_farm_df = farm_df.apply(
    _closest_landfill_for_farm,
    axis=1,
    result_type='expand',
)

In [None]:
# Destination of the processed output.
WRITE_PATH = "../../data/processed/wind_farms_ghg_radius.csv"

# Attach the computed radius to the farm locations, then save the frame as csv.
farm_df['ghg_radius_m'] = result_farm_df
farm_df.to_csv(path_or_buf=WRITE_PATH)

In [None]:
# Preview the first rows of farm_df (includes the ghg_radius_m column once the
# previous cell has been run).
farm_df.head()