In [1]:
# Mount Drive
from google.colab import drive
drive.mount("/content/drive/")

Mounted at /content/drive/


In [2]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('darkgrid')

# Get the travel info
import requests
import json
from datetime import timedelta

# Load ML Model
import joblib

# Interactive map
import folium

# Load Data: USGS Dataset

In [None]:
useful_columns1 = ['t_state','p_name','p_tnum','p_cap',
                   't_manu','t_cap','t_hh','t_rd',
                   't_rsa','t_hh','xlong','ylat'] 

In [None]:
df1 = pd.read_csv('/content/drive/MyDrive/Capstone Data/uswtdb_v5_2_20221012.csv',
                 engine='python',encoding='latin1',usecols=useful_columns1)
df1 = df1[df1.t_state=='NY']

# Predict Turbine Weights

In [None]:
processed_manu = [x.lower() if type(x)==str else x for x in list(df1['t_manu'])]

for i in range(len(processed_manu)):
  if processed_manu[i] not in ['fuhrlander', 'ge wind','goldwind','nordex','other', 'repower', 'siemens gamesa renewable energy', 'vensys', 'vergnet','vestas']:
    processed_manu[i] = 'other'

df1.insert(1, 'manufacturer_processed', processed_manu)

df1.drop('t_manu',axis=1,inplace=True)

In [None]:
X = np.array(df1[['manufacturer_processed','t_cap','t_rd','t_rsa','t_hh']])

model_path = '/content/drive/MyDrive/Wind Turbine Capstone Project/Sarosh/xgb_predict_rotor_weight_tons.pkl'
model = joblib.load(model_path)

df1['rotor_weight'] = model.predict(X)

In [None]:
df1['t_rd'] = df1['t_rd'].fillna(df1['t_rd'].median())

In [None]:
df1['turbine_blade_weight'] = (1/6) * df1['rotor_weight']
df1['t_blade_length'] = df1['t_rd'] / 2

# Save Dataset

In [None]:
project_data = pd.DataFrame(df1.groupby(['p_name','manufacturer_processed'])[['xlong','ylat','p_tnum','t_cap','turbine_blade_weight','t_blade_length']].mean())
project_data.columns = ['longitude','latitude','numb_turbines','avg_turbine_capacity','turbine_blade_weight','turbine_blade_length']
project_data.reset_index(drop=False,inplace=True)
project_data.sort_values(by=['p_name'],ascending=False,inplace=True)
project_data['text'] = 'Project Name: ' + project_data['p_name'] + '<br>Number of Turbines: ' + project_data['numb_turbines'].astype(str) + '<br>Average Turbine Capacity: ' + (project_data['avg_turbine_capacity']).astype(str) + " MW"


In [None]:
project_data.head()

Unnamed: 0,p_name,manufacturer_processed,longitude,latitude,numb_turbines,avg_turbine_capacity,turbine_blade_weight,turbine_blade_length,text
49,unknown Wyoming County,other,-78.187935,42.740818,1.0,,0.66904,41.0,Project Name: unknown Wyoming County<br>Number...
48,unknown Steuben County,other,-77.533662,42.486564,11.0,,0.66904,41.0,Project Name: unknown Steuben County<br>Number...
47,unknown Oneida County,other,-75.447369,42.975771,2.0,,0.66904,41.0,Project Name: unknown Oneida County<br>Number ...
46,unknown Niagara County,other,-78.493963,43.282899,1.0,,0.66904,41.0,Project Name: unknown Niagara County<br>Number...
45,unknown Montgomery County,other,-74.510246,42.905187,2.0,,0.66904,41.0,Project Name: unknown Montgomery County<br>Num...


In [None]:
project_data.to_csv('usgs_data.csv')
!cp usgs_data.csv "drive/My Drive/Capstone Data"

# Load Data: NYSERDA Dataset

In [None]:
useful_columns2 = ['Project Name','Renewable Technology','Project Status','Year of Delivery Start Date','Contract Duration',
                  'Georeference']

In [None]:
df2 = pd.read_excel('/content/drive/MyDrive/Capstone Data/Large-scale_Renewable_Projects_Reported_by_NYSERDA__Beginning_2004_Wind.xlsx',usecols=useful_columns2)
df2 = df2[(df2['Renewable Technology']=='Land Based Wind') | (df2['Renewable Technology']=='Offshore Wind')]
df2 = df2[df2['Project Status']=='Under Development']
df2.reset_index(drop = True, inplace = True)
df2 = df2.rename(columns={"Project Name": "p_name", "Year of Delivery Start Date": "p_year"})

# Data Cleaning

In [None]:
# df2['longitude']=df2['Georeference'].str.slice(7, 17)
# df2['latitude']=df2['Georeference'].str.slice(18, 27)
# df2['Georeference'].str.contains('POINT')
df2["clean_name"] = df2["Georeference"].apply(lambda x: str(x).split('(')[1].lstrip().split(')')[0] if "POINT" in str(x) else np.NaN)
df2["longitude"] = df2["clean_name"].apply(lambda x: str(x).split()[0] if " " in str(x) else np.NaN).astype(float)
df2["latitude"] = df2["clean_name"].apply(lambda x: str(x).split()[1] if " " in str(x) else np.NaN).astype(float)
df2.drop('Georeference', axis=1, inplace=True)
df2.drop('clean_name', axis=1, inplace=True)

In [None]:
for index, location_info in df2.iterrows():
  if pd.notnull(location_info['latitude']):
    print(location_info['p_name'])

Heritage Wind, LLC
Number Three Wind Farm
Alle-Catt Wind Farm
Eight Point Wind
Ball Hill
Bluestone Wind
High Bridge Wind and Battery
Empire Wind
Beacon Wind
Empire Wind 2
Baron Winds I
Baron Winds II
Prattsburgh Wind Farm
Sunrise Wind


# Save Dataset

In [None]:
project_data2 = df2[['p_name', 'p_year', 'longitude', 'latitude']]
project_data2['text'] = 'Project Name: ' + project_data2['p_name'] + '<br>Year of Delivery Start Date: ' + project_data2['p_year'].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [None]:
project_data2.head()

Unnamed: 0,p_name,p_year,longitude,latitude,text
0,"Heritage Wind, LLC",2023.0,-78.208094,43.232126,"Project Name: Heritage Wind, LLC<br>Year of De..."
1,Number Three Wind Farm,2022.0,-75.472278,43.798604,Project Name: Number Three Wind Farm<br>Year o...
2,Alle-Catt Wind Farm,2025.0,-78.391547,42.572191,Project Name: Alle-Catt Wind Farm<br>Year of D...
3,Eight Point Wind,2023.0,-77.686829,42.05757,Project Name: Eight Point Wind<br>Year of Deli...
4,Ball Hill,2023.0,-79.167766,42.52668,Project Name: Ball Hill<br>Year of Delivery St...


In [None]:
project_data2.to_csv('nyserda_data.csv')
!cp nyserda_data.csv "drive/My Drive/Capstone Data"

# Load Data: Tracy's radius dataset

In [None]:
df3 = pd.read_csv('/content/drive/MyDrive/Capstone Data/farms_loc_radius.csv')

In [None]:
df3['ghg_radius_miles'] = df3['ghg_radius_m']/1609.34
df3 = df3.rename(columns={"xlong": "longitude", "ylat": "latitude"})
df3.head()

Unnamed: 0,p_name,longitude,latitude,ghg_radius_m,ghg_radius_miles
0,Arkwright Summit,-79.231833,42.407748,19872.4,12.348168
1,Cassadaga,-79.174704,42.315485,19493.1,12.112481
2,Copenhagen,-75.639337,43.890878,20474.0,12.721985
3,Dutch Hill/Cohocton,-77.454707,42.533052,35746.6,22.211963
4,Fenner Wind Power Project,-75.760677,42.989136,10397.6,6.460785


In [None]:
df3.to_csv('ghg_radius_data.csv')
!cp ghg_radius_data.csv "drive/My Drive/Capstone Data"

# Load Data: State Park Facility Points

In [20]:
df4 = pd.read_csv('/content/drive/MyDrive/Capstone Data/State_Park_Facility_Points.csv')
df4 = df4[df4.Category.str.contains('Park')]
df4.reset_index(drop = True, inplace = True)
df4 = df4[['Name','Longitude','Latitude']]

In [21]:
df4.head()

Unnamed: 0,Name,Longitude,Latitude
0,Watkins Glen,-76.900134,42.36754
1,FDR Four Freedoms,-73.960379,40.750677
2,Mine Kill,-74.461121,42.434025
3,Amherst,-78.748088,42.971001
4,Montauk Point,-71.881388,41.07145


In [22]:
df4.to_csv('state_park_data.csv')
!cp state_park_data.csv "drive/My Drive/Capstone Data"

# Load Data: Active Capital Projects, NYS Thruway Authority

In [3]:
df5 = pd.read_csv('/content/drive/MyDrive/Capstone Data/Capital_Projects__NYS_Thruway_Authority_Capital_Programs__Beginning_2005.csv')
df5.dropna(subset=['Latitude','Longitude'], inplace = True)
df5.reset_index(drop = True, inplace = True)
df5 = df5[['DIVISION','ID','Longitude','Latitude']]
df5['text'] = df5['DIVISION'] + ' ' + df5['ID']

In [4]:
df5.head()

Unnamed: 0,DIVISION,ID,Longitude,Latitude,text
0,Albany,C172.1,-74.211062,42.946016,Albany C172.1
1,Syracuse,H1118.1,-76.924232,42.958034,Syracuse H1118.1
2,Albany,C67.1,-73.701738,42.802867,Albany C67.1
3,Buffalo,B556.2,-78.940905,43.000894,Buffalo B556.2
4,Syracuse,H911.2,-76.233375,43.118117,Syracuse H911.2


In [6]:
df5.to_csv('thruway_authority_data.csv')
!cp thruway_authority_data.csv "drive/My Drive/Capstone Data"

# Create Map

In [None]:
windfarm1 = pd.read_csv('/content/drive/MyDrive/Capstone Data/usgs_data.csv')

In [None]:
windfarm2 = pd.read_csv('/content/drive/MyDrive/Capstone Data/nyserda_data.csv')

In [4]:
ghg_circle = pd.read_csv('/content/drive/MyDrive/Capstone Data/ghg_radius_data.csv')

In [20]:
map = folium.Map(location = [ghg_circle.latitude.mean(), ghg_circle.longitude.mean()], zoom_start=7, control_scale=True)
# for i in range(0,len(ghg_circle)):
#    folium.Circle(
#       location=[ghg_circle.iloc[i]['latitude'], ghg_circle.iloc[i]['longitude']],
#       popup=ghg_circle.iloc[i]['p_name'],
#       radius=float(ghg_circle.iloc[i]['ghg_radius_m']),
#       color='crimson',
#       fill=True,
#       fill_color='crimson'
#    ).add_to(map)
for index, location_info in ghg_circle.iterrows():
    folium.Circle(
        location = [location_info["latitude"], location_info["longitude"]], 
        radius = location_info['ghg_radius_m'],
        color='crimson',
        fill=True,
        fill_color='crimson',
        popup = folium.Popup(location_info["p_name"])
        ).add_to(map)

map

In [None]:
map = folium.Map(location = [windfarm1.latitude.mean(), windfarm1.longitude.mean()], zoom_start=7, control_scale=True)

group0 = folium.FeatureGroup(name='<span style=\\"color: blue;\\">Exist Projects</span>')
for index, location_info in windfarm1.iterrows():

    folium.CircleMarker(
        location = [location_info["latitude"], location_info["longitude"]], 
        radius = location_info['numb_turbines']/2,
        fill_color ='blue',
        popup = folium.Popup(location_info["text"], min_width=250, max_width=250)
        ).add_to(group0)
group0.add_to(map)

group1 = folium.FeatureGroup(name='<span style=\\"color: orange;\\">Future Projects</span>')
for index, location_info in windfarm2.iterrows():
    if pd.notnull(location_info['latitude']):
        folium.CircleMarker(
        location = [location_info["latitude"], location_info["longitude"]], 
        radius = 5,
        color = 'orange',
        fill_color ='red',
        popup = folium.Popup(location_info["text"], min_width=250, max_width=250)
        ).add_to(group1)
group1.add_to(map)

folium.map.LayerControl('topleft', collapsed=False).add_to(map)
map