
Exploratory Data Analysis using NYSERDA Data


In [4]:
# import modules, read data
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import seaborn as sns

## TODO: Replace with the path to your dataset
PATH_TO_DATASET = '/content/Large-scale_Renewable_Projects_Reported_by_NYSERDA__Beginning_2004_Wind.xlsx' 
############################################################

df = pd.read_excel(PATH_TO_DATASET)
df.head()

Unnamed: 0,Data Through Date,Eligibility,Project Name,Solicitation Name,Fixed REC Price,Index REC Strike Price,Renewable Technology,Generation Type,Type of Existing,Counterparty,...,Project Status,Year of Delivery Start Date,Contract Duration,New Renewable Capacity (MW),Bid Capacity (MW),Bid Quantity (MWh),Max Annual Contract Quantity (MWh),P10 Annual OREC Exceedance,Transmission Capacity (HVDC),Georeference
0,2022-07-15,Tier 1,Silver Lake Solar Project,RESRFP18-1,21.0,,Solar,New,,Silver Lake Solar LLC,...,Under Development,2024.0,20,24.99,24.99,38200.0,45840.0,,,POINT (-78.004538 42.731213)
1,2022-07-15,Tier 1,Great Gully Solar Farm (formerly Delight Farm),RESRFP20-1,,,Solar,New,,"Great Gully Solar Farm, LLC",...,Cancelled,2022.0,20,16.8,16.8,31052.0,37262.0,,,POINT (-76.659767 42.826929)
2,2022-07-15,Maintenance,Kayuta Lake Hydro _July 2021 Award,Tier 2 - Maintenance,,,Maintenance Hydroelectric,,,,...,Under Development,2022.0,3,,0.46,1681.0,1681.0,,,POINT (-75.112193 43.469339)
3,2022-07-15,Tier 1,Tannery Island - Tannery Island Hydro,3257,9.5,,Hydroelectric,Existing,Upgrade,Ampersand Tannery Island Hydro LLC,...,Operational,2016.0,20,0.17,0.16,827.0,827.0,,,POINT (-75.608418 43.979234)
4,2022-07-15,Maintenance,Kayuta Lake Hydro,Tier 2 - Maintenance,,,Maintenance Hydroelectric,,,,...,Completed,2018.0,3,,0.46,1897.0,1897.0,,,POINT (-75.112193 43.469339)


In [53]:
df['longitude']=df['Georeference'].str.slice(7, 17)

In [58]:
df['latitude']=df['Georeference'].str.slice(18, 27)

In [59]:
df['latitude']

0      42.731213
1      42.826929
2      43.469339
3      43.979234
4      43.469339
         ...    
223    43.247901
224    43.163267
225    41.032718
226    41.032718
227    40.507933
Name: latitude, Length: 228, dtype: object

In [60]:

import plotly.express as px

In [61]:
fig=px.scatter_geo(df,lat=df['latitude'], lon=df['longitude'])
fig.update_layout(title='Significant Windmills Location',title_x=1)
fig.show()

Exploratory Data Analytics and Visualizations

In [62]:
import plotly.graph_objects as go

Checking Null Years in dataset

In [96]:
df['Year of Delivery Start Date'].isnull().sum()

15

In [94]:
#Treating NAN year to be the year to have the mean year as their value

round(df['Year of Delivery Start Date'].mean(),0)



2019.0

In [104]:
df['Year of Delivery Start Date'].fillna(round(df['Year of Delivery Start Date'].mean(),0),inplace=True)

In [105]:
df['Year of Delivery Start Date'].isnull().sum()


0

In [116]:
scl = [0,"rgb(0,177,64)"],[1,"rgb(255, 0, 0)"]

fig = go.Figure(data=go.Scattergeo(
    lat = df['latitude'],
    lon = df['longitude'],
    text = df['Year of Delivery Start Date'].astype(str) + ' year',
    marker = dict(
        color = df['Year of Delivery Start Date'],
        colorscale = scl,
        reversescale = True,
        opacity = 0.5,
        size = 5,
        colorbar = dict(
            titleside = "right",
            outlinecolor = "rgba(68, 68, 68, 0)",
            ticks = "outside",
            showticksuffix = "last",
            dtick = 0.1
        )
    )
))

fig.update_layout(
    geo = dict(
        scope = 'north america',
        showland = True,
        landcolor = "rgb(212, 212, 212)",
        subunitcolor = "rgb(255, 255, 255)",
        countrycolor = "rgb(255, 255, 255)",
        showlakes = True,
        lakecolor = "rgb(255, 255, 255)",
        showsubunits = True,
        showcountries = True,
        resolution = 50,
        projection = dict(
            type = 'natural earth',
            rotation_lon = -100
        ),
        lonaxis = dict(
            showgrid = True,
            gridwidth = 0.5,
            range= [ -140.0, -55.0 ],
            dtick = 5
        ),
        lataxis = dict (
            showgrid = True,
            gridwidth = 0.5,
            range= [ 10.0, 60.0 ],
            dtick = 10

        )
    ),
    title='US New York City Windmills with Installation Year',
)
fig.show()


Life Span Based on Contract of the WindMill

In [115]:
import plotly.express as px

fig = px.bar(df, x=df['Year of Delivery Start Date'], y=df['Contract Duration'],
             hover_data=['Year of Delivery Start Date', 'Contract Duration'], color='Contract Duration',
             labels={'Year':'Contract Years'}, height=400)
fig.show()