## Initial analysis

In [1]:
import numpy as np
import pandas as pd


# Location of the CSV file that the loading cell reads. It is a relative path,
# so it is resolved against the notebook's current working directory.
data_path = "global_power_plant_database.csv"

In [2]:
# Read the whole CSV into a DataFrame using pandas' default parsing options.
powerplant_data = pd.read_csv(data_path)

In [4]:
# Preview the first five rows (five is head()'s default row count).
powerplant_data.head()

Unnamed: 0,country,country_long,name,gppd_idnr,capacity_mw,latitude,longitude,primary_fuel,other_fuel1,other_fuel2,...,url,geolocation_source,wepp_id,year_of_capacity_data,generation_gwh_2013,generation_gwh_2014,generation_gwh_2015,generation_gwh_2016,generation_gwh_2017,estimated_generation_gwh
0,AFG,Afghanistan,Kajaki Hydroelectric Power Plant Afghanistan,GEODB0040538,33.0,32.322,65.119,Hydro,,,...,http://globalenergyobservatory.org,GEODB,1009793.0,2017.0,,,,,,
1,AFG,Afghanistan,Mahipar Hydroelectric Power Plant Afghanistan,GEODB0040541,66.0,34.556,69.4787,Hydro,,,...,http://globalenergyobservatory.org,GEODB,1009795.0,2017.0,,,,,,
2,AFG,Afghanistan,Naghlu Dam Hydroelectric Power Plant Afghanistan,GEODB0040534,100.0,34.641,69.717,Hydro,,,...,http://globalenergyobservatory.org,GEODB,1009797.0,2017.0,,,,,,
3,AFG,Afghanistan,Nangarhar (Darunta) Hydroelectric Power Plant ...,GEODB0040536,11.55,34.4847,70.3633,Hydro,,,...,http://globalenergyobservatory.org,GEODB,1009787.0,2017.0,,,,,,
4,AFG,Afghanistan,Northwest Kabul Power Plant Afghanistan,GEODB0040540,42.0,34.5638,69.1134,Gas,,,...,http://globalenergyobservatory.org,GEODB,,2017.0,,,,,,


Let's see the dataset size.

In [5]:
# DataFrame.shape is a (rows, columns) pair, so unpack it once and report both.
row_count, column_count = powerplant_data.shape
print(f"The dataset consists of {row_count} rows and {column_count} columns")

The dataset consists of 29910 rows and 24 columns


Let's see the columns of the dataset.

In [6]:
# List the column labels to see which fields are available.
powerplant_data.columns

Index(['country', 'country_long', 'name', 'gppd_idnr', 'capacity_mw',
       'latitude', 'longitude', 'primary_fuel', 'other_fuel1', 'other_fuel2',
       'other_fuel3', 'commissioning_year', 'owner', 'source', 'url',
       'geolocation_source', 'wepp_id', 'year_of_capacity_data',
       'generation_gwh_2013', 'generation_gwh_2014', 'generation_gwh_2015',
       'generation_gwh_2016', 'generation_gwh_2017',
       'estimated_generation_gwh'],
      dtype='object')

Companies with the most powerplants (note that the owner is missing for roughly a third of the rows, so `NaN` tops the list)

In [7]:
# Rank owners by how many plants they hold. NaN is kept as its own bucket so
# that missing ownership information is visible in the ranking.
owner_counts = powerplant_data["owner"].value_counts(dropna=False)
owner_counts.iloc[:15]

NaN                              10379
Lightsource Renewable Energy       120
Cypress Creek Renewables           109
Sustainable Power Group  LLC        85
Verbund                             83
Pacific Gas & Electric Co.          81
Erie Boulevard Hydropower LP        71
CFE                                 66
100% Vattenfall                     65
Southern California Edison Co       62
WM Renewable Energy LLC             61
PacifiCorp                          59
Tesla Inc.                          58
Hydro-Québec                        58
U S Bureau of Reclamation           57
Name: owner, dtype: int64

Countries with the most powerplants

In [8]:
# Count plants per country (missing values, if any, are counted too) and show
# the 15 largest entries.
plants_per_country = powerplant_data.loc[:, "country_long"].value_counts(dropna=False)
plants_per_country.head(15)

United States of America    8686
China                       3041
United Kingdom              2536
Brazil                      2340
France                      2017
Canada                      1154
Germany                      982
India                        861
Spain                        614
Russia                       505
Portugal                     462
Czech Republic               457
Australia                    429
Japan                        329
Norway                       306
Name: country_long, dtype: int64

Primary powerplant type (share of all powerplants, in %)

In [9]:
# Fraction of plants per primary fuel (NaN kept as its own bucket), scaled to
# percentages for readability.
primary_fuel_fraction = powerplant_data["primary_fuel"].value_counts(normalize=True, dropna=False)
primary_fuel_fraction.mul(100)

Hydro             23.921765
Solar             19.822802
Wind              17.345369
Gas               13.112671
Coal               7.990639
Oil                7.656302
Biomass            4.667335
Waste              3.634236
Nuclear            0.661986
Geothermal         0.631896
Storage            0.193915
Other              0.147108
Cogeneration       0.137078
Petcoke            0.043464
Wave and Tidal     0.033434
Name: primary_fuel, dtype: float64

## Let's see some cool facts, starting with Tesla's powerplants

In [10]:
# Select the rows whose owner is exactly "Tesla Inc." (rows with a missing
# owner compare as False and are dropped), then preview the first five.
is_tesla_owned = powerplant_data["owner"].eq("Tesla Inc.")
tesla_powerplant_data = powerplant_data.loc[is_tesla_owned]
tesla_powerplant_data.head()

Unnamed: 0,country,country_long,name,gppd_idnr,capacity_mw,latitude,longitude,primary_fuel,other_fuel1,other_fuel2,...,url,geolocation_source,wepp_id,year_of_capacity_data,generation_gwh_2013,generation_gwh_2014,generation_gwh_2015,generation_gwh_2016,generation_gwh_2017,estimated_generation_gwh
20925,USA,United States of America,AVS Lancaster 1,USA0060085,3.7,34.7489,-117.9736,Solar,,,...,http://www.eia.gov/electricity/data/browser/,U.S. Energy Information Administration,70684.0,2017.0,,,,0.0,8.472,1.649932
20931,USA,United States of America,Actus Lend Lease DMAFB,USA0058632,3.0,32.1825,-110.8633,Solar,,,...,http://www.eia.gov/electricity/data/browser/,U.S. Energy Information Administration,,2017.0,4.93317,4.56687,4.39659,4.481,4.402,
20954,USA,United States of America,Advance Stores Company Inc,USA0059788,1.0,42.015,-72.5119,Solar,,,...,http://www.eia.gov/electricity/data/browser/,U.S. Energy Information Administration,,2017.0,,0.07623,1.38006,1.447,1.308,
21317,USA,United States of America,BJ's Wholesale Club Inc- Uxbridge,USA0060116,1.0,42.0197,-71.6041,Solar,,,...,http://www.eia.gov/electricity/data/browser/,U.S. Energy Information Administration,,2017.0,,,,0.0,0.388,0.445928
21318,USA,United States of America,BJ's Wholesale Club Inc Dist Center,USA0060227,1.4,40.0985,-74.8259,Solar,,,...,http://www.eia.gov/electricity/data/browser/,U.S. Energy Information Administration,,2017.0,,,,0.299,2.299,0.624299


In [13]:
# Derive the fuel part of the sentence from the data instead of hard-coding
# "solar", so the statement stays true if the dataset or the owner filter changes.
# When every plant shares one fuel, the wording matches the original message.
tesla_fuels = sorted(tesla_powerplant_data["primary_fuel"].dropna().str.lower().unique())
if len(tesla_fuels) == 1:
    tesla_fuel_summary = f"all of them {tesla_fuels[0]}"
elif tesla_fuels:
    tesla_fuel_summary = "primary fuels: " + ", ".join(tesla_fuels)
else:
    # No rows matched, or none of the matching rows has a primary fuel recorded.
    tesla_fuel_summary = "no primary fuel recorded"
print(f"Currently, Tesla Inc. has {tesla_powerplant_data.shape[0]} powerplants; {tesla_fuel_summary}")

Currently, Tesla Inc. has 58 powerplants; all of them solar


In [12]:
# Percentage of Tesla-owned plants per country code (NaN kept as its own bucket).
tesla_country_fraction = tesla_powerplant_data["country"].value_counts(normalize=True, dropna=False)
tesla_country_fraction.mul(100)

USA    100.0
Name: country, dtype: float64

### To-do next:
- data cleaning
- deciding on what to do with this dataset