### Imports and settings

**Note:** running this notebook requires `geopandas` to be installed (together with `shapely` and `cartopy`, which are imported below).

In [6]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import shapely as sh
from cartopy import crs as ccrs
import warnings
warnings.filterwarnings('ignore')
import imageio
import matplotlib

# Let pandas print up to 4000 rows before truncating displayed frames.
pd.options.display.max_rows = 4000

# Define function to read csv files and convert to GeoDataFrame:

def csv_to_geo(path):
    """Read a CSV file that has a ``geometry`` column and return a GeoDataFrame.

    Geometries stored in a CSV come back from ``pd.read_csv`` as WKT strings,
    so every string in the ``geometry`` column is parsed into a shapely
    object; values that are not strings are left untouched.

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file; it must contain a ``geometry`` column.

    Returns
    -------
    geopandas.GeoDataFrame
        The file contents with ``geometry`` as the active geometry column.
    """
    csv_file = pd.read_csv(path)

    # The previous check `type(x) == Polygon == False` is a chained comparison
    # (`type(x) == Polygon and Polygon == False`) and therefore always False,
    # so the WKT strings were never converted. Testing each value for `str`
    # also covers non-Polygon geometries and an empty file.
    csv_file["geometry"] = pd.Series(
        [sh.wkt.loads(value) if isinstance(value, str) else value
         for value in csv_file.geometry],
        index=csv_file.index,
        dtype=object,
    )

    return gpd.GeoDataFrame(data=csv_file, geometry=csv_file.geometry)

# IMPORT DATA FILES

In [7]:
# Columns of the energy dataset that this notebook works with.
cols = ["iso_code",
        "country",
        "year",
        "coal_consumption",
        "coal_production",
        "oil_consumption",
        "oil_production",
        "gas_consumption",
        "gas_production",
        "nuclear_consumption",
        "nuclear_electricity",
        "wind_consumption",
        "wind_electricity",
        "renewables_consumption",
        "renewables_electricity",
        "other_renewable_consumption",
        "other_renewable_electricity",
        "population"]

# Get dataset
df = pd.read_csv("World Energy Consumption.csv", usecols=cols)

# Get geodata: align the ISO column name with the energy data, order by
# country name and discard both the old index and the CSV's saved index column.
world = (
    pd.read_csv("world.csv")
    .rename(columns={"iso_a3": "iso_code"})
    .sort_values(by="name")
    .reset_index(drop=True)
    .drop(columns=["Unnamed: 0"])
)

# Row count of the energy data before filtering.
print(len(df.index))

# Transform geodata into geoobjects instead of strings
world["geometry"] = pd.Series([sh.wkt.loads(wkt) for wkt in world.geometry])

# Keep only the rows whose ISO code has a matching shape in `world`.
# A vectorised `isin` replaces the former comprehension, which rebuilt
# `list(world.iso_code)` once for every row of `df`.
df = df.loc[df.iso_code.isin(world.iso_code)]

17432


# Join data with the geodata

In [8]:
# Attach each country's shape and metadata to every energy row.
dfg = df.merge(right=world, how="left", on="iso_code")

# Save next to the notebook, like the other outputs of this notebook. The
# previous absolute path pointed into one user's home directory and raised
# an OSError on any other machine.
dfg.to_csv("joined_data.csv")

# Convert `DataFrames` to `GeoDataFrame`

In [9]:
# Promote both tables to GeoDataFrames, using their own "geometry" column
# as the active geometry. Note that `world` is rebound to the new object.
countries = gpd.GeoDataFrame(dfg, geometry=dfg["geometry"])
world = gpd.GeoDataFrame(world, geometry=world["geometry"])

# Day 2

* Create dictionary containing countries : geometry
* Add empty rows for missing years
* Generate series containing geometry
* Add as column to full df

# Geo dictionary

In [10]:
# Map ISO code -> [geometry, country name]. Walking the three columns in
# lockstep avoids rebuilding three full lists on every iteration.
geo_dict = {
    iso: [geometry, name]
    for iso, geometry, name in zip(world["iso_code"], world["geometry"], world["name"])
}
# Antarctica is handled separately further down, so it is removed here.
del geo_dict["ATA"]

In [11]:
# Distinct ISO codes in order of first appearance in `countries`.
unique_codes = countries["iso_code"].unique()
isos = unique_codes.tolist()
# NOTE(review): this removes whatever code sits at position 1; presumably that
# is the entry without a `geo_dict` record (Antarctica?) -- confirm, and
# consider removing it by value instead of by position.
del isos[1]

# Impute blank rows where data is missing

In [12]:
from IPython.display import clear_output

# Every country gets one row per year from 1900 through 2019. Years without
# data are added as placeholder rows whose value columns are all NaN; the
# placeholders come first, followed by the country's existing rows.
fillcols = [i for i in countries.columns if i not in ("iso_code", "country", "year", "geometry")]
blank_values = dict.fromkeys(fillcols, np.nan)
years = range(1900, 2020)

# Frames are collected here and concatenated once at the end instead of
# growing `full_countries` with a concat on every iteration.
frames = []

for tick, a in enumerate(isos, start=1):

    clear_output(wait=True)

    country_name = geo_dict[a][1]
    # Progress is measured against the codes actually processed (`isos`), so it
    # reaches 100%; dividing by all unique codes in `countries` stopped at 99%.
    prog = round(tick / len(isos) * 100)
    print(f"Working on:\t {country_name}")
    print(f"Progress: {prog}%")

    # Select this country's rows once rather than filtering the whole frame
    # again for each of the 120 years.
    country_rows = countries.loc[countries.iso_code == a]
    known_years = set(country_rows.year)

    placeholders = [
        {"iso_code": a, "country": country_name, "year": year, **blank_values}
        for year in years
        if year not in known_years
    ]
    if placeholders:
        # Columns missing from the records (the geometry) are filled with NaN.
        frames.append(pd.DataFrame(placeholders, columns=countries.columns))
    frames.append(country_rows)

# `ignore_index=True` yields the fresh 0..n-1 index directly.
if frames:
    full_countries = pd.concat(frames, ignore_index=True)
else:
    full_countries = pd.DataFrame(columns=countries.columns)

Working on:	 Zimbabwe
Progress: 99%


# Generate geography column and add to gdf

In [13]:
# Look up the shape for each row's ISO code; position k of the resulting
# Series lines up with index label k of `full_countries`.
row_geometries = [geo_dict[code][0] for code in full_countries["iso_code"]]
geocol = pd.Series(row_geometries)
full_countries["geometry"] = geocol

# Impute Antarctica to make maps look nice.

In [14]:
# Columns holding numeric measurements (everything except identifiers,
# world metadata and the geometry).
numericcols = [i for i in countries.columns if i not in ("iso_code",
                                                         "country",
                                                         "year",
                                                         "continent",
                                                         "pop_est",
                                                         "gdp_md_est",
                                                         "geometry", "name")]

# Antarctica was removed from `geo_dict`, so its shape is read from `world`.
# The lookup does not depend on the year and is done once.
ata_geometry = world.loc[world.iso_code == "ATA", "geometry"].iloc[0]

# Antarctica receives the column-wise MINIMUM of every numeric column
# (the former name `maxvec` was misleading; the values were always minima).
ata_fill_values = {col: full_countries[col].min() for col in numericcols}

# One row per year from 1900 through 2019. Building the rows as records keyed
# by column name removes the dependence on the exact column order; columns
# not named here (the world metadata) stay NaN.
ATA = pd.DataFrame(
    [
        {
            "iso_code": "ATA",
            "country": "Antarctica",
            "year": year,
            **ata_fill_values,
            "geometry": ata_geometry,
        }
        for year in range(1900, 2020)
    ],
    columns=countries.columns,
)

full_countries = pd.concat([full_countries, ATA])

# Reset the index in place

In [15]:
# Renumber the rows 0..n-1 in place and discard the old index labels
# (they repeat after the Antarctica rows were appended).
full_countries.reset_index(drop=True, inplace=True)

# Rename to something nice and reformat to geodataframe + save

In [16]:
# Save one file for the non-normalized data and one for the normalized data.
# The GeoDataFrame is built from an independent snapshot of `full_countries`;
# previously the copy was overwritten on the next line and never used.
lands_non_norm = full_countries.copy()
lands_non_norm = gpd.GeoDataFrame(data=lands_non_norm, geometry=lands_non_norm.geometry)
lands_non_norm.to_csv("countries_with_geo_no_normal_with_population.csv")

# Special case 1 

# Make Taiwan maximum in all numeric columns so that all maps stay relative to the same value over time.

In [1078]:
# Work on an independent (deep) copy so `full_countries` stays unchanged.
lands_norm = full_countries.copy(deep=True)

In [1079]:
# Column-wise maximum of every numeric column. The accompanying text says
# Taiwan should carry the maximum, which is what the name `maxvec` implies.
# NOTE(review): this previously used `.mean()` -- confirm max is intended.
maxvec = [full_countries[col].max() for col in numericcols]

# Get index positions for taiwan:
twn_index = full_countries.index[full_countries.country == "Taiwan"]

# Write each value with a single `.loc` assignment. The former chained form
# `lands_norm[j][i] = ...` may write to a temporary copy and is silently a
# no-op when pandas copy-on-write is enabled.
for col, peak in zip(numericcols, maxvec):
    lands_norm.loc[twn_index, col] = peak

# 2. Rename to something nice and reformat to geodataframe + save

In [1081]:
# Build the GeoDataFrame from `lands_norm` itself. Rebuilding it from
# `full_countries` discarded the Taiwan adjustments, and the frame that was
# saved (`lands`) is not defined in this notebook.
lands_norm = gpd.GeoDataFrame(data=lands_norm, geometry=lands_norm.geometry)
lands_norm.to_csv("countries_with_geo_normalized.csv")