# Data Transformation: Pandas to SQL

In [1]:
# Import dependencies
import pandas as pd
from config import pgadmin_info
from sqlalchemy import create_engine
from sqlalchemy.dialects import postgresql

## Import data to Jupyter Notebook

In [2]:
# Read CSVs
# Every raw frame keeps an "x" prefix so it is never confused with the cleaned
# tables derived from it further down.

# Files that are parsed from their first line.
xsustainibility_df = pd.read_csv("../resources/sustainability.csv")
xcountries_df = pd.read_csv("../resources/countries.csv")
xhappiness_df = pd.read_csv("../resources/happiness.csv")
xgdp_df = pd.read_csv("../resources/gdp.csv")

# Files whose first lines are skipped before the header row is parsed.
LEADING_ROWS_TO_SKIP = 4
xincome_pc_df = pd.read_csv("../resources/income_pc.csv", skiprows=LEADING_ROWS_TO_SKIP)
xppp_df = pd.read_csv("../resources/ppp.csv", skiprows=LEADING_ROWS_TO_SKIP)
xunemployment_df = pd.read_csv("../resources/unemployment.csv", skiprows=LEADING_ROWS_TO_SKIP)

In [3]:
# Create regions table
# One row per distinct "Regional indicator". The provisional region_id is the
# smallest index label at which that region appears in the happiness data.
regions_df = (
    xhappiness_df[["Regional indicator"]]
    .reset_index()                                    # expose the index as an "index" column
    .groupby("Regional indicator", as_index=False)
    .min()                                            # first occurrence of each region
    .rename(columns={"Regional indicator": "region_name", "index": "region_id"})
    .loc[:, ["region_id", "region_name"]]             # put the ID column first
    .copy()
)
regions_df

Unnamed: 0,region_id,region_name
0,18,Central and Eastern Europe
1,37,Commonwealth of Independent States
2,24,East Asia
3,14,Latin America and Caribbean
4,13,Middle East and North Africa
5,7,North America and ANZ
6,65,South Asia
7,30,Southeast Asia
8,48,Sub-Saharan Africa
9,0,Western Europe


In [18]:
# Generate region IDs
# Replace the numeric region_id with an abbreviation built from the upper-case
# characters of the region name, in order of appearance.
#
# NOTE(review): every ID starts with a single space because the abbreviation is
# seeded with " " (e.g. " CEE"); rendered tables hide it. Kept as-is on purpose:
# any other table that stores region_id has to use the same format, so strip the
# space everywhere at once rather than only here.
regions_df['region_id'] = [
    " " + "".join(letter for letter in name if letter.isupper())
    for name in regions_df['region_name']
]
regions_df.head()

Unnamed: 0,region_id,region_name
0,CEE,Central and Eastern Europe
1,CIS,Commonwealth of Independent States
2,EA,East Asia
3,LAC,Latin America and Caribbean
4,MENA,Middle East and North Africa


In [6]:
# Generate countries table
# Map the raw CSV headers onto the snake_case column names used by the tables
# built in this notebook.
country_columns = {
    'Country Code': 'country_id',
    'Country': 'country_name',
    'Population': 'population',
}
countries_df = xcountries_df.rename(columns=country_columns)

countries_df.head()

Unnamed: 0,country_name,country_id,population
0,Afghanistan,AFG,26023100
1,Åland Islands,ALA,28875
2,Albania,ALB,2893005
3,Algeria,DZA,39500000
4,American Samoa,ASM,55519


In [35]:
# Associate countries to respective region ID
# Look each country's region up in regions_df instead of re-deriving the ID from
# the region name, so countries_df and regions_df always hold identical
# region_id values (no second copy of the abbreviation logic to keep in sync).
country_regions = (
    xhappiness_df[['Country code', 'Regional indicator']]
    .rename(columns={'Country code': 'country_id',
                     'Regional indicator': 'region_name'})
    # validate="m:1" raises if a region name is duplicated in regions_df,
    # which would otherwise silently multiply country rows.
    .merge(regions_df[['region_name', 'region_id']],
           on='region_name', how='left', validate='m:1')
    .loc[:, ['country_id', 'region_id']]
)
assert country_regions['region_id'].notna().all(), \
    "some happiness rows reference a region that is missing from regions_df"

# Drop any region_id left by a previous execution of this cell, then join on an
# explicit key; re-running the cell therefore yields the same frame instead of
# depending on whichever columns happen to be shared.
# Inner join: only countries present in the happiness data are kept.
countries_df = (
    countries_df
    .drop(columns=['region_id'], errors='ignore')
    .merge(country_regions, on='country_id', how='inner')
)
countries_df.head()

Unnamed: 0,country_name,country_id,population,region_id
0,Afghanistan,AFG,26023100,SA
1,Albania,ALB,2893005,CEE
2,Algeria,DZA,39500000,MENA
3,Argentina,ARG,43131966,LAC
4,Armenia,ARM,3006800,CIS


In [40]:
# Generate happiness table
# A single mapping drives both the column selection and the rename, so the two
# can never get out of step.
happiness_columns = {
    'Country code': 'country_id',
    'Ladder score': 'happiness',
    'Social support': 'social_support',
    'Explained by: Freedom to make life choices': 'freedom',
}
happiness_df = xhappiness_df[list(happiness_columns)].rename(columns=happiness_columns)
happiness_df.head()

Unnamed: 0,country_id,happiness,social_support,freedom
0,FIN,7.8087,0.95433,0.662317
1,DNK,7.6456,0.955991,0.66504
2,CHE,7.5599,0.942847,0.628954
3,ISL,7.5045,0.97467,0.661981
4,NOR,7.488,0.952487,0.670201


In [None]:
# Generate economy table
# Work in progress: this cell only sets up empty accumulators and the list of
# raw frames to walk; nothing is extracted yet.
# NOTE(review): presumably one accumulator per column of the future economy
# table (country, income, PPP, unemployment, GDP) — confirm before filling in.
countrylist = []
incomelist = []
ppplist = []
unemlist = []
gdplist = []

csvfiles = [xincome_pc_df, xppp_df, xunemployment_df, xgdp_df]
for file in csvfiles:
    # TODO: extract the values for each raw frame into the lists above.
    # The explicit `pass` keeps the cell syntactically valid so that
    # "Restart Kernel -> Run All" does not stop here with an IndentationError.
    pass

In [42]:
# Preview the first rows of the raw PPP frame.
# Leftover scratch line, disabled ("prueba" is Spanish for "test"): prueba = xincome_pc_df
xppp_df.head()

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,Unnamed: 65
0,Aruba,ABW,"PPP conversion factor, GDP (LCU per internatio...",PA.NUS.PPP,,,,,,,...,1.317801,1.285141,1.308843,1.362318,1.355045,1.35069,,,,
1,Afghanistan,AFG,"PPP conversion factor, GDP (LCU per internatio...",PA.NUS.PPP,,,,,,,...,17.242264,17.509419,17.035878,17.02252,17.445828,17.205558,17.144309,17.914483,,
2,Angola,AGO,"PPP conversion factor, GDP (LCU per internatio...",PA.NUS.PPP,,,,,,,...,65.681671,66.019371,65.000671,68.182045,80.778969,92.951721,117.116076,146.167168,,
3,Albania,ALB,"PPP conversion factor, GDP (LCU per internatio...",PA.NUS.PPP,,,,,,,...,43.655,44.113709,42.893807,42.705831,42.385879,42.255535,42.125796,41.342391,,
4,Andorra,AND,"PPP conversion factor, GDP (LCU per internatio...",PA.NUS.PPP,,,,,,,...,,,,,,,,,,
