<a href="https://colab.research.google.com/github/wcj365/python-stats-dataviz/blob/master/wdx_2000_2020_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import plotly
import plotly.express as px

# CSV file that the notebook loads with pd.read_csv.
DATA_URL = "https://raw.githubusercontent.com/wcj365/jay_data690/main/wdi_data_2000_2020.csv"

# Display options handed to plotly via `fig.show(config=config_dict)`.
config_dict = dict(
    editable=True,
    # Per-element switches for in-chart editing; only the legend text is
    # switched off.
    edits=dict(
        titleText=True,
        axisTitleText=True,
        annotationText=True,
        annotationPosition=True,
        shapePosition=True,
        legendPosition=True,
        legendText=False,
    ),
    # Extra drawing/erasing buttons requested for the mode bar.
    modeBarButtonsToAdd=[
        "drawline",
        "drawopenpath",
        "drawclosedpath",
        "drawcircle",
        "drawrect",
        "eraseshape",
    ],
    toImageButtonOptions=dict(scale=2),
)

In [2]:
# Load the CSV published at DATA_URL into a DataFrame.
df = pd.read_csv(DATA_URL)

# Preview the first rows as the cell output.
df.head()

Unnamed: 0.1,Unnamed: 0,Year,value,indicator,Country Code,Country Name,Region,Income Group,Lending Type
0,0,2000,20779957.0,SP.POP.TOTL,AFG,Afghanistan,South Asia,Low income,IDA
1,1,2001,21606992.0,SP.POP.TOTL,AFG,Afghanistan,South Asia,Low income,IDA
2,2,2002,22600774.0,SP.POP.TOTL,AFG,Afghanistan,South Asia,Low income,IDA
3,3,2003,23680871.0,SP.POP.TOTL,AFG,Afghanistan,South Asia,Low income,IDA
4,4,2004,24726689.0,SP.POP.TOTL,AFG,Afghanistan,South Asia,Low income,IDA


In [3]:
# Remove the "Unnamed: 0" column that was read in from the CSV.
# The frame is rebound instead of mutated in place, and errors="ignore" makes
# the cell safe to re-run: once the column is gone, a second run is a no-op
# rather than a KeyError.
df = df.drop(columns=["Unnamed: 0"], errors="ignore")

# Preview the last rows as the cell output.
df.tail()

Unnamed: 0,Year,value,indicator,Country Code,Country Name,Region,Income Group,Lending Type
16253,2015,18.0,SH.STA.SUIC.P5,ZWE,Zimbabwe,Sub-Saharan Africa,Lower middle income,Blend
16254,2016,16.8,SH.STA.SUIC.P5,ZWE,Zimbabwe,Sub-Saharan Africa,Lower middle income,Blend
16255,2017,15.0,SH.STA.SUIC.P5,ZWE,Zimbabwe,Sub-Saharan Africa,Lower middle income,Blend
16256,2018,14.0,SH.STA.SUIC.P5,ZWE,Zimbabwe,Sub-Saharan Africa,Lower middle income,Blend
16257,2019,14.1,SH.STA.SUIC.P5,ZWE,Zimbabwe,Sub-Saharan Africa,Lower middle income,Blend


In [4]:
# Print the column names, non-null counts and dtypes of df.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16258 entries, 0 to 16257
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Year          16258 non-null  int64  
 1   value         16258 non-null  float64
 2   indicator     16258 non-null  object 
 3   Country Code  16258 non-null  object 
 4   Country Name  16258 non-null  object 
 5   Region        16258 non-null  object 
 6   Income Group  16258 non-null  object 
 7   Lending Type  16258 non-null  object 
dtypes: float64(1), int64(1), object(6)
memory usage: 1016.2+ KB


In [5]:
# Keep only the observations recorded for the year 2019.
df_2019 = df.loc[df["Year"].eq(2019)]
df_2019.head()

Unnamed: 0,Year,value,indicator,Country Code,Country Name,Region,Income Group,Lending Type
19,2019,38041757.0,SP.POP.TOTL,AFG,Afghanistan,South Asia,Low income,IDA
40,2019,2854191.0,SP.POP.TOTL,ALB,Albania,Europe & Central Asia,Upper middle income,IBRD
61,2019,43053054.0,SP.POP.TOTL,DZA,Algeria,Middle East & North Africa,Lower middle income,IBRD
82,2019,55312.0,SP.POP.TOTL,ASM,American Samoa,East Asia & Pacific,Upper middle income,Not classified
103,2019,77146.0,SP.POP.TOTL,AND,Andorra,Europe & Central Asia,High income,Not classified


In [12]:
# (rows, columns) of the 2019 subset.
df_2019.shape

(789, 8)

In [8]:
# Show every 2019 row whose country code is AFG.
df_2019.loc[df_2019["Country Code"].eq("AFG")]

Unnamed: 0,Year,value,indicator,Country Code,Country Name,Region,Income Group,Lending Type
19,2019,38041760.0,SP.POP.TOTL,AFG,Afghanistan,South Asia,Low income,IDA
4567,2019,64.833,SP.DYN.LE00.IN,AFG,Afghanistan,South Asia,Low income,IDA
8569,2019,2152.366,NY.GDP.PCAP.PP.CD,AFG,Afghanistan,South Asia,Low income,IDA
12617,2019,4.1,SH.STA.SUIC.P5,AFG,Afghanistan,South Asia,Low income,IDA


In [9]:
# Reshape the 2019 rows from long to wide: one row per combination of the
# index columns below, one column per distinct `indicator`, cells filled from
# `value`. pivot_table's default aggregation is left unchanged.
df_2019_pivot = pd.pivot_table(
    df_2019,
    values="value",
    index=[
        "Year",
        "Country Code",
        "Country Name",
        "Region",
        "Income Group",
        "Lending Type",
    ],
    columns="indicator",
)
df_2019_pivot.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,indicator,NY.GDP.PCAP.PP.CD,SH.STA.SUIC.P5,SP.DYN.LE00.IN,SP.POP.TOTL
Year,Country Code,Country Name,Region,Income Group,Lending Type,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2019,ABW,Aruba,Latin America & Caribbean,High income,Not classified,,,76.293,106310.0
2019,AFG,Afghanistan,South Asia,Low income,IDA,2152.366489,4.1,64.833,38041757.0
2019,AGO,Angola,Sub-Saharan Africa,Lower middle income,IBRD,6952.419362,6.1,61.147,31825299.0
2019,ALB,Albania,Europe & Central Asia,Upper middle income,IBRD,14012.976435,4.3,78.573,2854191.0
2019,AND,Andorra,Europe & Central Asia,High income,Not classified,,,,77146.0


In [10]:
# Move the pivot's index levels back into ordinary columns.
# The guard keeps the cell idempotent: once the index has been flattened,
# calling reset_index() again would insert a spurious "index" column, so the
# reset only happens while the frame still carries a MultiIndex.
if isinstance(df_2019_pivot.index, pd.MultiIndex):
    df_2019_pivot = df_2019_pivot.reset_index()

df_2019_pivot.head()

indicator,Year,Country Code,Country Name,Region,Income Group,Lending Type,NY.GDP.PCAP.PP.CD,SH.STA.SUIC.P5,SP.DYN.LE00.IN,SP.POP.TOTL
0,2019,ABW,Aruba,Latin America & Caribbean,High income,Not classified,,,76.293,106310.0
1,2019,AFG,Afghanistan,South Asia,Low income,IDA,2152.366489,4.1,64.833,38041757.0
2,2019,AGO,Angola,Sub-Saharan Africa,Lower middle income,IBRD,6952.419362,6.1,61.147,31825299.0
3,2019,ALB,Albania,Europe & Central Asia,Upper middle income,IBRD,14012.976435,4.3,78.573,2854191.0
4,2019,AND,Andorra,Europe & Central Asia,High income,Not classified,,,,77146.0


In [11]:
# (rows, columns) of the wide 2019 table.
df_2019_pivot.shape

(217, 10)

In [13]:
# List the rows that have a missing value in at least one column.
df_2019_pivot.loc[df_2019_pivot.isna().any(axis="columns")]

indicator,Year,Country Code,Country Name,Region,Income Group,Lending Type,NY.GDP.PCAP.PP.CD,SH.STA.SUIC.P5,SP.DYN.LE00.IN,SP.POP.TOTL
0,2019,ABW,Aruba,Latin America & Caribbean,High income,Not classified,,,76.293,106310.0
4,2019,AND,Andorra,Europe & Central Asia,High income,Not classified,,,,77146.0
8,2019,ASM,American Samoa,East Asia & Pacific,Upper middle income,Not classified,,,,55312.0
24,2019,BMU,Bermuda,North America,High income,Not classified,85263.763003,,81.866585,63913.0
34,2019,CHI,Channel Islands,Europe & Central Asia,High income,Not classified,,,83.086,172264.0
45,2019,CUB,Cuba,Latin America & Caribbean,Upper middle income,Not classified,,14.5,78.802,11333484.0
46,2019,CUW,Curacao,Latin America & Caribbean,High income,Not classified,25530.206866,,,157441.0
47,2019,CYM,Cayman Islands,Latin America & Caribbean,High income,Not classified,76747.666306,,,64948.0
52,2019,DMA,Dominica,Latin America & Caribbean,Upper middle income,Blend,12892.0211,,,71808.0
58,2019,ERI,Eritrea,Sub-Saharan Africa,Low income,IDA,,10.9,66.321,


In [14]:
# (rows, columns) of the subset that has at least one missing value.
df_2019_pivot.loc[df_2019_pivot.isna().any(axis="columns")].shape

(41, 10)

In [15]:
# Discard every row that has a missing value in any column.
df_2019_pivot = df_2019_pivot.dropna(axis="index", how="any")

df_2019_pivot.head()

indicator,Year,Country Code,Country Name,Region,Income Group,Lending Type,NY.GDP.PCAP.PP.CD,SH.STA.SUIC.P5,SP.DYN.LE00.IN,SP.POP.TOTL
1,2019,AFG,Afghanistan,South Asia,Low income,IDA,2152.366489,4.1,64.833,38041757.0
2,2019,AGO,Angola,Sub-Saharan Africa,Lower middle income,IBRD,6952.419362,6.1,61.147,31825299.0
3,2019,ALB,Albania,Europe & Central Asia,Upper middle income,IBRD,14012.976435,4.3,78.573,2854191.0
5,2019,ARE,United Arab Emirates,Middle East & North Africa,High income,Not classified,71150.542141,6.4,77.972,9770526.0
6,2019,ARG,Argentina,Latin America & Caribbean,Upper middle income,IBRD,22999.275053,8.4,76.667,44938712.0


In [16]:
df_2019_pivot.shape

(176, 10)

In [48]:
# Bubble chart for the South Asia rows: NY.GDP.PCAP.PP.CD on x,
# SP.DYN.LE00.IN on y, marker size taken from SP.POP.TOTL, and each marker
# labelled with its country code.
fig = px.scatter(
    df_2019_pivot[df_2019_pivot["Region"] == "South Asia"],
    x="NY.GDP.PCAP.PP.CD",
    y="SP.DYN.LE00.IN",
    size="SP.POP.TOTL",
    size_max=100,
    text="Country Code",
    height=800,
    width=600,
    hover_name="Country Name",
    # Name the template explicitly instead of picking it by position in
    # plotly.io.templates; position 6 is "presentation" in this notebook's
    # template listing, and a positional lookup silently changes theme if the
    # registry order ever differs.
    template="presentation",
    labels={"NY.GDP.PCAP.PP.CD":"2019 GDP per Capita","SP.DYN.LE00.IN":"2019 Life Expectancy"},
    title="Wealth vs Health for South Asia",
)

# Place the country-code labels to the right of their markers.
fig.update_traces(textposition='middle right')
fig.show()

In [41]:
# Bubble chart for the South Asia rows at the default figure size, with a
# larger maximum marker size: NY.GDP.PCAP.PP.CD on x, SP.DYN.LE00.IN on y,
# marker size taken from SP.POP.TOTL.
fig = px.scatter(
    df_2019_pivot[df_2019_pivot["Region"] == "South Asia"],
    x="NY.GDP.PCAP.PP.CD",
    y="SP.DYN.LE00.IN",
    size="SP.POP.TOTL",
    size_max=200,
    text="Country Code",
    hover_name="Country Name",
    # Name the template explicitly instead of picking it by position in
    # plotly.io.templates; position 6 is "presentation" in this notebook's
    # template listing, and a positional lookup silently changes theme if the
    # registry order ever differs.
    template="presentation",
    labels={"NY.GDP.PCAP.PP.CD":"2019 GDP per Capita","SP.DYN.LE00.IN":"2019 Life Expectancy"},
    title="Wealth vs Health for South Asia",
)

fig.show()

In [None]:
# Bubble chart for the East Asia & Pacific rows: NY.GDP.PCAP.PP.CD on x,
# SP.DYN.LE00.IN on y, marker size taken from SP.POP.TOTL, one colour and one
# text label per country.
fig = px.scatter(
    df_2019_pivot.query("Region == 'East Asia & Pacific'"),
    x="NY.GDP.PCAP.PP.CD",
    y="SP.DYN.LE00.IN",
    size="SP.POP.TOTL",
    size_max=200,
    labels={"NY.GDP.PCAP.PP.CD":"2019 GDP per Capita","SP.DYN.LE00.IN":"2019 Life Expectancy"},
    title="Wealth vs Health",
    # Name the template explicitly instead of picking it by position in
    # plotly.io.templates; position 1 is "seaborn" in this notebook's template
    # listing, and a positional lookup silently changes theme if the registry
    # order ever differs.
    template="seaborn",
    height=800,
    text="Country Name",
    color="Country Name",
)

# Place the country-name labels above their markers.
fig.update_traces(textposition='top center')

# The legend is hidden; the markers already carry the country names as text.
# update_layout returns the figure, so this last expression is also what the
# cell displays.
fig.update_layout(showlegend=False)

In [None]:
# Scatter plot for the East Asia & Pacific rows where the country names are
# added as annotations instead of trace text, then shown with config_dict.

# Select the region once; the same rows feed both the scatter and the
# annotation loop.
east_asia_pacific = df_2019_pivot.query("Region == 'East Asia & Pacific'")

fig = px.scatter(
    east_asia_pacific,
    x="NY.GDP.PCAP.PP.CD",
    y="SP.DYN.LE00.IN",
    # NOTE(review): size_max looks inert while no `size` column is given -
    # confirm before removing.
    size_max=200,
    labels={"NY.GDP.PCAP.PP.CD":"2019 GDP per Capita","SP.DYN.LE00.IN":"2019 Life Expectancy"},
    title="Wealth vs Health",
    # Name the template explicitly instead of picking it by position in
    # plotly.io.templates; position 1 is "seaborn" in this notebook's template
    # listing, and a positional lookup silently changes theme if the registry
    # order ever differs.
    template="seaborn",
    height=800,
    color="Country Name",
)
fig.update_traces(textposition='top center')
fig.update_layout(showlegend=False)

# One annotation per country, anchored at the marker's data coordinates.
# Iterating the three columns in lockstep avoids building a Series per row,
# which is what iterrows() does.
for gdp_per_capita, life_expectancy, country_name in zip(
    east_asia_pacific["NY.GDP.PCAP.PP.CD"],
    east_asia_pacific["SP.DYN.LE00.IN"],
    east_asia_pacific["Country Name"],
):
    fig.add_annotation(
        dict(
            font=dict(color="black"),
            x=gdp_per_capita,
            y=life_expectancy,
            showarrow=False,
            text=country_name,
            textangle=0,
            xanchor='left',
            yanchor="middle",
            xref="x",
            yref="y",
        )
    )

fig.show(config=config_dict)

In [None]:
# Name of the first template in plotly's template registry.
next(iter(plotly.io.templates.keys()))

'ggplot2'

In [32]:
# All template names in plotly's registry, in registry order.
[*plotly.io.templates.keys()]

['ggplot2',
 'seaborn',
 'simple_white',
 'plotly',
 'plotly_white',
 'plotly_dark',
 'presentation',
 'xgridoff',
 'ygridoff',
 'gridon',
 'none']

In [55]:
# Pie chart of 2019 population by country.
#
# The slices are taken straight from the 2019 SP.POP.TOTL rows rather than
# from df_2019_pivot: the pivoted table had every row with any missing
# indicator dropped, which would leave those countries out of a chart titled
# as the population of the world and inflate every remaining share.
population_2019 = df_2019.query("indicator == 'SP.POP.TOTL'")

fig = px.pie(
    population_2019,
    values="value",
    names='Country Name',
    # The long-format column is just called "value"; give it a readable label.
    labels={"value": "Population"},
    height=800,
    title='Population of The World',
)

# Draw the slice text inside the slices.
fig.update_traces(textposition='inside',textinfo="label+text+value")
fig.show()