In [24]:
%matplotlib inline  
import os
from matplotlib.image import imread
import plotly
import numpy as np 
import pandas as pd 
import plotly.express as px
import plotly.graph_objects as go
import warnings
warnings.filterwarnings('ignore')

# Load the indicator table and the country-coordinate lookup table.
df = pd.read_csv('data.csv')
location_df = pd.read_csv('cordinate.csv')

# Skip the first 14145 rows of data.csv.
# NOTE(review): presumably these are aggregate/regional rows that precede the
# per-country rows -- confirm against the file before changing the offset.
df = df.iloc[14145:]

# Keep only the columns whose header does not start with "Unnamed".
is_unnamed = df.columns.str.contains('^Unnamed')
df = df.loc[:, ~is_unnamed]

# Drop every column from "Indicator Code" up to (not including) "1995";
# the original author notes these early years are mostly NaN.
start_loc = df.columns.get_loc("Indicator Code")
end_loc = df.columns.get_loc("1995")
df = df.drop(columns=df.columns[start_loc:end_loc])

# Drop the years after 2011 and the country code, leaving
# 'Country Name', 'Indicator Name' and the remaining year columns.
df = df.drop(columns=['2012', '2013', '2014', '2015', 'Country Code'])

# Build one long-format (Country Name, Year, <value>) table per indicator group.


def _country_prevalence(frame, indicators, rescale=None):
    """Sum the yearly values of the given indicators for every country.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table with 'Country Name', 'Indicator Name' and one column per year.
    indicators : list of str
        'Indicator Name' values whose rows are kept.
    rescale : callable, optional
        Applied to the per-country totals (a DataFrame) and must return a
        DataFrame of the same layout, e.g. to average several indicators or
        convert units. When omitted the raw totals are used.

    Returns
    -------
    pandas.DataFrame
        Indexed by country, one column per year, plus a 'Country Name'
        column mirroring the index so the table can be melted.
    """
    selected = frame[frame['Indicator Name'].isin(indicators)]
    # numeric_only=True restricts the sum to the year columns. Without it,
    # pandas >= 2.0 also "sums" (concatenates) the 'Indicator Name' strings,
    # which makes the later division raise TypeError and leaks a bogus
    # 'Indicator Name' entry into the melted "Year" column.
    totals = selected.groupby(['Country Name']).sum(numeric_only=True)
    # NOTE(review): sum() yields 0, not NaN, for a country with no data, so
    # this dropna does not remove any row. It is kept unchanged so that the
    # downstream tables contain the same countries as before.
    totals = totals.dropna(subset=["2011"])
    wide = totals if rescale is None else rescale(totals)
    wide['Country Name'] = wide.index
    return wide


def _to_long(wide, value_name):
    """Melt a per-country wide table into (Country Name, Year, value_name) rows."""
    return wide.melt(id_vars=["Country Name"], var_name="Year", value_name=value_name)


ANEMIA_INDICATORS = [
    'Prevalence of anemia among children (% of children under 5)',
    'Prevalence of anemia among non-pregnant women (% of women ages 15-49)',
    'Prevalence of anemia among pregnant women (%)',
    'Prevalence of anemia among women of reproductive age (% of women ages 15-49)',
]
WASTING_INDICATORS = [
    'Prevalence of wasting (% of children under 5)',
    'Prevalence of wasting, female (% of children under 5)',
    'Prevalence of wasting, male (% of children under 5)',
]
HIV_INDICATORS = ['Prevalence of HIV, total (% of population ages 15-49)']
OVERWEIGHT_INDICATORS = [
    'Prevalence of overweight (% of adults)',
    'Prevalence of overweight (% of children under 5)',
]
SYPHILIS_INDICATORS = ['Prevalence of syphilis (% of women attending antenatal care)']
TUBERCULOSIS_INDICATORS = ['Prevalence of tuberculosis (per 100,000 population)']
UNDERNOURISHMENT_INDICATORS = ['Prevalence of undernourishment (% of population)']

# Anemia: sum of the four anemia indicators divided by four.
a_dfsm = _country_prevalence(
    df, ANEMIA_INDICATORS,
    rescale=lambda totals: totals / len(ANEMIA_INDICATORS))
a_dfsm_t = _to_long(a_dfsm, "anemia")

# Wasting: sum of the three wasting indicators divided by three.
w_dfsm = _country_prevalence(
    df, WASTING_INDICATORS,
    rescale=lambda totals: totals / len(WASTING_INDICATORS))
w_dfsm_t = _to_long(w_dfsm, "wasting")

# HIV: single indicator, used as reported.
hiv_dfs = _country_prevalence(df, HIV_INDICATORS)
hiv_dfs_t = _to_long(hiv_dfs, "HIV")

# Overweight: sum of the adult and under-5 indicators divided by two.
o_dfsm = _country_prevalence(
    df, OVERWEIGHT_INDICATORS,
    rescale=lambda totals: totals / len(OVERWEIGHT_INDICATORS))
o_dfsm_t = _to_long(o_dfsm, "overweight")

# Syphilis: single indicator, used as reported.
s_dfs = _country_prevalence(df, SYPHILIS_INDICATORS)
s_dfs_t = _to_long(s_dfs, "syphilis")

# Tuberculosis: reported per 100,000 population; convert to a percentage.
t_dfsm = _country_prevalence(
    df, TUBERCULOSIS_INDICATORS,
    rescale=lambda totals: (totals / 100000) * 100)
t_dfsm_t = _to_long(t_dfsm, "tuberculosis")

# Undernourishment: single indicator, used as reported.
u_dfs = _country_prevalence(df, UNDERNOURISHMENT_INDICATORS)
u_dfs_t = _to_long(u_dfs, "undernourishment")


# Join the per-indicator long tables into one table keyed by country and year.
# Every join is an inner join, so only (country, year) pairs present in all
# seven tables survive.
merge_keys = ['Country Name', 'Year']
data_merge_1 = a_dfsm_t.merge(w_dfsm_t, on=merge_keys)
data_merge_2 = hiv_dfs_t.merge(data_merge_1, on=merge_keys)
data_merge_3 = o_dfsm_t.merge(data_merge_2, on=merge_keys)
data_merge_4 = s_dfs_t.merge(data_merge_3, on=merge_keys)
data_merge_5 = t_dfsm_t.merge(data_merge_4, on=merge_keys)
data_merge_final = u_dfs_t.merge(data_merge_5, on=merge_keys)

def getLatitude(name):
    """Return the latitude stored for country ``name`` as a string.

    The lookup is an exact match on the ``name`` column of the module-level
    ``location_df`` table. ``np.nan`` is returned unless exactly one row
    matches (no match, or more than one match).
    """
    matches = location_df.loc[location_df.name == name, 'latitude'].values
    if len(matches) == 1:
        return str(matches[0])
    return np.nan
def getLongitude(name):
    """Return the longitude stored for country ``name`` as a string.

    The lookup is an exact match on the ``name`` column of the module-level
    ``location_df`` table. ``np.nan`` is returned unless exactly one row
    matches (no match, or more than one match).
    """
    matches = location_df.loc[location_df.name == name, 'longitude'].values
    if len(matches) == 1:
        return str(matches[0])
    return np.nan


# Attach coordinates to every row by looking the country name up in location_df.
for coord_column, lookup in (('latitude', getLatitude), ('longitude', getLongitude)):
    data_merge_final[coord_column] = data_merge_final['Country Name'].apply(lookup)

# Countries without a usable longitude cannot be placed on the map.
data_merge_final = data_merge_final.dropna(subset=["longitude"])

# The map shows a single year; "Year" holds the original column labels (strings).
is_map_year = data_merge_final['Year'] == '2010'
data_final = data_merge_final.loc[is_map_year]
# Marker colour for each plotted indicator. The keys match the value columns
# of the merged table.
colors = dict(
    anemia='#ecec84',
    wasting='blue',
    HIV='green',
    overweight='#ffb69b',
    syphilis='lightseagreen',
    tuberculosis='black',
    undernourishment='#7ccaae',
)

# Indicators to draw, one map trace each, in the order given by `colors`.
disease = list(colors)

fig = go.Figure()

# One bubble trace per indicator, all drawn at the same country coordinates.
# Each trace can be toggled from the legend.
for indicator in disease:
    bubble_style = {
        # Bubble area is the indicator value multiplied by 4.
        'size': data_final[indicator] * 4,
        'color': colors[indicator],
        'line_color': 'rgb(40,40,40)',
        'line_width': 0.5,
        'sizemode': 'area',
    }
    trace = go.Scattergeo(
        lon=data_final['longitude'],
        lat=data_final['latitude'],
        text=data_final['Country Name'],
        marker=bubble_style,
        name=indicator.upper(),
    )
    fig.add_trace(trace)

# World map on a natural-earth projection, grey land, white borders.
geo_settings = {
    'scope': 'world',
    'projection': {'type': 'natural earth'},
    'showland': True,
    'landcolor': 'rgb(217, 217, 217)',
    'subunitwidth': 1,
    'countrywidth': 1,
    'subunitcolor': "rgb(255, 255, 255)",
    'countrycolor': "rgb(255, 255, 255)",
}
fig.update_layout(
    title_text='Prevalence of Diseases(% of population)<br>(Click legend to toggle traces)',
    showlegend=True,
    geo=geo_settings,
)

fig.show()


In [25]:
# Write the figure to a standalone HTML file; the call returns the file name,
# which the notebook displays as the cell output.
plotly.offline.plot(figure_or_data=fig, filename="map.html")

'map.html'