# IMPORT OF REQUIRED LIBRARY

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
import seaborn as sns
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
import missingno as  msno
from plotly.subplots import make_subplots
import warnings
# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides library deprecation warnings — confirm that is intended.
warnings.filterwarnings("ignore")
# IPython magic: draw matplotlib figures inline in the cell output.
%matplotlib inline

In [3]:
# Read the CSV (path is relative to the notebook's working directory) into
# the DataFrame that every later cell works from.
df = pd.read_csv(filepath_or_buffer="reported_numbers.csv")

# Dataset here consists of columns like country,year,cases,death and WHO region

In [18]:
# Preview the first five rows to check the column layout.
df.head(n=5)

Unnamed: 0,Country,Year,No. of cases,No. of deaths,WHO Region
0,Afghanistan,2017,161778.0,10.0,Eastern Mediterranean
1,Algeria,2017,0.0,0.0,Africa
2,Angola,2017,3874892.0,13967.0,Africa
3,Argentina,2017,0.0,1.0,Americas
6,Bangladesh,2017,4893.0,13.0,South-East Asia


In [5]:
# Print the index range, per-column non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1944 entries, 0 to 1943
Data columns (total 5 columns):
Country          1944 non-null object
Year             1944 non-null int64
No. of cases     1710 non-null float64
No. of deaths    1675 non-null float64
WHO Region       1944 non-null object
dtypes: float64(2), int64(1), object(2)
memory usage: 76.1+ KB


In [6]:
# Count the missing values in each column.
df.isna().sum()

Country            0
Year               0
No. of cases     234
No. of deaths    269
WHO Region         0
dtype: int64

In [7]:
# Discard every row that has a missing value in any column (df is modified
# in place), then re-count the missing values to confirm none remain.
df.dropna(axis=0, how="any", inplace=True)
df.isna().sum()

Country          0
Year             0
No. of cases     0
No. of deaths    0
WHO Region       0
dtype: int64

# By Grouping Countries with no.of cases and no. of death we can here we can analyze that no. of deaths is much lesser than number of cases i.e people with who are all affected by malaria didn't loosed their life. 

In [20]:
# Total reported cases and deaths per country, summed over every year
# present in df. The result has one row per country with "Country" as a
# regular column (reset_index) rather than as the index.
#
# The two value columns are selected with a list (double brackets).
# Passing them as a bare tuple -- groupby(...)["a", "b"] -- was deprecated
# and is rejected by pandas 2.x.
df_group = (
    df.groupby("Country")[["No. of cases", "No. of deaths"]]
    .sum()
    .reset_index()
)
df_group

Unnamed: 0,Country,No. of cases,No. of deaths
0,Afghanistan,1045271.0,363.0
1,Algeria,1044.0,4.0
2,Angola,26006152.0,125364.0
3,Argentina,2098.0,2.0
4,Armenia,355.0,0.0
...,...,...,...
100,Venezuela (Bolivarian Republic of),1039480.0,278.0
101,Viet Nam,445213.0,564.0
102,Yemen,895910.0,544.0
103,Zambia,18619166.0,8898.0


In [9]:
# Keep only the country name and its total case count for the cases chart.
df_cases = df_group.loc[:, ["Country", "No. of cases"]]
df_cases.head(n=5)

Unnamed: 0,Country,No. of cases
0,Afghanistan,1045271.0
1,Algeria,1044.0
2,Angola,26006152.0
3,Argentina,2098.0
4,Armenia,355.0


# To get much better interactive visualization here i have used plotly 

# Top 10 countries with the highest incidence of malaria 


here we can see that Democratc Republic of the congo has highest incidence of malaria with number of cases as 74842893, next uganda then burkina faso similarly the death cases is also high for Democratc Republic of the congo. while comparing death cases and no. of cases, no.of cases is exponentially higher than death cases

In [23]:
# Make Plotly's dark template the default for figures created from here on.
pio.templates.default = "plotly_dark"

# Ten largest case totals, then reversed so the largest total comes last
# in the plotted order.
top_cases = (
    df_cases.sort_values(by="No. of cases", ascending=False)
    .iloc[:10]
    .iloc[::-1]
)

# Horizontal bar chart with the case count printed on each bar.
fig = px.bar(
    top_cases,
    x="No. of cases",
    y="Country",
    text="No. of cases",
    title="Top 10 countries with the highest incidence of malaria",
    color_discrete_sequence=px.colors.qualitative.Light24,
    height=900,
    orientation="h",
)
fig.show()

# Top 10 countries with the highest number of deaths 


In [24]:
# Country name plus its total death count.
df_death = df_group.loc[:, ["Country", "No. of deaths"]]

# Make Plotly's dark template the default for figures created from here on.
pio.templates.default = "plotly_dark"

# Ten largest death totals, then reversed so the largest total comes last
# in the plotted order.
top_deaths = (
    df_death.sort_values(by="No. of deaths", ascending=False)
    .iloc[:10]
    .iloc[::-1]
)

# Horizontal bar chart with the death count printed on each bar.
fig = px.bar(
    top_deaths,
    x="No. of deaths",
    y="Country",
    text="No. of deaths",
    title="Top 10 countries with the highest number of deaths",
    color_discrete_sequence=px.colors.qualitative.Light24,
    height=800,
    orientation="h",
)
fig.show()

# malaria cases across WHO regions

In [12]:
# Total reported cases and deaths per WHO region, summed over every country
# and year present in df. "WHO Region" is returned as a regular column
# (reset_index) rather than as the index.
#
# The two value columns are selected with a list (double brackets).
# Passing them as a bare tuple -- groupby(...)["a", "b"] -- was deprecated
# and is rejected by pandas 2.x.
who_group = (
    df.groupby("WHO Region")[["No. of cases", "No. of deaths"]]
    .sum()
    .reset_index()
)
who_group.head()


Unnamed: 0,WHO Region,No. of cases,No. of deaths
0,Africa,545111852.0,1480850.0
1,Americas,13433321.0,11039.0
2,Eastern Mediterranean,15841260.0,26764.0
3,Europe,112675.0,25.0
4,South-East Asia,38305249.0,49802.0


# WHO regions with the highest incidence


In [25]:
pio.templates.default = "plotly_dark"
fig = px.bar(who_group.sort_values("No. of cases",ascending=False)[::-1],y="No. of cases",x ="WHO Region",text="No. of cases",
             title="WHO regions with the highest incidence from 2000 to 2018",
             color_discrete_sequence= px.colors.qualitative.Set1,height=500,orientation="v")
fig.show()

In [14]:
# Pie chart of each WHO region's share of the total reported cases.
# `go` (plotly.graph_objects) comes from the import cell at the top of the
# notebook; it is not re-imported here so that all imports stay in one place.

# Slice colours passed to the pie's marker.
# NOTE(review): only four colours are listed — confirm this matches the
# number of regions in who_group.
colors = ['gold', 'mediumturquoise', 'darkorange', 'lightgreen']

region_pie = go.Pie(
    labels=who_group["WHO Region"],
    values=who_group['No. of cases'],
)
fig = go.Figure(data=[region_pie])

# Show the region label and its percentage both on the slice and on hover,
# and outline every slice in black.
fig.update_traces(
    hoverinfo='label+percent',
    textinfo='label+percent',
    textfont_size=20,
    marker=dict(colors=colors, line=dict(color='#000000', width=2)),
)
fig.update_layout(width=800, height=600)
fig.show()

# DEATH IN WHO REGION

In [26]:
# Sort regions by total deaths (largest first), then reverse the row order
# so the largest total comes last in the plotted order.
regions_by_deaths = who_group.sort_values(
    by="No. of deaths", ascending=False
).iloc[::-1]

# Vertical bar chart with the death count printed on each bar.
fig = px.bar(
    regions_by_deaths,
    x="WHO Region",
    y="No. of deaths",
    text="No. of deaths",
    title="DEATH IN WHO REGION ",
    color_discrete_sequence=px.colors.qualitative.Set1,
    height=500,
    orientation="v",
)
fig.show()

# YEAR WISE GROUPING OF NUMBER OF CASES AND NUMBER OF DEATHS

In [27]:
# Total reported cases and deaths per year, summed over all countries, with
# "Year" returned as a regular column rather than as the index.
year_group = (
    df.groupby("Year")[["No. of cases", "No. of deaths"]]
    .sum()
    .reset_index()
)
year_group.head(n=5)

Unnamed: 0,Year,No. of cases,No. of deaths
0,2000,5279182.0,21419.0
1,2001,5534764.0,26162.0
2,2002,5335247.0,70683.0
3,2003,8243454.0,91247.0
4,2004,9389638.0,87926.0


# By visualizing the below graph we can infer that with the increase in the year number of cases and number of death also increases.

In [29]:
# Side-by-side line charts of the yearly totals: cases in the left panel,
# deaths in the right panel. Each panel has its own y-axis.
fig = make_subplots(rows=1, cols=2, column_titles=('No. of cases', 'No. of deaths'))

cases_trace = go.Scatter(
    x=year_group['Year'],
    y=year_group['No. of cases'],
    name='Cases',
    opacity=0.9,
    mode='lines+markers',
    line=dict(color='blue'),
)

deaths_trace = go.Scatter(
    x=year_group['Year'],
    y=year_group['No. of deaths'],
    name='Deaths',
    opacity=0.9,
    mode='lines+markers',
    line=dict(color='red'),
)

# add_trace with explicit row/col replaces the deprecated append_trace and
# places each trace in the same subplot cell as before.
fig.add_trace(cases_trace, row=1, col=1)
fig.add_trace(deaths_trace, row=1, col=2)

fig.update_layout(title_text="MALARIAL SPREAD YEAR WISE")
fig.show()