In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import numpy as np

In [None]:
# Notebook-wide display settings.
# Render every column of a DataFrame instead of truncating wide frames.
pd.options.display.max_columns = None
# ggplot look for matplotlib, with a wide 15x5 default figure size.
plt.style.use('ggplot')
plt.rcParams.update({'figure.figsize': (15, 5)})

In [None]:
# Read the 'Canada by Citizenship' sheet, ignoring the first 20 rows and the
# last 2 rows of the sheet.
df = pd.read_excel(
    'Canada.xlsx',
    sheet_name='Canada by Citizenship',
    skiprows=20,
    skipfooter=2,
)
# Preview the first rows of the loaded frame.
df.head()

## clean data for visualization

In [None]:
# List every column label so the ones to drop or rename can be picked out.
cols = list(df.columns)
print(cols)

In [None]:
# Columns that are not used by any chart in this notebook.
cols_to_drop = ['Type', 'Coverage', 'AREA', 'REG', 'DEV', 'DevName']
# Remove them from the frame in place, then preview the result.
df.drop(columns=cols_to_drop, inplace=True)
df.head()

In [None]:
# Give the three identifying columns readable names.
df.rename(
    columns=dict(OdName='Country', AreaName='Continent', RegName='Region'),
    inplace=True,
)
df.head()

In [None]:
# Country names are expected to be unique, so use them as the row index;
# rows can then be selected by label with df.loc['<country>'].
df.set_index(keys='Country', inplace=True)
df.head()

In [None]:
# Sum of Afghanistan's values over the year columns 1980..2013 (inclusive).
df.loc['Afghanistan', list(range(1980, 2014))].sum()

In [None]:
# Sum of China's values over the year columns 1980..2013 (inclusive).
df.loc['China', list(range(1980, 2014))].sum()

In [None]:
# Per-country total across the year columns 1980..2013.
# Only the year columns are summed: the frame also carries non-year columns
# (e.g. Continent, Region), and a plain df.sum(axis=1) would try to add those
# in as well -- pandas >= 2.0 no longer silently skips non-numeric columns and
# raises instead. Selecting the years explicitly also keeps this cell
# idempotent: re-running it does not fold an existing 'total' column into the
# new total.
df['total'] = df.loc[:, list(range(1980, 2014))].sum(axis=1)

In [None]:
# Preview the first five rows, now including the 'total' column.
df.head(5)

In [None]:
# Order rows by 'total', smallest first, so the countries with the largest
# totals end up at the bottom of the frame.
df.sort_values(by=['total'], ascending=True, inplace=True)
df.head()

# Visualization charts
- Line plot
- Area chart
- Pie chart
- Scatter plot
- Histogram
- World map

##### Growth or decay - line plot, area plot
##### Comparison between values - bar plot or pie chart
##### Frequency distribution - histogram, KDE, boxplot
##### Relation between 2 or 3 values - scatter plot
##### Geographical view - map (folium)

# 1. Growth or Decay

In [None]:
# Year column labels used throughout the notebook: 1980 through 2013 inclusive.
first_year, last_year = 1980, 2013
years = range(first_year, last_year + 1)

In [None]:
# Print every row label (country name) to look up exact spellings.
print(list(df.index))

In [None]:
# Line plot of the yearly values for the Philippines (dashed line with
# circle markers), saved to 'ip.png'.
ax = df.loc['Philippines', years].plot(
    style='--o',
    color='purple',
    title='Immigration data for Philippines',
)
# Label the axes so the figure can be read on its own.
ax.set_xlabel('Year')
ax.set_ylabel('Number of immigrants')
# One tick per year, rotated so the labels do not overlap.
plt.xticks(years, rotation=90)
plt.savefig('ip.png')

In [None]:
# Transpose so that years become rows and each country becomes a column --
# the layout DataFrame.plot uses to draw one line per country.
df.loc[['Philippines', 'Poland'], years].transpose()

In [None]:
# One line per country: after transposing, years are the x-axis and each
# country is a separate series. The title names both plotted countries.
ax = df.loc[['Philippines', 'Poland'], years].T.plot(
    style='--o',
    title='Immigration data for Philippines and Poland',
)
ax.set_xlabel('Year')
ax.set_ylabel('Number of immigrants')
# One tick per year, rotated so the labels do not overlap.
plt.xticks(years, rotation=90)
plt.show()

In [None]:
# Area chart of the yearly values for the Philippines.
ax = df.loc['Philippines', years].plot(
    kind='area',
    color='#893492',
    title='Immigration data for Philippines',
)
ax.set_xlabel('Year')
ax.set_ylabel('Number of immigrants')
# One tick per year, rotated so the labels do not overlap.
plt.xticks(years, rotation=90)
plt.show()

In [None]:
# Stacked area plot: the two countries' areas are drawn on top of each other,
# so the upper edge shows their combined value per year. The title names both
# plotted countries.
ax = df.loc[['Philippines', 'Poland'], years].T.plot(
    kind='area',
    title='Immigration data for Philippines and Poland',
    alpha=.5,
)
ax.set_xlabel('Year')
ax.set_ylabel('Number of immigrants')
# One tick per year, rotated so the labels do not overlap.
plt.xticks(years, rotation=90)
plt.show()

In [None]:
# Unstacked area plot: each country's area starts at zero, and the
# semi-transparent fill (alpha) keeps the overlapping regions visible.
# The title names both plotted countries.
ax = df.loc[['Philippines', 'India'], years].T.plot(
    kind='area',
    title='Immigration data for Philippines and India',
    alpha=.5,
    stacked=False,
)
ax.set_xlabel('Year')
ax.set_ylabel('Number of immigrants')
# One tick per year, rotated so the labels do not overlap.
plt.xticks(years, rotation=90)
plt.show()

In [None]:
# China's yearly values as a two-column frame: reset_index() moves the year
# labels into a column named 'index', next to the 'China' values column.
china_series = df.loc['China', years]
china_df = china_series.reset_index()
# Interactive plotly line chart of the same series.
fig = px.line(data_frame=china_df, x='index', y='China')
fig.show()

In [None]:
# Interactive plotly area chart of China's yearly values (vertical orientation).
px.area(
    data_frame=china_df,
    x='index',
    y='China',
    orientation='v',
)

In [None]:
# Shorten the United Kingdom's long official name in the row index so it
# fits on chart labels.
df.rename(
    index={'United Kingdom of Great Britain and Northern Ireland': 'Britain'},
    inplace=True,
)

In [None]:
# The frame was sorted ascending by 'total', so the last five row labels are
# the five countries with the largest totals.
countries_top5 = list(df.index[-5:])
countries_top5

In [None]:
# Bar chart comparing the top-5 countries' values for the year 2000.
df.loc[countries_top5, 2000].plot.bar()

In [None]:
# Pie chart of the top-5 countries' shares for the year 2000.
# autopct prints each share as a percentage with two decimals; explode pulls
# the third wedge out of the pie.
df.loc[countries_top5, 2000].plot.pie(
    autopct='%.2f%%',
    explode=[0, 0, .1, 0, 0],
    radius=1.5,
    startangle=90,
)

In [None]:
# One pie per selected year (1980, 1990, 2000, 2010) for the top-5 countries:
# subplots=True draws a separate pie for each column of the selection.
df.loc[countries_top5, [1980, 1990, 2000, 2010]].plot.pie(
    subplots=True,
    figsize=(30, 8),
    autopct='%.2f%%',
    explode=[0, 0, .1, 0, 0],
    radius=1,
    startangle=90,
)
plt.show()

In [None]:
# Same year-2000 pie for the top-5 countries, with an explicit colour for
# each of the five wedges.
df.loc[countries_top5, 2000].plot.pie(
    autopct='%.2f%%',
    explode=[0, 0, .1, 0, 0],
    radius=1.5,
    startangle=90,
    colors=['#909090', '#832112', '#ff8902', 'red', '#7878ff'],
)

In [None]:
# Bar chart of the 2013 column for the last 50 rows of the frame (the 50
# largest totals, given the ascending sort by 'total').
df[2013].tail(50).plot.bar()
plt.show()

In [None]:
# 2013 values for the last 50 rows of the frame, with the country names moved
# from the index into a 'Country' column so plotly can reference them by name.
df_2013 = df[2013].iloc[-50:].reset_index()
# Interactive bar chart; the title states the year actually plotted (2013).
px.bar(
    df_2013,
    x='Country',
    y=2013,
    height=600,
    title='50 Countries immigration data from 2013',
)

In [None]:
# Polar bar chart of the same 50-country 2013 data.
# r is the radial length of each bar, so it takes the numeric 2013 column;
# theta is the angular position, so it takes the country names. The title
# states the year actually plotted (2013).
px.bar_polar(
    df_2013,
    r=2013,
    theta='Country',
    title='50 Countries immigration data from 2013',
)

In [None]:
# Year-2000 values for the top-5 countries, with the country names moved from
# the index into a 'Country' column for plotly.
top5_in_2000 = df.loc[countries_top5, 2000]
df_2000_top_5 = top5_in_2000.reset_index()
# Interactive pie chart: one wedge per country, sized by its 2000 value.
px.pie(data_frame=df_2000_top_5, names='Country', values=2000)