In [1]:
#tidy data principle #2: Column names need to be informative, variable names and not values
#tidy data principle #2: each column needs to consist of one and only one variable
#tidy data principle #3: variables need to be in cells, not rows and columns
#tidy data principle #4: each table column needs to have a singular data type 
#tidy data principle #5: a single observational units must be in 1 table

In [2]:
#=====Step 0=====
#Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly
import io
#import raw dataset from local disk
from google.colab import files
uploaded = files.upload()

Saving UN_MigrantStockTotal_2015.xlsx to UN_MigrantStockTotal_2015.xlsx


In [3]:
#=====Step 1=====
#Define functions in preparation for the dataset
#Preparation Fucntion: load a sheet from excel, drop unecessary headers, organize column names
def prep(table, tb_name, col_name):
      table = pd.read_excel('UN_MigrantStockTotal_2015.xlsx', tb_name)
      table.columns = col_name
      table = table.drop(table.index[0 : 21])
      return table


#Melt function
def melt_func(table, Id_Vars, Value_Vars, Var_Name, Value_Name):
      return table.melt(id_vars = Id_Vars, value_vars = Value_Vars, var_name = Var_Name, value_name = Value_Name)


#Split function 1 (for data recorded by years)
def split_year(table):
      return table.join(table['year and gender'].str.partition(' ')[[0, 2]].rename({0: 'year', 2: 'gender'}, axis=1)).drop("year and gender", axis = 1)


#Split function 2 (for data recorded by periods)
def split_period(table):
      return table.join(table['period and gender'].str.partition(' ')[[0, 2]].rename({0: 'period', 2: 'gender'}, axis=1)).drop("period and gender", axis = 1)


#Defining the variables for functions
#=====Universal=====
value_vars_b = ['1990 all', '1995 all', '2000 all', '2005 all', '2010 all', '2015 all']
value_vars_m = ['1990 m', '1995 m', '2000 m', '2005 m', '2010 m', '2015 m']
value_vars_f = ['1990 f', '1995 f', '2000 f', '2005 f', '2010 f', '2015 f']

value_vars_b_p = ['1990-1995 all', '1995-2000 all', '2000-2005 all', '2005-2010 all', '2010-2015 all']
value_vars_m_p = ['1990-1995 m', '1995-2000 m', '2000-2005 m', '2005-2010 m', '2010-2015 m']
value_vars_f_p = ['1990-1995 f', '1995-2000 f', '2000-2005 f', '2005-2010 f', '2010-2015 f']

var_name_year = 'year and gender'
var_name_period = 'period and gender'
#Import and tidy up ANNEX from the raw data
annex = pd.read_excel('UN_MigrantStockTotal_2015.xlsx', 'ANNEX')
annex.columns = ['Code', 'Country or area', 'Order', 'Major area', 'Code', 'Order', 'Region', 'Code', 'Order', 'Developed region', 'Least developed country', 'Sub-Saharan Africa']
annex = annex.drop(annex.index[0 : 14])


#Identiy unique major areas present in the excel
major_area_list = annex['Major area'].unique()
#Identiy unique regions present in the excel
region_list = annex['Region'].unique()

#Define function for extracting country and area from column "area"
def extract_country(table):
      table = table[~table['area'].isin(region_list)]
      table = table[~table['area'].isin(major_area_list)]
      return table


#Define function for extracting regions from column "area"
def extract_region(table):
      table = table[table['area'].isin(region_list)]
      return table


#Define function for extracting major area from column "area"
def extract_majorarea(table):
      table = table[table['area'].isin(major_area_list)]
      return table




=============================Table 1==================================
`
`



In [4]:
#Defining the variables for functions
#Table name is
tb_name = 'Table 1'
#Names of the columns in the original dataset
col_name = ['order',	'area', 'notes',	'country_code', 'type', '1990 all', '1995 all', '2000 all', '2005 all', '2010 all', '2015 all', '1990 m', '1995 m', '2000 m', '2005 m', '2010 m', '2015 m', '1990 f', '1995 f', '2000 f', '2005 f', '2010 f', '2015 f']
#Which columns to keep as is for melt
id_vars = ['order',	'area', 'notes', 'country_code', 'type']
#Name of column after melt
value_name = "International migrant stock at mid-year"
value_name_b = f"{value_name} (both sexes)"
value_name_m = f"{value_name} (male)"
value_name_f = f"{value_name} (female)"

#Getting Table 1 ready to process
table = df_tb1 = None
table = prep(table, tb_name, col_name)

#melt & split by sex
table_b = melt_func(table, id_vars, value_vars_b, var_name_year, value_name_b)
table_b = split_year(table_b)

table_m = melt_func(table, id_vars, value_vars_m, var_name_year, value_name_m)
table_m = split_year(table_m)

table_f = melt_func(table, id_vars, value_vars_f, var_name_year, value_name_f)
table_f = split_year(table_f)
table_m

#Combine results from male, female, and both sexes
table = table_b[['order', 'area', 'notes', 'country_code', 'type', 'year', value_name_b]].join(table_m[value_name_m]).join(table_f[value_name_f])
#Rename columns in preparation for melt
table.columns = ['order', 'area', 'notes',	'country_code', 'type', 'year', 'both sexes', 'male', 'female']
#melt migrant stock valuse for male, female, and both sexes
table = table.melt(id_vars = ['order',	'area', 'notes',	'country_code', 'type', 'year'], value_vars = ['both sexes', 'male', 'female'], var_name = 'gender', value_name = value_name)
#Replace empty values with "NaN"
df_tb1 = table.replace(to_replace =['..'],value =['NaN'])

#show results
df_tb1


Unnamed: 0,order,area,notes,country_code,type,year,gender,International migrant stock at mid-year
0,7,Africa,,903,,1990,both sexes,15690623
1,8,Eastern Africa,,910,,1990,both sexes,5964031
2,9,Burundi,,108,B R,1990,both sexes,333110
3,10,Comoros,,174,B,1990,both sexes,14079
4,11,Djibouti,,262,B R,1990,both sexes,122221
...,...,...,...,...,...,...,...,...
4657,261,Samoa,,882,B,2015,female,2460.0
4658,262,Tokelau,,772,B,2015,female,254.0
4659,263,Tonga,,776,B,2015,female,2604.0
4660,264,Tuvalu,,798,C,2015,female,63.0


In [5]:
#Defining the variables for functions
#Table name is
tb_name = 'Table 2'
#Names of the columns in the original dataset
col_name = ['order',	'area', 'notes',	'country_code', '1990 all', '1995 all', '2000 all', '2005 all', '2010 all', '2015 all', '1990 m', '1995 m', '2000 m', '2005 m', '2010 m', '2015 m', '1990 f', '1995 f', '2000 f', '2005 f', '2010 f', '2015 f']
#Which columns to keep as is for melt
id_vars = ['order',	'area', 'notes', 'country_code']
#Name of column after melt
value_name = "Total population at mid-year (thousands)"
value_name_b = f"{value_name} (both sexes)"
value_name_m = f"{value_name} (male)"
value_name_f = f"{value_name} (female)"

#Getting Table 2 ready to process
table = df_tb2 = None
table = prep(table, tb_name, col_name)

#melt & split by sex
table_b = melt_func(table, id_vars, value_vars_b, var_name_year, value_name_b)
table_b = split_year(table_b)

table_m = melt_func(table, id_vars, value_vars_m, var_name_year, value_name_m)
table_m = split_year(table_m)

table_f = melt_func(table, id_vars, value_vars_f, var_name_year, value_name_f)
table_f = split_year(table_f)

#Combine results from male, female, and both sexes
table = table_b[['order', 'area', 'notes', 'country_code', 'year', value_name_b]].join(table_m[value_name_m]).join(table_f[value_name_f])
#Rename columns in preparation for melt
table.columns = ['order', 'area', 'notes',	'country_code', 'year', 'both sexes', 'male', 'female']
#melt migrant stock valuse for male, female, and both sexes
table = table.melt(id_vars = ['order',	'area', 'notes',	'country_code', 'year'], value_vars = ['both sexes', 'male', 'female'], var_name = 'gender', value_name = value_name)
#Replace empty values with "NaN"
df_tb2 = table.replace(to_replace =['..'],value =['NaN'])

#show results
df_tb2

Unnamed: 0,order,area,notes,country_code,year,gender,Total population at mid-year (thousands)
0,7,Africa,,903,1990,both sexes,631614.304
1,8,Eastern Africa,,910,1990,both sexes,198231.687
2,9,Burundi,,108,1990,both sexes,5613.141
3,10,Comoros,,174,1990,both sexes,415.144
4,11,Djibouti,,262,1990,both sexes,588.356
...,...,...,...,...,...,...,...
4657,261,Samoa,,882,2015,female,93.584
4658,262,Tokelau,,772,2015,female,
4659,263,Tonga,,776,2015,female,52.931
4660,264,Tuvalu,,798,2015,female,


=============================Table 3==================================




In [6]:
#Defining the variables for functions
#Table name is
tb_name = 'Table 3'
#Names of the columns in the original dataset
col_name = ['order',	'area', 'notes',	'country_code', 'type', '1990 all', '1995 all', '2000 all', '2005 all', '2010 all', '2015 all', '1990 m', '1995 m', '2000 m', '2005 m', '2010 m', '2015 m', '1990 f', '1995 f', '2000 f', '2005 f', '2010 f', '2015 f']
#Which columns to keep as is for melt
id_vars = ['order',	'area', 'notes', 'country_code', 'type']
#Name of column after melt
value_name = "International migrant stock as a percentage of the total population"
value_name_b = f"{value_name} (both sexes)"
value_name_m = f"{value_name} (male)"
value_name_f = f"{value_name} (female)"

#Getting Table 3 ready to process
table = df_tb3 = None
table = prep(table, tb_name, col_name)

#melt & split by sex
table_b = melt_func(table, id_vars, value_vars_b, var_name_year, value_name_b)
table_b = split_year(table_b)

table_m = melt_func(table, id_vars, value_vars_m, var_name_year, value_name_m)
table_m = split_year(table_m)

table_f = melt_func(table, id_vars, value_vars_f, var_name_year, value_name_f)
table_f = split_year(table_f)

#Combine results from male, female, and both sexes
table = table_b[['order', 'area', 'notes', 'country_code', 'type', 'year', value_name_b]].join(table_m[value_name_m]).join(table_f[value_name_f])
#Rename columns in preparation for melt
table.columns = ['order', 'area', 'notes',	'country_code', 'type', 'year', 'both sexes', 'male', 'female']
#melt migrant stock valuse for male, female, and both sexes
table = table.melt(id_vars = ['order',	'area', 'notes',	'country_code', 'type', 'year'], value_vars = ['both sexes', 'male', 'female'], var_name = 'gender', value_name = value_name)
#Replace empty values with "NaN"
df_tb3 = table.replace(to_replace =['..'],value =['NaN'])

#show results
df_tb3

Unnamed: 0,order,area,notes,country_code,type,year,gender,International migrant stock as a percentage of the total population
0,7,Africa,,903,,1990,both sexes,2.48421
1,8,Eastern Africa,,910,,1990,both sexes,3.008616
2,9,Burundi,,108,B R,1990,both sexes,5.934467
3,10,Comoros,,174,B,1990,both sexes,3.391353
4,11,Djibouti,,262,B R,1990,both sexes,20.773307
...,...,...,...,...,...,...,...,...
4657,261,Samoa,,882,B,2015,female,2.628654
4658,262,Tokelau,,772,B,2015,female,
4659,263,Tonga,,776,B,2015,female,4.919612
4660,264,Tuvalu,,798,C,2015,female,


=============================Table 4==================================


In [7]:
#Defining the variables for functions
#Table name is
tb_name = 'Table 4'
#Names of the columns in the original dataset
col_name = ['order',	'area', 'notes',	'country_code', 'type', '1990 f', '1995 f', '2000 f', '2005 f', '2010 f', '2015 f']
#Which columns to keep as is for melt
id_vars = ['order',	'area', 'notes', 'country_code', 'type']
#Name of column after melt
value_name = "Female migrants as a percentage of the international migrant stock"
value_name_f = value_name

#Getting Table 4 ready to process
table = df_tb4 = None
table = prep(table, tb_name, col_name)

#melt & split by sex
table = melt_func(table, id_vars, value_vars_f, var_name_year, value_name_f)
table = split_year(table)

#Rename columns in preparation for melt
table.columns = ['order', 'area', 'notes', 'country_code', 'type', 'Female migrants as a percentage of the international migrant stock', 'year', 'gender']
#Replace empty values with "NaN"
table = table.replace(to_replace =['..'],value =['NaN'])

table = table.replace(to_replace =['f'],value =['female'])
df_tb4 = table[['order', 'area', 'notes', 'country_code', 'type', 'year', 'gender', 'Female migrants as a percentage of the international migrant stock']]

#show results
df_tb4


Unnamed: 0,order,area,notes,country_code,type,year,gender,Female migrants as a percentage of the international migrant stock
0,7,Africa,,903,,1990,female,47.232408
1,8,Eastern Africa,,910,,1990,female,48.504812
2,9,Burundi,,108,B R,1990,female,50.987061
3,10,Comoros,,174,B,1990,female,52.290646
4,11,Djibouti,,262,B R,1990,female,47.437838
...,...,...,...,...,...,...,...,...
1549,261,Samoa,,882,B,2015,female,49.908704
1550,262,Tokelau,,772,B,2015,female,52.156057
1551,263,Tonga,,776,B,2015,female,45.437096
1552,264,Tuvalu,,798,C,2015,female,44.680851


=============================Table 5==================================

In [8]:
#Defining the variables for functions
#Table name is
tb_name = 'Table 5'
#Names of the columns in the original dataset
col_name = ['order',	'area', 'notes',	'country_code', 'type', '1990-1995 all', '1995-2000 all', '2000-2005 all', '2005-2010 all', '2010-2015 all', '1990-1995 m', '1995-2000 m', '2000-2005 m', '2005-2010 m', '2010-2015 m', '1990-1995 f', '1995-2000 f', '2000-2005 f', '2005-2010 f', '2010-2015 f']
#Which columns to keep as is for melt
id_vars = ['order',	'area', 'notes', 'country_code', 'type']
#Name of column after melt
value_name = "Annual rate of change of the migrant stock"
value_name_b = f"{value_name} (both sexes)"
value_name_m = f"{value_name} (male)"
value_name_f = f"{value_name} (female)"

#Getting Table 5 ready to process
table = df_tb5 = None
table = prep(table, tb_name, col_name)

#melt & split by sex
table_b = melt_func(table, id_vars, value_vars_b_p, var_name_period, value_name_b)
table_b = split_period(table_b)

table_m = melt_func(table, id_vars, value_vars_m_p, var_name_period, value_name_m)
table_m = split_period(table_m)

table_f = melt_func(table, id_vars, value_vars_f_p, var_name_period, value_name_f)
table_f = split_period(table_f)

#Combine results from male, female, and both sexes
table = table_b[['order', 'area', 'notes', 'country_code', 'type', 'period', value_name_b]].join(table_m[value_name_m]).join(table_f[value_name_f])

#Rename columns in preparation for melt
table.columns = ['order', 'area', 'notes',	'country_code', 'type', 'period', 'both sexes', 'male', 'female']
#melt migrant stock valuse for male, female, and both sexes
table = table.melt(id_vars = ['order',	'area', 'notes',	'country_code', 'type', 'period'], value_vars = ['both sexes', 'male', 'female'], var_name = 'gender', value_name = value_name)
#Replace empty values with "NaN"
df_tb5 = table.replace(to_replace =['..'],value =['NaN'])

#show results
df_tb5

Unnamed: 0,order,area,notes,country_code,type,period,gender,Annual rate of change of the migrant stock
0,7,Africa,,903,,1990-1995,both sexes,0.826734
1,8,Eastern Africa,,910,,1990-1995,both sexes,-3.435412
2,9,Burundi,,108,B R,1990-1995,both sexes,-5.355717
3,10,Comoros,,174,B,1990-1995,both sexes,-0.199873
4,11,Djibouti,,262,B R,1990-1995,both sexes,-4.058465
...,...,...,...,...,...,...,...,...
3880,261,Samoa,,882,B,2010-2015,female,-0.545343
3881,262,Tokelau,,772,B,2010-2015,female,2.60325
3882,263,Tonga,,776,B,2010-2015,female,2.526318
3883,264,Tuvalu,,798,C,2010-2015,female,-1.819436


=============================Table 6==================================

In [9]:
#Defining the variables for functions
#Table name is
tb_name = 'Table 6'
#Names of the columns in the original dataset
col_name = ['order',	'area', 'notes',	'country_code', 'type', '1990', '1995', '2000', '2005', '2010', '2015', '1990 b', '1995 b', '2000 b', '2005 b', '2010 b', '2015 b', '1990-1995', '1995-2000', '2000-2005', '2005-2010', '2010-2015']
#Which columns to keep as is for melt
id_vars = ['order',	'area', 'notes', 'country_code', 'type']
#Name of column after melt
value_name_a = 'Estimated refugee stock at mid-year (both sexes)'
value_name_b = 'Refugees as a percentage of the international migrant stock'
value_name_c = 'Annual rate of change of the refugee stock'

value_vars_a = ['1990', '1995', '2000', '2005', '2010', '2015']
value_vars_b = ['1990 b', '1995 b', '2000 b', '2005 b', '2010 b', '2015 b']
value_vars_c = ['1990-1995', '1995-2000', '2000-2005', '2005-2010', '2010-2015']

var_name_year = 'year'
var_name_period = 'period'


#Getting Table 6 ready to process
table = df_tb6 = None
table = prep(table, tb_name, col_name)

#Processing 3 categories individually
table_a = melt_func(table, id_vars, value_vars_a, var_name_year, value_name_a)
df_tb6_a = table_a.replace(to_replace =['..'],value =['NaN'])

table_b = melt_func(table, id_vars, value_vars_b, var_name_year, value_name_b)
table_b = table_b.replace(to_replace =['1990 b', '1995 b', '2000 b', '2005 b', '2010 b', '2015 b'],value =['1990', '1995', '2000', '2005', '2010', '2015'])
df_tb6_b = table_b.replace(to_replace =['..'],value =['NaN'])

table_c = melt_func(table, id_vars, value_vars_c, var_name_period, value_name_c)
df_tb6_c = table_c.replace(to_replace =['..'],value =['NaN'])

#show results
df_tb6_a


Unnamed: 0,order,area,notes,country_code,type,year,Estimated refugee stock at mid-year (both sexes)
0,7,Africa,,903,,1990,5687352
1,8,Eastern Africa,,910,,1990,3168001
2,9,Burundi,,108,B R,1990,267929
3,10,Comoros,,174,B,1990,0
4,11,Djibouti,,262,B R,1990,54508
...,...,...,...,...,...,...,...
1549,261,Samoa,,882,B,2015,0.0
1550,262,Tokelau,,772,B,2015,0.0
1551,263,Tonga,,776,B,2015,0.0
1552,264,Tuvalu,,798,C,2015,0.0


In [10]:
#=====Naming Convention=====
#tb1 = International migrant stock at mid-year by sex and by major area, region, country or area, 1990-2015
#tb2 = Total population at mid-year by sex and by major area, region, country or area, 1990-2015 (thousands)
#tb3 = International migrant stock as a percentage of the total population by sex and by major area, region, country or area, 1990-2015
#tb4 = Female migrants as a percentage of the international migrant stock by major area, region, country or area, 1990-2015
#tb5 = Annual rate of change of the migrant stock by sex and by major area, region, country or area, 1990-2015 (percentage)
#tb6_a = Estimated refugee stock at mid-year (both sexes)
#tb6_b = Refugees as a percentage of the international migrant stock
#tb6_c = Annual rate of change of the refugee stock
#[ca] = Country and Area
#[ma] = Major area
#[rg] = Region
#For more information on how country and area/major area/region is categorized, please consult the original excel's "ANNEX" table. 

df_tb1_ca = extract_country(df_tb1)
df_tb1_ca.columns = ['Order', 'Country and Area', 'Notes',	'Country Code', 'Type', 'Year', 'Gender',	'International migrant stock at mid-year']
df_tb1_ma = extract_majorarea(df_tb1)
df_tb1_ma.columns = ['Order', 'Major Area', 'Notes',	'Country Code', 'Type', 'Year', 'Gender',	'International migrant stock at mid-year']
df_tb1_rg = extract_region(df_tb1)
df_tb1_rg.columns = ['Order', 'Region', 'Notes',	'Country Code', 'Type', 'Year', 'Gender',	'International migrant stock at mid-year']

df_tb2_ca = extract_country(df_tb2)
df_tb2_ca.columns = ['Order', 'Country and Area', 'Notes',	'Country Code', 'Year', 'Gender',	"Total population at mid-year (thousands)"]
df_tb2_ma = extract_majorarea(df_tb2)
df_tb2_ma.columns = ['Order', 'Major Area', 'Notes',	'Country Code', 'Year', 'Gender',	"Total population at mid-year (thousands)"]
df_tb2_rg = extract_region(df_tb2)
df_tb2_rg.columns = ['Order', 'Region', 'Notes',	'Country Code', 'Year', 'Gender',	"Total population at mid-year (thousands)"]

df_tb3_ca = extract_country(df_tb3)
df_tb3_ca.columns = ['Order', 'Country and Area', 'Notes',	'Country Code', 'Type', 'Year', 'Gender',	'International migrant stock as a percentage of the total population']
df_tb3_ma = extract_majorarea(df_tb3)
df_tb3_ma.columns = ['Order', 'Major Area', 'Notes',	'Country Code', 'Type', 'Year', 'Gender',	'International migrant stock as a percentage of the total population']
df_tb3_rg = extract_region(df_tb3)
df_tb3_rg.columns = ['Order', 'Region', 'Notes',	'Country Code', 'Type', 'Year', 'Gender',	'International migrant stock as a percentage of the total population']

df_tb4_ca = extract_country(df_tb4)
df_tb4_ca.columns = ['Order', 'Country and Area', 'Notes',	'Country Code', 'Type', 'Year', 'Gender',	'Female migrants as a percentage of the international migrant stock']
df_tb4_ma = extract_majorarea(df_tb4)
df_tb4_ma.columns = ['Order', 'Major Area', 'Notes',	'Country Code', 'Type', 'Year', 'Gender',	'Female migrants as a percentage of the international migrant stock']
df_tb4_rg = extract_region(df_tb4)
df_tb4_rg.columns = ['Order', 'Region', 'Notes',	'Country Code', 'Type', 'Year', 'Gender',	'Female migrants as a percentage of the international migrant stock']

df_tb5_ca = extract_country(df_tb5)
df_tb5_ca.columns = ['Order', 'Country and Area', 'Notes',	'Country Code', 'Type', 'Year', 'Gender',	'Annual rate of change of the migrant stock']
df_tb5_ma = extract_majorarea(df_tb5)
df_tb5_ma.columns = ['Order', 'Major Area', 'Notes',	'Country Code', 'Type', 'Year', 'Gender',	'Annual rate of change of the migrant stock']
df_tb5_rg = extract_region(df_tb5)
df_tb5_rg.columns = ['Order', 'Region', 'Notes',	'Country Code', 'Type', 'Year', 'Gender',	'Annual rate of change of the migrant stock']

df_tb6_a_ca = extract_country(df_tb6_a)
df_tb6_a_ca.columns = ['Order', 'Country and Area', 'Notes',	'Country Code', 'Type', 'Year',	'Estimated refugee stock at mid-year (both sexes)']
df_tb6_a_ma = extract_majorarea(df_tb6_a)
df_tb6_a_ma.columns = ['Order', 'Major Area', 'Notes',	'Country Code', 'Type', 'Year',	'Estimated refugee stock at mid-year (both sexes)']
df_tb6_a_rg = extract_region(df_tb6_a)
df_tb6_a_rg.columns = ['Order', 'Region', 'Notes',	'Country Code', 'Type', 'Year','Estimated refugee stock at mid-year (both sexes)']

df_tb6_b_ca = extract_country(df_tb6_b)
df_tb6_b_ca.columns = ['Order', 'Country and Area', 'Notes',	'Country Code', 'Type', 'Year', 'Refugees as a percentage of the international migrant stock']
df_tb6_b_ma = extract_majorarea(df_tb6_b)
df_tb6_b_ma.columns = ['Order', 'Major Area', 'Notes',	'Country Code', 'Type', 'Year', 'Refugees as a percentage of the international migrant stock']
df_tb6_b_rg = extract_region(df_tb6_b)
df_tb6_b_rg.columns = ['Order', 'Region', 'Notes',	'Country Code', 'Type', 'Year', 'Refugees as a percentage of the international migrant stock']

df_tb6_c_ca = extract_country(df_tb6_c)
df_tb6_c_ca.columns = ['Order', 'Country and Area', 'Notes',	'Country Code', 'Type', 'Period', 'Annual rate of change of the refugee stock']
df_tb6_c_ma = extract_majorarea(df_tb6_c)
df_tb6_c_ma.columns = ['Order', 'Major Area', 'Notes',	'Country Code', 'Type', 'Period', 'Annual rate of change of the refugee stock']
df_tb6_c_rg = extract_region(df_tb6_c)
df_tb6_c_rg.columns = ['Order', 'Region', 'Notes',	'Country Code', 'Type', 'Period', 'Annual rate of change of the refugee stock']

#Show one result
df_tb6_c_rg

Unnamed: 0,Order,Region,Notes,Country Code,Type,Period,Annual rate of change of the refugee stock
1,8,Eastern Africa,,910,,1990-1995,-5.30801
22,29,Middle Africa,,911,,1990-1995,12.964162
32,39,Northern Africa,,912,,1990-1995,-3.456178
40,47,Southern Africa,,913,,1990-1995,-1.954547
46,53,Western Africa,,914,,1990-1995,8.717581
...,...,...,...,...,...,...,...
1261,232,Northern America,,905,,2010-2015,-1.526424
1268,239,Australia and New Zealand,,927,,2010-2015,8.829439
1271,242,Melanesia,,928,,2010-2015,-0.268521
1277,248,Micronesia,,954,,2010-2015,
