In [2]:
################################################################################
# Source code for processing data and exporting files for plotting results in Figure 1-4
# Date of this version: 2023-01-15
# of the paper:
#          "A gender perspective on the global migration of scholars"
#
# CITATION: XXXXXXXXXXXXXXXXXX
# DOI: XXXXXXXXXXXX
#
# Author of the code: Xinyi Zhao
# ORCID: 0000-0002-2552-7795
# Institution1: Max Planck Institute for Demographic Research, Rostock, Germany
# Institution2: Leverhulme Centre for Demographic Science, Department of
#               Sociology, University of Oxford, Oxford, UK
# WWW: https://www.demogr.mpg.de/en/about_us_6113/staff_directory_1899/xinyi_zhao_4083/
# Email: zhao@demogr.mpg.de
# Email2: xinyi.zhao@st-hughs.ox.ac.uk
################################################################################


<a id='index'></a>

## Index

### 1. [Data Input](#read)
### 2. [Country-level Spreads](#ctrspread)
### 3. [Global-level Spreads](#glospread)
### 4. [Preferred destinations by gender on global gender](#glodestination)
### 5. [Preferred destinations by gender on countryl gender](#ctrdestination)

In [9]:
import pandas as pd

encoding = 'utf-8'

pd.set_option('max_colwidth',100)
pd.set_option('float_format', lambda x: '%.4f' % x)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 20)

# data URLs
import os

data_dir = os.path.join("..", 'Aggregated_Academic_Migration_Data')
output_dir = os.path.join("..", 'For_figure_1_3_R')

In [2]:
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

from functools import reduce

In [3]:
dic_period5 = {
1: "1998-2002",
2: "2003-2007",
3: "2008-2012",
4: "2013-2017",
     }

In [4]:
import pycountry_convert as pc
continents = {
    'NA': 'North America',
    'SA': 'South America', 
    'AS': 'Asia',
    'OC': 'Oceania',
    'AF': 'Africa',
    'EU': 'Europe'
}


def contry_to_iso2 (country): 
    try: 
        country_code = pc.country_name_to_country_alpha2(country, cn_name_format="default")
    except:
        country_code ="unknown"
        
    return country_code

<a id='read'></a>

### 1. Data Input

#### Data Location: ...\Aggregated_Academic_MIgration_Data\  

##### return to [index](#index)

In [11]:
# Country-level inflows and outflows by gender at 4 periods
net_researcher_country_period = pd.read_csv(os.path.join(
    data_dir, "0_gendered_net_migration_all.csv"))

# Migration Origin-Destination (OD) flows by gender 
all_female_male_flow = pd.read_csv(os.path.join(
    data_dir, "0_OD_migration_flow.csv"))

# Global-level academic migration tally 
global_population_period_summary = pd.read_csv(os.path.join(
    data_dir, "1_aggregated_global_level_migrant.csv"))

In [12]:
# a brief look at imported data
net_researcher_country_period.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,residenceCountry,period,outflow_period,female_outflow_period,male_outflow_period,inflow_period,female_inflow_period,male_inflow_period,net_all_period,net_female_period,net_male_period,netrate_all_period,netrate_female_period,netrate_male_period,gendered_inflow,gendered_outflow,iso2
0,0,0,Afghanistan,1,1.0,0.0,1.0,0.0,0.0,0.0,-1.0,0.0,-1.0,-0.6667,0.0,-0.6667,0.0,0.0,AF
1,1,1,Afghanistan,2,1.0,0.0,1.0,0.0,0.0,0.0,-1.0,0.0,-1.0,-0.069,0.0,-0.1176,0.0,0.0,AF
2,2,2,Afghanistan,3,2.0,0.0,2.0,4.0,2.0,2.0,2.0,2.0,0.0,0.0212,0.0909,0.0,1.0,0.0,AF
3,3,3,Afghanistan,4,8.0,2.0,6.0,9.0,1.0,8.0,1.0,-1.0,2.0,0.0076,-0.0303,0.0255,0.125,0.3333,AF
4,4,4,Albania,1,3.0,1.0,2.0,4.0,1.0,3.0,1.0,0.0,1.0,0.0299,0.0,0.05,0.3333,0.5,AL


In [31]:
all_female_male_flow.head()

Unnamed: 0.1,Unnamed: 0,from,to,period,all_period_flow,female_period_flow,male_period_flow,period_year
0,11941,Lebanon,United Kingdom,1.0,8,4.0,4.0,1998-2002
1,8858,India,Kenya,1.0,6,0.0,5.0,1998-2002
2,4089,China,Cuba,1.0,1,0.0,1.0,1998-2002
3,15840,Poland,Georgia,1.0,1,1.0,0.0,1998-2002
4,13500,Nepal,Denmark,1.0,3,0.0,3.0,1998-2002


In [32]:
global_population_period_summary.head()

Unnamed: 0.1,Unnamed: 0,period,migration_count,female_migration_count,male_migration_count,migrant_count,female_migrant_count,male_migrant_count,all_period,female_period,male_period,gender_ratio_migration,gender_ratio_migrant,gender_ratio_all
0,0,1.0,155445,35885,115580,124189,29161,91684,2431637,684169,1538775,0.3105,0.3181,0.4446
1,1,2.0,183315,48751,129324,155245,41717,108958,3477348,1052801,2063423,0.377,0.3829,0.5102
2,2,3.0,247253,72370,167780,208408,61699,140594,4775088,1531803,2712570,0.4313,0.4388,0.5647
3,3,4.0,302685,93761,200110,254104,79370,167182,5236562,1705969,2954493,0.4685,0.4748,0.5774


In [13]:
# keep only needed columns
global_migration_period_summary=global_population_period_summary[["period","migration_count","female_migration_count","male_migration_count","migrant_count","female_migrant_count","male_migrant_count"]]


In [34]:
global_migration_period_summary.head()

Unnamed: 0,period,migration_count,female_migration_count,male_migration_count,migrant_count,female_migrant_count,male_migrant_count
0,1.0,155445,35885,115580,124189,29161,91684
1,2.0,183315,48751,129324,155245,41717,108958
2,3.0,247253,72370,167780,208408,61699,140594
3,4.0,302685,93761,200110,254104,79370,167182


<a id='ctrspread'></a>

### 2. Country-level emigration/ immigration spread

##### return to [index](#index)

In [None]:
# *******************flows***************************************

In [14]:
net_1=net_researcher_country_period[["residenceCountry","period","outflow_period","female_outflow_period","male_outflow_period"]]

country_emigration=net_1.merge(all_female_male_flow,left_on=["residenceCountry","period"],right_on=["from","period"],how="left")

net_2=net_researcher_country_period[["residenceCountry","period","inflow_period","female_inflow_period","male_inflow_period"]]

country_immigration=net_2.merge(all_female_male_flow,left_on=["residenceCountry","period"],right_on=["to","period"],how="left")

In [55]:
net_1.head()

Unnamed: 0,residenceCountry,period,outflow_period,female_outflow_period,male_outflow_period
0,Afghanistan,1,1.0,0.0,1.0
1,Afghanistan,2,1.0,0.0,1.0
2,Afghanistan,3,2.0,0.0,2.0
3,Afghanistan,4,8.0,2.0,6.0
4,Albania,1,3.0,1.0,2.0


In [56]:
net_2.head()

Unnamed: 0,residenceCountry,period,inflow_period,female_inflow_period,male_inflow_period
0,Afghanistan,1,0.0,0.0,0.0
1,Afghanistan,2,0.0,0.0,0.0
2,Afghanistan,3,4.0,2.0,2.0
3,Afghanistan,4,9.0,1.0,8.0
4,Albania,1,4.0,1.0,3.0


In [57]:
country_emigration.head()

Unnamed: 0.1,residenceCountry,period,outflow_period,female_outflow_period,male_outflow_period,Unnamed: 0,from,to,all_period_flow,female_period_flow,male_period_flow,period_year
0,Afghanistan,1,1.0,0.0,1.0,6.0,Afghanistan,Russia,1.0,0.0,1.0,1998-2002
1,Afghanistan,2,1.0,0.0,1.0,9.0,Afghanistan,United Kingdom,1.0,0.0,1.0,2003-2007
2,Afghanistan,3,2.0,0.0,2.0,3.0,Afghanistan,Iran,2.0,0.0,2.0,2008-2012
3,Afghanistan,4,8.0,2.0,6.0,0.0,Afghanistan,Belize,1.0,0.0,1.0,2013-2017
4,Afghanistan,4,8.0,2.0,6.0,1.0,Afghanistan,China,1.0,0.0,1.0,2013-2017


In [58]:
country_immigration.head()

Unnamed: 0.1,residenceCountry,period,inflow_period,female_inflow_period,male_inflow_period,Unnamed: 0,from,to,all_period_flow,female_period_flow,male_period_flow,period_year
0,Afghanistan,1,0.0,0.0,0.0,,,,,,,
1,Afghanistan,2,0.0,0.0,0.0,,,,,,,
2,Afghanistan,3,4.0,2.0,2.0,21601.0,United Kingdom,Afghanistan,1.0,0.0,1.0,2008-2012
3,Afghanistan,3,4.0,2.0,2.0,3057.0,Burundi,Afghanistan,1.0,1.0,0.0,2008-2012
4,Afghanistan,3,4.0,2.0,2.0,22152.0,United States,Afghanistan,1.0,1.0,0.0,2008-2012


In [15]:
### calculate the emigration & immigration spreads, no matter the gender
country_emigration=country_emigration[country_emigration["from"].notnull()]
country_emigration["ES_ctr"]=(country_emigration["all_period_flow"]/country_emigration["outflow_period"])**2
country_emigration["ES_ctr"]=country_emigration.groupby(["period","residenceCountry"])["ES_ctr"].transform("sum")
country_emigration["ES_ctr"]=1-country_emigration["ES_ctr"]

country_immigration=country_immigration[country_immigration["to"].notnull()]
country_immigration["IS_ctr"]=(country_immigration["all_period_flow"]/country_immigration["inflow_period"])**2
country_immigration["IS_ctr"]=country_immigration.groupby(["period","residenceCountry"])["IS_ctr"].transform("sum")
country_immigration["IS_ctr"]=1-country_immigration["IS_ctr"]

In [16]:
### calculate the emigration & immigration spreads by gender

country_emigration["ES_ctr_female"]=(country_emigration["female_period_flow"]/country_emigration["female_outflow_period"])**2
country_emigration["ES_ctr_female"]=country_emigration.groupby(["period","residenceCountry"])["ES_ctr_female"].transform("sum")
country_emigration["ES_ctr_female"]=1-country_emigration["ES_ctr_female"]

country_immigration["IS_ctr_female"]=(country_immigration["female_period_flow"]/country_immigration["female_inflow_period"])**2
country_immigration["IS_ctr_female"]=country_immigration.groupby(["period","residenceCountry"])["IS_ctr_female"].transform("sum")
country_immigration["IS_ctr_female"]=1-country_immigration["IS_ctr_female"]

country_emigration["ES_ctr_male"]=(country_emigration["male_period_flow"]/country_emigration["male_outflow_period"])**2
country_emigration["ES_ctr_male"]=country_emigration.groupby(["period","residenceCountry"])["ES_ctr_male"].transform("sum")
country_emigration["ES_ctr_male"]=1-country_emigration["ES_ctr_male"]

country_immigration["IS_ctr_male"]=(country_immigration["male_period_flow"]/country_immigration["male_inflow_period"])**2
country_immigration["IS_ctr_male"]=country_immigration.groupby(["period","residenceCountry"])["IS_ctr_male"].transform("sum")
country_immigration["IS_ctr_male"]=1-country_immigration["IS_ctr_male"]

In [17]:
## results summary
country_spread_summary=country_emigration[["period","residenceCountry","female_outflow_period","ES_ctr","ES_ctr_female","ES_ctr_male"]].drop_duplicates()
country_spread_summary=country_spread_summary.merge(country_immigration[["period","residenceCountry","female_inflow_period","IS_ctr","IS_ctr_female","IS_ctr_male"]].drop_duplicates(),on=["period","residenceCountry"],how="left")

country_spread_summary["iso2"]=country_spread_summary["residenceCountry"].apply(lambda x: contry_to_iso2(x) )

In [62]:
country_spread_summary.head()

Unnamed: 0,period,residenceCountry,female_outflow_period,ES_ctr,ES_ctr_female,ES_ctr_male,female_inflow_period,IS_ctr,IS_ctr_female,IS_ctr_male,iso2
0,1,Afghanistan,0.0,0.0,1.0,0.0,,,,,AF
1,2,Afghanistan,0.0,0.0,1.0,0.0,,,,,AF
2,3,Afghanistan,0.0,0.0,1.0,0.0,2.0,0.75,0.5,0.5,AF
3,4,Afghanistan,2.0,0.875,0.5,0.8333,1.0,0.8642,0.0,0.8438,AF
4,1,Albania,1.0,0.4444,0.0,0.0,1.0,0.75,0.0,0.6667,AL


In [25]:
# Considering potential difference in absolute numbers of female migrants and male migrants, 
# using the proportion of female and male migrants to calculate the standardized overall emigration and immigration spreads on country level 

In [18]:
country_emigration=country_emigration.fillna(0)
country_emigration_prop=country_emigration.drop(columns=["ES_ctr","ES_ctr_female","ES_ctr_male"])
country_emigration_prop["female_period_flow_prop"]=country_emigration_prop["female_period_flow"]/country_emigration_prop["female_outflow_period"]
country_emigration_prop["male_period_flow_prop"]=country_emigration_prop["male_period_flow"]/country_emigration_prop["male_outflow_period"]
country_emigration_prop=country_emigration_prop.fillna(0)
country_emigration_prop["period_flow_prop"]=country_emigration_prop["female_period_flow_prop"]+country_emigration_prop["male_period_flow_prop"]

country_emigration_prop_count=country_emigration_prop.groupby(["residenceCountry","period"])["period_flow_prop"].sum().reset_index(name='period_flow_prop_count')
country_emigration_prop=country_emigration_prop.merge(country_emigration_prop_count,on=["residenceCountry","period"],how="left")


country_emigration_prop["ES_ctr_normalized"]=(country_emigration_prop["period_flow_prop"]/country_emigration_prop["period_flow_prop_count"])**2
country_emigration_prop["ES_ctr_normalized"]=country_emigration_prop.groupby(["residenceCountry","period"])["ES_ctr_normalized"].transform("sum")
country_emigration_prop["ES_ctr_normalized"]=1-country_emigration_prop["ES_ctr_normalized"]

In [64]:
country_emigration_prop.head()

Unnamed: 0.1,residenceCountry,period,outflow_period,female_outflow_period,male_outflow_period,Unnamed: 0,from,to,all_period_flow,female_period_flow,male_period_flow,period_year,female_period_flow_prop,male_period_flow_prop,period_flow_prop,period_flow_prop_count,ES_ctr_normalized
0,Afghanistan,1,1.0,0.0,1.0,6.0,Afghanistan,Russia,1.0,0.0,1.0,1998-2002,0.0,1.0,1.0,1.0,0.0
1,Afghanistan,2,1.0,0.0,1.0,9.0,Afghanistan,United Kingdom,1.0,0.0,1.0,2003-2007,0.0,1.0,1.0,1.0,0.0
2,Afghanistan,3,2.0,0.0,2.0,3.0,Afghanistan,Iran,2.0,0.0,2.0,2008-2012,0.0,1.0,1.0,1.0,0.0
3,Afghanistan,4,8.0,2.0,6.0,0.0,Afghanistan,Belize,1.0,0.0,1.0,2013-2017,0.0,0.1667,0.1667,2.0,0.8333
4,Afghanistan,4,8.0,2.0,6.0,1.0,Afghanistan,China,1.0,0.0,1.0,2013-2017,0.0,0.1667,0.1667,2.0,0.8333


In [19]:
country_immigration_prop=country_immigration.drop(columns=["IS_ctr","IS_ctr_female","IS_ctr_male"])
country_immigration_prop["female_period_flow_prop"]=country_immigration_prop["female_period_flow"]/country_immigration_prop["female_inflow_period"]
country_immigration_prop["male_period_flow_prop"]=country_immigration_prop["male_period_flow"]/country_immigration_prop["male_inflow_period"]
country_immigration_prop=country_immigration_prop.fillna(0)
country_immigration_prop["period_flow_prop"]=country_immigration_prop["female_period_flow_prop"]+country_immigration_prop["male_period_flow_prop"]

country_immigration_prop_count=country_immigration_prop.groupby(["residenceCountry","period"])["period_flow_prop"].sum().reset_index(name='period_flow_prop_count')
country_immigration_prop=country_immigration_prop.merge(country_immigration_prop_count,on=["residenceCountry","period"],how="left")


country_immigration_prop["IS_ctr_normalized"]=(country_immigration_prop["period_flow_prop"]/country_immigration_prop["period_flow_prop_count"])**2
country_immigration_prop["IS_ctr_normalized"]=country_immigration_prop.groupby(["residenceCountry","period"])["IS_ctr_normalized"].transform("sum")
country_immigration_prop["IS_ctr_normalized"]=1-country_immigration_prop["IS_ctr_normalized"]

In [66]:
country_immigration_prop.head()

Unnamed: 0.1,residenceCountry,period,inflow_period,female_inflow_period,male_inflow_period,Unnamed: 0,from,to,all_period_flow,female_period_flow,male_period_flow,period_year,female_period_flow_prop,male_period_flow_prop,period_flow_prop,period_flow_prop_count,IS_ctr_normalized
0,Afghanistan,3,4.0,2.0,2.0,21601.0,United Kingdom,Afghanistan,1.0,0.0,1.0,2008-2012,0.0,0.5,0.5,2.0,0.75
1,Afghanistan,3,4.0,2.0,2.0,3057.0,Burundi,Afghanistan,1.0,1.0,0.0,2008-2012,0.5,0.0,0.5,2.0,0.75
2,Afghanistan,3,4.0,2.0,2.0,22152.0,United States,Afghanistan,1.0,1.0,0.0,2008-2012,0.5,0.0,0.5,2.0,0.75
3,Afghanistan,3,4.0,2.0,2.0,9269.0,Iran,Afghanistan,1.0,0.0,1.0,2008-2012,0.0,0.5,0.5,2.0,0.75
4,Afghanistan,4,9.0,1.0,8.0,18040.0,South Africa,Afghanistan,1.0,1.0,0.0,2013-2017,1.0,0.0,1.0,2.0,0.7109


In [20]:
emi=country_emigration_prop[["period","residenceCountry","ES_ctr_normalized"]].drop_duplicates()
immi=country_immigration_prop[["period","residenceCountry","IS_ctr_normalized"]].drop_duplicates()

country_spread_summary=country_spread_summary.merge(emi,on=["residenceCountry","period"],how="left").merge(immi,on=["residenceCountry","period"],how="left")

In [68]:
country_spread_summary.head()

Unnamed: 0,period,residenceCountry,female_outflow_period,ES_ctr,ES_ctr_female,ES_ctr_male,female_inflow_period,IS_ctr,IS_ctr_female,IS_ctr_male,iso2,ES_ctr_normalized,IS_ctr_normalized
0,1,Afghanistan,0.0,0.0,1.0,0.0,,,,,AF,0.0,
1,2,Afghanistan,0.0,0.0,1.0,0.0,,,,,AF,0.0,
2,3,Afghanistan,0.0,0.0,1.0,0.0,2.0,0.75,0.5,0.5,AF,0.0,0.75
3,4,Afghanistan,2.0,0.875,0.5,0.8333,1.0,0.8642,0.0,0.8438,AF,0.8333,0.7109
4,1,Albania,1.0,0.4444,0.0,0.0,1.0,0.75,0.0,0.6667,AL,0.5,0.6667


In [31]:
country_spread_summary.to_csv(os.path.join(
    output_dir,"2_country_level_spread.csv"))

<a id='glospread'></a>

### 3. Global-level emigration/ immigration spread

##### return to [index](#index)

In [21]:
global_emigration=net_researcher_country_period[["residenceCountry","period","outflow_period","female_outflow_period","male_outflow_period"]]

global_immigration=net_researcher_country_period[["residenceCountry","period","inflow_period","female_inflow_period","male_inflow_period"]]

global_emigration=global_emigration.merge(global_migration_period_summary,on=["period"],how="left")

global_immigration=global_immigration.merge(global_migration_period_summary,on=["period"],how="left")

In [22]:
global_emigration["ES_global"]=(global_emigration["outflow_period"]/global_emigration["migration_count"])**2
global_emigration["ES_global"]=global_emigration.groupby("period")["ES_global"].transform("sum")
global_emigration["ES_global"]=1-global_emigration["ES_global"]

global_immigration["IS_global"]=(global_immigration["inflow_period"]/global_immigration["migration_count"])**2
global_immigration["IS_global"]=global_immigration.groupby("period")["IS_global"].transform("sum")
global_immigration["IS_global"]=1-global_immigration["IS_global"]


In [37]:
global_emigration.head()

Unnamed: 0,residenceCountry,period,outflow_period,female_outflow_period,male_outflow_period,migration_count,female_migration_count,male_migration_count,migrant_count,female_migrant_count,male_migrant_count,ES_global
0,Afghanistan,1,1.0,0.0,1.0,155445,35885,115580,124189,29161,91684,0.9269
1,Afghanistan,2,1.0,0.0,1.0,183315,48751,129324,155245,41717,108958,0.926
2,Afghanistan,3,2.0,0.0,2.0,247253,72370,167780,208408,61699,140594,0.9326
3,Afghanistan,4,8.0,2.0,6.0,302685,93761,200110,254104,79370,167182,0.9407
4,Albania,1,3.0,1.0,2.0,155445,35885,115580,124189,29161,91684,0.9269


In [38]:
global_immigration.head()

Unnamed: 0,residenceCountry,period,inflow_period,female_inflow_period,male_inflow_period,migration_count,female_migration_count,male_migration_count,migrant_count,female_migrant_count,male_migrant_count,IS_global
0,Afghanistan,1,0.0,0.0,0.0,155445,35885,115580,124189,29161,91684,0.9056
1,Afghanistan,2,0.0,0.0,0.0,183315,48751,129324,155245,41717,108958,0.9169
2,Afghanistan,3,4.0,2.0,2.0,247253,72370,167780,208408,61699,140594,0.9309
3,Afghanistan,4,9.0,1.0,8.0,302685,93761,200110,254104,79370,167182,0.9365
4,Albania,1,4.0,1.0,3.0,155445,35885,115580,124189,29161,91684,0.9056


In [23]:
global_migration_period_summary=global_migration_period_summary[["period","migration_count","female_migration_count","male_migration_count"]]
global_migration_period_summary=global_migration_period_summary.merge(global_emigration[["period","ES_global"]].drop_duplicates(),on="period",how="left")

global_migration_period_summary=global_migration_period_summary.merge(global_immigration[["period","IS_global"]].drop_duplicates(),on="period",how="left")

In [40]:
global_migration_period_summary.head()

Unnamed: 0,period,migration_count,female_migration_count,male_migration_count,ES_global,IS_global
0,1.0,155445,35885,115580,0.9269,0.9056
1,2.0,183315,48751,129324,0.926,0.9169
2,3.0,247253,72370,167780,0.9326,0.9309
3,4.0,302685,93761,200110,0.9407,0.9365


In [24]:
global_emigration["ES_global_female"]=(global_emigration["female_outflow_period"]/global_emigration["female_migration_count"])**2
global_emigration["ES_global_female"]=global_emigration.groupby("period")["ES_global_female"].transform("sum")
global_emigration["ES_global_female"]=1-global_emigration["ES_global_female"]

global_emigration["ES_global_male"]=(global_emigration["male_outflow_period"]/global_emigration["male_migration_count"])**2
global_emigration["ES_global_male"]=global_emigration.groupby("period")["ES_global_male"].transform("sum")
global_emigration["ES_global_male"]=1-global_emigration["ES_global_male"]

global_immigration["IS_global_female"]=(global_immigration["female_inflow_period"]/global_immigration["female_migration_count"])**2
global_immigration["IS_global_female"]=global_immigration.groupby("period")["IS_global_female"].transform("sum")
global_immigration["IS_global_female"]=1-global_immigration["IS_global_female"]

global_immigration["IS_global_male"]=(global_immigration["male_inflow_period"]/global_immigration["male_migration_count"])**2
global_immigration["IS_global_male"]=global_immigration.groupby("period")["IS_global_male"].transform("sum")
global_immigration["IS_global_male"]=1-global_immigration["IS_global_male"]

global_migration_period_summary=global_migration_period_summary.merge(global_emigration[["period","ES_global_female"]].drop_duplicates(),on="period",how="left")
global_migration_period_summary=global_migration_period_summary.merge(global_immigration[["period","IS_global_female"]].drop_duplicates(),on="period",how="left")
global_migration_period_summary=global_migration_period_summary.merge(global_emigration[["period","ES_global_male"]].drop_duplicates(),on="period",how="left")
global_migration_period_summary=global_migration_period_summary.merge(global_immigration[["period","IS_global_male"]].drop_duplicates(),on="period",how="left")

In [44]:
global_migration_period_summary

Unnamed: 0,period,migration_count,female_migration_count,male_migration_count,ES_global,IS_global,ES_global_female_x,IS_global_female_x,ES_global_male_x,IS_global_male_x,ES_global_female_y,IS_global_female_y,ES_global_male_y,IS_global_male_y
0,1.0,155445,35885,115580,0.9269,0.9056,0.9301,0.9019,0.9258,0.9063,0.9301,0.9019,0.9258,0.9063
1,2.0,183315,48751,129324,0.926,0.9169,0.9284,0.9145,0.925,0.9176,0.9284,0.9145,0.925,0.9176
2,3.0,247253,72370,167780,0.9326,0.9309,0.9346,0.9287,0.9318,0.9318,0.9346,0.9287,0.9318,0.9318
3,4.0,302685,93761,200110,0.9407,0.9365,0.9421,0.9351,0.9401,0.9373,0.9421,0.9351,0.9401,0.9373


In [None]:
# using the proportion of female and male migrants to calculate the standardized overall emigration and immigration spreads on global level 

In [25]:
global_emigration_prop=global_emigration.drop(columns=["migrant_count","female_migrant_count","male_migrant_count","ES_global","ES_global_female","ES_global_male"])
global_emigration_prop["female_outflow_period_prop"]=global_emigration_prop["female_outflow_period"]/global_emigration_prop["female_migration_count"]
global_emigration_prop["male_outflow_period_prop"]=global_emigration_prop["male_outflow_period"]/global_emigration_prop["male_migration_count"]
global_emigration_prop["outflow_period_prop"]=global_emigration_prop["male_outflow_period_prop"]+global_emigration_prop["female_outflow_period_prop"]

global_emigration_prop_count=global_emigration_prop.groupby("period")["outflow_period_prop"].sum().reset_index(name='outflow_period_prop_count')
global_emigration_prop=global_emigration_prop.merge(global_emigration_prop_count,on="period",how="left")


global_emigration_prop["ES_global_normalized"]=(global_emigration_prop["outflow_period_prop"]/global_emigration_prop["outflow_period_prop_count"])**2
global_emigration_prop["ES_global_normalized"]=global_emigration_prop.groupby("period")["ES_global_normalized"].transform("sum")
global_emigration_prop["ES_global_normalized"]=1-global_emigration_prop["ES_global_normalized"]

In [46]:
global_emigration_prop.head()

Unnamed: 0,residenceCountry,period,outflow_period,female_outflow_period,male_outflow_period,migration_count,female_migration_count,male_migration_count,female_outflow_period_prop,male_outflow_period_prop,outflow_period_prop,outflow_period_prop_count,ES_global_normalized
0,Afghanistan,1,1.0,0.0,1.0,155445,35885,115580,0.0,0.0,0.0,2.0,0.9284
1,Afghanistan,2,1.0,0.0,1.0,183315,48751,129324,0.0,0.0,0.0,2.0,0.927
2,Afghanistan,3,2.0,0.0,2.0,247253,72370,167780,0.0,0.0,0.0,2.0,0.9334
3,Afghanistan,4,8.0,2.0,6.0,302685,93761,200110,0.0,0.0,0.0001,2.0,0.9414
4,Albania,1,3.0,1.0,2.0,155445,35885,115580,0.0,0.0,0.0,2.0,0.9284


In [26]:
global_immigration_prop=global_immigration.drop(columns=["migrant_count","female_migrant_count","male_migrant_count","IS_global","IS_global_female","IS_global_male"])
global_immigration_prop["female_inflow_period_prop"]=global_immigration_prop["female_inflow_period"]/global_immigration_prop["female_migration_count"]
global_immigration_prop["male_inflow_period_prop"]=global_immigration_prop["male_inflow_period"]/global_immigration_prop["male_migration_count"]
#global_emigration_prop=global_emigration_prop.fillna(0)
global_immigration_prop["inflow_period_prop"]=global_immigration_prop["female_inflow_period_prop"]+global_immigration_prop["male_inflow_period_prop"]

global_immigration_prop_count=global_immigration_prop.groupby("period")["inflow_period_prop"].sum().reset_index(name='inflow_period_prop_count')
global_immigration_prop=global_immigration_prop.merge(global_immigration_prop_count,on="period",how="left")

global_immigration_prop["IS_global_normalized"]=(global_immigration_prop["inflow_period_prop"]/global_immigration_prop["inflow_period_prop_count"])**2
global_immigration_prop["IS_global_normalized"]=global_immigration_prop.groupby("period")["IS_global_normalized"].transform("sum")
global_immigration_prop["IS_global_normalized"]=1-global_immigration_prop["IS_global_normalized"]

In [27]:
emi=global_emigration_prop[["period","ES_global_normalized"]].drop_duplicates()
immi=global_immigration_prop[["period","IS_global_normalized"]].drop_duplicates()

global_migration_period_summary=global_migration_period_summary.merge(emi,on="period",how="left").merge(immi,on="period",how="left")

In [49]:
global_migration_period_summary

Unnamed: 0,period,migration_count,female_migration_count,male_migration_count,ES_global,IS_global,ES_global_female_x,IS_global_female_x,ES_global_male_x,IS_global_male_x,ES_global_female_y,IS_global_female_y,ES_global_male_y,IS_global_male_y,ES_global_normalized,IS_global_normalized
0,1.0,155445,35885,115580,0.9269,0.9056,0.9301,0.9019,0.9258,0.9063,0.9301,0.9019,0.9258,0.9063,0.9284,0.9044
1,2.0,183315,48751,129324,0.926,0.9169,0.9284,0.9145,0.925,0.9176,0.9284,0.9145,0.925,0.9176,0.927,0.9164
2,3.0,247253,72370,167780,0.9326,0.9309,0.9346,0.9287,0.9318,0.9318,0.9346,0.9287,0.9318,0.9318,0.9334,0.9305
3,4.0,302685,93761,200110,0.9407,0.9365,0.9421,0.9351,0.9401,0.9373,0.9421,0.9351,0.9401,0.9373,0.9414,0.9365


In [28]:
global_migration_prop=global_emigration_prop.merge(global_immigration_prop.drop(columns=["migration_count","female_migration_count","male_migration_count"]),
                                                   on=["residenceCountry","period"],how="outer")

global_migration_prop["iso2"]=global_migration_prop["residenceCountry"].apply(lambda x: contry_to_iso2(x) )

In [51]:
global_migration_prop.head()

Unnamed: 0,residenceCountry,period,outflow_period,female_outflow_period,male_outflow_period,migration_count,female_migration_count,male_migration_count,female_outflow_period_prop,male_outflow_period_prop,outflow_period_prop,outflow_period_prop_count,ES_global_normalized,inflow_period,female_inflow_period,male_inflow_period,female_inflow_period_prop,male_inflow_period_prop,inflow_period_prop,inflow_period_prop_count,IS_global_normalized,iso2
0,Afghanistan,1,1.0,0.0,1.0,155445,35885,115580,0.0,0.0,0.0,2.0,0.9284,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.9044,AF
1,Afghanistan,2,1.0,0.0,1.0,183315,48751,129324,0.0,0.0,0.0,2.0,0.927,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.9164,AF
2,Afghanistan,3,2.0,0.0,2.0,247253,72370,167780,0.0,0.0,0.0,2.0,0.9334,4.0,2.0,2.0,0.0,0.0,0.0,2.0,0.9305,AF
3,Afghanistan,4,8.0,2.0,6.0,302685,93761,200110,0.0,0.0,0.0001,2.0,0.9414,9.0,1.0,8.0,0.0,0.0,0.0001,2.0,0.9365,AF
4,Albania,1,3.0,1.0,2.0,155445,35885,115580,0.0,0.0,0.0,2.0,0.9284,4.0,1.0,3.0,0.0,0.0,0.0001,2.0,0.9044,AL


### Weighted global-level migration spreads 

In [29]:
global_migration_period_summary_2=country_spread_summary.merge(global_emigration_prop[["period","residenceCountry","outflow_period_prop","outflow_period_prop_count","female_outflow_period","female_migration_count","male_outflow_period","male_migration_count"]],on=["period","residenceCountry"],how="left")
global_migration_period_summary_2=global_migration_period_summary_2.merge(global_immigration_prop[["period","residenceCountry","inflow_period_prop","inflow_period_prop_count","female_inflow_period","male_inflow_period"]],on=["period","residenceCountry"],how="left")
global_migration_period_summary_2["ES_global_weighted"]=global_migration_period_summary_2["ES_ctr_normalized"]*global_migration_period_summary_2["outflow_period_prop"]/global_migration_period_summary_2["outflow_period_prop_count"]
global_migration_period_summary_2["IS_global_weighted"]=global_migration_period_summary_2["IS_ctr_normalized"]*global_migration_period_summary_2["inflow_period_prop"]/global_migration_period_summary_2["inflow_period_prop_count"]
global_migration_period_summary_2["ES_global_weighted"]=global_migration_period_summary_2.groupby("period")["ES_global_weighted"].transform("sum")
global_migration_period_summary_2["IS_global_weighted"]=global_migration_period_summary_2.groupby("period")["IS_global_weighted"].transform("sum")


In [51]:
global_migration_period_summary_2.head()

Unnamed: 0,period,residenceCountry,female_outflow_period_x,ES_ctr,ES_ctr_female,ES_ctr_male,female_inflow_period_x,IS_ctr,IS_ctr_female,IS_ctr_male,iso2,ES_ctr_normalized,IS_ctr_normalized,outflow_period_prop,outflow_period_prop_count,female_outflow_period_y,female_migration_count,male_outflow_period,male_migration_count,inflow_period_prop,inflow_period_prop_count,female_inflow_period_y,male_inflow_period,ES_global_weighted,IS_global_weighted
0,1,Afghanistan,0.0,0.0,1.0,0.0,,,,,AF,0.0,,0.0,2.0,0.0,35885,1.0,115580,0.0,2.0,0.0,0.0,0.861,0.886
1,2,Afghanistan,0.0,0.0,1.0,0.0,,,,,AF,0.0,,0.0,2.0,0.0,48751,1.0,129324,0.0,2.0,0.0,0.0,0.872,0.883
2,3,Afghanistan,0.0,0.0,1.0,0.0,2.0,0.75,0.5,0.5,AF,0.0,0.75,0.0,2.0,0.0,72370,2.0,167780,0.0,2.0,2.0,2.0,0.8897,0.8917
3,4,Afghanistan,2.0,0.875,0.5,0.8333,1.0,0.8642,0.0,0.8438,AF,0.8333,0.7109,0.0001,2.0,2.0,93761,6.0,200110,0.0001,2.0,1.0,8.0,0.8974,0.903
4,1,Albania,1.0,0.4444,0.0,0.0,1.0,0.75,0.0,0.6667,AL,0.5,0.6667,0.0,2.0,1.0,35885,2.0,115580,0.0001,2.0,1.0,3.0,0.861,0.886


In [30]:
global_migration_period_summary_2=global_migration_period_summary_2.drop(columns=["female_outflow_period_x"]).rename(columns={"female_outflow_period_y":"female_outflow_period"})

global_migration_period_summary_2=global_migration_period_summary_2.drop(columns=["female_inflow_period_x"]).rename(columns={"female_inflow_period_y":"female_inflow_period"})

In [71]:
global_migration_period_summary_2.head()

Unnamed: 0,period,residenceCountry,ES_ctr,ES_ctr_female,ES_ctr_male,IS_ctr,IS_ctr_female,IS_ctr_male,iso2,ES_ctr_normalized,IS_ctr_normalized,outflow_period_prop,outflow_period_prop_count,female_outflow_period,female_migration_count,male_outflow_period,male_migration_count,inflow_period_prop,inflow_period_prop_count,female_inflow_period,male_inflow_period,ES_global_weighted,IS_global_weighted
0,1,Afghanistan,0.0,1.0,0.0,,,,AF,0.0,,0.0,2.0,0.0,35885,1.0,115580,0.0,2.0,0.0,0.0,0.861,0.886
1,2,Afghanistan,0.0,1.0,0.0,,,,AF,0.0,,0.0,2.0,0.0,48751,1.0,129324,0.0,2.0,0.0,0.0,0.872,0.883
2,3,Afghanistan,0.0,1.0,0.0,0.75,0.5,0.5,AF,0.0,0.75,0.0,2.0,0.0,72370,2.0,167780,0.0,2.0,2.0,2.0,0.8897,0.8917
3,4,Afghanistan,0.875,0.5,0.8333,0.8642,0.0,0.8438,AF,0.8333,0.7109,0.0001,2.0,2.0,93761,6.0,200110,0.0001,2.0,1.0,8.0,0.8974,0.903
4,1,Albania,0.4444,0.0,0.0,0.75,0.0,0.6667,AL,0.5,0.6667,0.0,2.0,1.0,35885,2.0,115580,0.0001,2.0,1.0,3.0,0.861,0.886


In [31]:
global_migration_period_summary_2["ES_global_female_weighted"]=global_migration_period_summary_2["ES_ctr_female"]*global_migration_period_summary_2["female_outflow_period"]/global_migration_period_summary_2["female_migration_count"]
global_migration_period_summary_2["IS_global_female_weighted"]=global_migration_period_summary_2["IS_ctr_female"]*global_migration_period_summary_2["female_inflow_period"]/global_migration_period_summary_2["female_migration_count"]
global_migration_period_summary_2["ES_global_female_weighted"]=global_migration_period_summary_2.groupby("period")["ES_global_female_weighted"].transform("sum")
global_migration_period_summary_2["IS_global_female_weighted"]=global_migration_period_summary_2.groupby("period")["IS_global_female_weighted"].transform("sum")


global_migration_period_summary_2["ES_global_male_weighted"]=global_migration_period_summary_2["ES_ctr_male"]*global_migration_period_summary_2["male_outflow_period"]/global_migration_period_summary_2["male_migration_count"]
global_migration_period_summary_2["IS_global_male_weighted"]=global_migration_period_summary_2["IS_ctr_male"]*global_migration_period_summary_2["male_inflow_period"]/global_migration_period_summary_2["male_migration_count"]
global_migration_period_summary_2["ES_global_male_weighted"]=global_migration_period_summary_2.groupby("period")["ES_global_male_weighted"].transform("sum")
global_migration_period_summary_2["IS_global_male_weighted"]=global_migration_period_summary_2.groupby("period")["IS_global_male_weighted"].transform("sum")



In [32]:
global_migration_period_summary_2=global_migration_period_summary_2[["period","ES_global_weighted","ES_global_female_weighted","ES_global_male_weighted","IS_global_weighted","IS_global_female_weighted","IS_global_male_weighted"]].drop_duplicates()


In [74]:
global_migration_period_summary_2

Unnamed: 0,period,ES_global_weighted,ES_global_female_weighted,ES_global_male_weighted,IS_global_weighted,IS_global_female_weighted,IS_global_male_weighted
0,1,0.861,0.8545,0.8632,0.886,0.8839,0.8837
1,2,0.872,0.8656,0.8743,0.883,0.881,0.8808
2,3,0.8897,0.8844,0.8921,0.8917,0.8899,0.8906
3,4,0.8974,0.8925,0.8995,0.903,0.9005,0.9026


In [33]:

global_migration_period_summary=global_migration_period_summary.merge(global_migration_period_summary_2,on="period",how="left")

In [76]:
global_migration_period_summary

Unnamed: 0,period,migration_count,female_migration_count,male_migration_count,ES_global,IS_global,ES_global_female_x,IS_global_female_x,ES_global_male_x,IS_global_male_x,ES_global_female_y,IS_global_female_y,ES_global_male_y,IS_global_male_y,ES_global_normalized,IS_global_normalized,ES_global_weighted,ES_global_female_weighted,ES_global_male_weighted,IS_global_weighted,IS_global_female_weighted,IS_global_male_weighted
0,1.0,155445,35885,115580,0.9269,0.9056,0.9301,0.9019,0.9258,0.9063,0.9301,0.9019,0.9258,0.9063,0.9284,0.9044,0.861,0.8545,0.8632,0.886,0.8839,0.8837
1,2.0,183315,48751,129324,0.926,0.9169,0.9284,0.9145,0.925,0.9176,0.9284,0.9145,0.925,0.9176,0.927,0.9164,0.872,0.8656,0.8743,0.883,0.881,0.8808
2,3.0,247253,72370,167780,0.9326,0.9309,0.9346,0.9287,0.9318,0.9318,0.9346,0.9287,0.9318,0.9318,0.9334,0.9305,0.8897,0.8844,0.8921,0.8917,0.8899,0.8906
3,4.0,302685,93761,200110,0.9407,0.9365,0.9421,0.9351,0.9401,0.9373,0.9421,0.9351,0.9401,0.9373,0.9414,0.9365,0.8974,0.8925,0.8995,0.903,0.9005,0.9026


In [59]:
global_migration_period_summary.to_csv(os.path.join(
    output_dir,"2_global_spread_withweighted.csv"))


<a id='glodestination'></a>

### 4. Preferred destinations by gender on global level

##### return to [index](#index)

In [34]:
destination_detection=net_researcher_country_period.merge(global_migration_period_summary[["period", "migration_count","female_migration_count","male_migration_count"]],on="period",how="left")

destination_detection["all_prop"]=destination_detection["inflow_period"]/destination_detection["migration_count"]
destination_detection["female_prop"]=destination_detection["female_inflow_period"]/destination_detection["female_migration_count"]
destination_detection["male_prop"]=destination_detection["male_inflow_period"]/destination_detection["male_migration_count"]

destination_detection=destination_detection.drop(columns=["Unnamed: 0.1","Unnamed: 0"])

In [102]:
destination_detection.columns

Index(['residenceCountry', 'period', 'outflow_period', 'female_outflow_period',
       'male_outflow_period', 'inflow_period', 'female_inflow_period',
       'male_inflow_period', 'net_all_period', 'net_female_period',
       'net_male_period', 'netrate_all_period', 'netrate_female_period',
       'netrate_male_period', 'gendered_inflow', 'gendered_outflow', 'iso2',
       'migration_count', 'female_migration_count', 'male_migration_count',
       'all_prop', 'female_prop', 'male_prop'],
      dtype='object')

In [80]:
destination_detection[destination_detection["residenceCountry"]=="China"]

Unnamed: 0,residenceCountry,period,outflow_period,female_outflow_period,male_outflow_period,inflow_period,female_inflow_period,male_inflow_period,net_all_period,net_female_period,net_male_period,netrate_all_period,netrate_female_period,netrate_male_period,gendered_inflow,gendered_outflow,iso2,migration_count,female_migration_count,male_migration_count,all_prop,female_prop,male_prop
141,China,1,6165.0,1368.0,4240.0,4219.0,906.0,2936.0,-1946.0,-462.0,-1304.0,-0.0024,-0.004,-0.004,0.3086,0.3226,CN,155445,35885,115580,0.0271,0.0252,0.0254
142,China,2,8646.0,2097.0,5822.0,7497.0,1770.0,5129.0,-1149.0,-327.0,-693.0,-0.0006,-0.001,-0.0008,0.3451,0.3602,CN,183315,48751,129324,0.0409,0.0363,0.0397
143,China,3,15411.0,4166.0,10126.0,13053.0,3328.0,8804.0,-2358.0,-838.0,-1322.0,-0.0007,-0.0012,-0.0008,0.378,0.4114,CN,247253,72370,167780,0.0528,0.046,0.0525
144,China,4,21835.0,5809.0,14558.0,22570.0,5762.0,15367.0,735.0,-47.0,809.0,0.0002,-0.0,0.0003,0.375,0.399,CN,302685,93761,200110,0.0746,0.0615,0.0768


In [35]:
destination_female=destination_detection[["residenceCountry","period","inflow_period","female_inflow_period","male_inflow_period","female_prop","male_prop","all_prop"]].sort_values(by=["period","female_inflow_period"],ascending=[True,False])

destination_female.to_excel(os.path.join(
    output_dir,"3_female_preferred_D_global.xlsx"))

destination_male=destination_female.sort_values(by=["period","male_inflow_period"],ascending=[True,False])
destination_male.to_excel(os.path.join(
    output_dir,"3_male_preferred_D_global.xlsx"))


In [38]:
destination_female12=destination_female.sort_values(by=["period","female_inflow_period"],ascending=[True,False]).groupby("period").head(12).copy()
destination_female12["female"]=destination_female12["female_prop"]*100
destination_female12["male"]=destination_female12["male_prop"]*100
destination_female12.to_excel(os.path.join(
    output_dir,"3_female_preferred_D_global_top12.xlsx"))

destination_male13=destination_male.sort_values(by=["period","male_inflow_period"],ascending=[True,False]).groupby("period").head(13).copy()
destination_male13["female"]=destination_male13["female_prop"]*100
destination_male13["male"]=destination_male13["male_prop"]*100

destination_male13.to_excel(os.path.join(
    output_dir,"3_male_preferred_D_global_top13.xlsx"))

<a id='ctrestination'></a>

### 5. Preferred destinations by gender on country level

##### return to [index](#index)

In [66]:
od_all=all_female_male_flow.sort_values(by=["period","all_period_flow"],ascending=[True,False])

net_migration=net_researcher_country_period[["residenceCountry","period","female_outflow_period","male_outflow_period"]]

In [67]:
female=od_all.sort_values(by=["period","from","female_period_flow"],ascending=[True,True,False]).groupby(["period","from"]).head(3).copy()
female=female.merge(net_migration[["residenceCountry","period","female_outflow_period"]],left_on=["from","period"],right_on=["residenceCountry","period"],how="left")
female["female_prop"]=female["female_period_flow"]/female["female_outflow_period"]
female['d_order'] = female.groupby(['from',"period"]).cumcount()+1
female.to_excel(os.path.join(
    output_dir,"4_female_preferred_D_ctr.xlsx"))

male=od_all.sort_values(by=["period","from","male_period_flow"],ascending=[True,True,False]).groupby(["period","from"]).head(3).copy()
male=male.merge(net_migration[["residenceCountry","period","male_outflow_period"]],left_on=["from","period"],right_on=["residenceCountry","period"],how="left")
male["male_prop"]=male["male_period_flow"]/male["male_outflow_period"]
male['d_order'] = male.groupby(['from',"period"]).cumcount()+1
male.to_excel(os.path.join(
    output_dir,"4_male_preferred_D_ctr.xlsx"))