### Retrieve startup count data

In [1]:
import json

import pandas as pd
import pycountry
import requests
from bs4 import BeautifulSoup


# The page is read from a manually saved copy of
# https://www.startupranking.com/countries because the site sits behind
# CloudFlare protection and cannot be fetched programmatically.
#
# The encoding is given explicitly so decoding does not depend on the platform
# locale (assumes the saved page is UTF-8 -- TODO confirm), and the parser is
# named explicitly so the parse tree -- and therefore the positional script
# lookup below -- does not depend on which optional parsers are installed.
with open('html_cache/https___www.startupranking.com_countries.html', encoding='utf-8') as file:
    sr_bs = BeautifulSoup(file, 'html.parser')


# The per-country data is embedded in one of the page's inline scripts.
# NOTE(review): the script position (17) and the slice offsets (25 / -6) are
# tied to the page as it was saved; presumably they strip a JavaScript wrapper
# around the JSON payload. Re-check both if the cached HTML is refreshed.
sr_countries_tag = sr_bs.find_all(type='text/javascript')[17]
sr_countries = json.loads(sr_countries_tag.text[25:-6])

# Keep only the columns needed downstream and give them analysis-friendly names.
sr_df = (
    pd.DataFrame(sr_countries['data'])[['code', 'name', 'value']]
    .rename(columns={'code': 'iso2', 'value': 'startup_count'})
)


sr_df.head()

Unnamed: 0,iso2,name,startup_count
0,US,United States,82257
1,IN,India,17508
2,GB,United Kingdom,7523
3,CA,Canada,4062
4,AU,Australia,3021


### Retrieve population data

In [2]:
import requests


# Download per-country population history from the CountriesNow API.
# The timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() surfaces HTTP errors here instead of as a confusing
# JSON/KeyError further down.
pop_res = requests.get(
    'https://countriesnow.space/api/v0.1/countries/population',
    timeout=30,
)
pop_res.raise_for_status()
pop_json = pop_res.json()['data']

# Build one record per entry whose ISO-3 code pycountry recognises. Entries
# without a match are skipped (presumably regional aggregates -- TODO confirm),
# as are entries with no population counts at all. Each code is looked up once
# and the result is reused for both the filter and the alpha-2 conversion.
pop_df = pd.DataFrame([
    dict(
        iso2=country.alpha_2,
        # Last element of populationCounts; assumed to be the most recent
        # year -- TODO confirm the API returns counts in chronological order.
        population=row['populationCounts'][-1]['value'],
    )
    for row, country in (
        (entry, pycountry.countries.get(alpha_3=entry['iso3']))
        for entry in pop_json
    )
    if country and row['populationCounts']
])

pop_df.head()

Unnamed: 0,iso2,population
0,AF,37172386
1,AL,2866376
2,DZ,42228429
3,AS,55465
4,AD,77006


### Show countries by the number of startups per capita (only countries with a population of at least 1 million)

In [3]:
# Combine startup counts with population figures on the ISO alpha-2 code.
# The inner join keeps only countries present in both sources.
df_full = (
    sr_df
    .set_index('iso2')
    .join(pop_df.set_index('iso2'), how='inner')
    .reset_index()
)
df_full['startups_per_capita'] = df_full['startup_count'].div(df_full['population'])

# Keep countries with at least one million inhabitants, then take the 60
# with the highest startups-per-capita ratio.
large_countries = df_full.loc[df_full['population'].ge(1e6)]
per_capita = (
    large_countries
    .sort_values(by='startups_per_capita', ascending=False, ignore_index=True)
    .head(60)
)
per_capita.index = per_capita.index + 1  # show 1-based ranks instead of 0-based positions
per_capita

Unnamed: 0,iso2,name,startup_count,population,startups_per_capita
1,US,United States,82257,326687501,0.000252
2,EE,Estonia,306,1321977,0.000231
3,SG,Singapore,1265,5638676,0.000224
4,AE,United Arab Emirates,1342,9630959,0.000139
5,AU,Australia,3021,24982688,0.000121
6,GB,United Kingdom,7523,66460344,0.000113
7,IL,Israel,994,8882800,0.000112
8,CA,Canada,4062,37057765,0.00011
9,IE,Ireland,488,4867309,0.0001
10,CH,Switzerland,837,8513227,9.8e-05


### Show countries by the number of startups per capita (all countries)

In [4]:
# Top 60 by startups per capita with no population filter applied.
per_capita_all = (
    df_full
    .sort_values(by='startups_per_capita', ascending=False, ignore_index=True)
    .head(60)
)
per_capita_all.index = per_capita_all.index + 1  # show 1-based ranks instead of 0-based positions
per_capita_all

Unnamed: 0,iso2,name,startup_count,population,startups_per_capita
1,AS,American Samoa,52,55465,0.000938
2,VG,British Virgin Islands,17,29802,0.00057
3,BM,Bermuda,20,63973,0.000313
4,KY,Cayman Islands,19,64174,0.000296
5,AD,Andorra,22,77006,0.000286
6,US,United States,82257,326687501,0.000252
7,GI,Gibraltar,8,33718,0.000237
8,EE,Estonia,306,1321977,0.000231
9,SG,Singapore,1265,5638676,0.000224
10,LI,Liechtenstein,7,37910,0.000185


### Show countries by the number of startups overall (all countries)

In [5]:
# Top 60 by absolute number of startups, regardless of population.
overall = (
    df_full
    .sort_values(by='startup_count', ascending=False, ignore_index=True)
    .head(60)
)
overall.index = overall.index + 1  # show 1-based ranks instead of 0-based positions
overall

Unnamed: 0,iso2,name,startup_count,population,startups_per_capita
1,US,United States,82257,326687501,0.0002517911
2,IN,India,17508,1352617328,1.294379e-05
3,GB,United Kingdom,7523,66460344,0.0001131953
4,CA,Canada,4062,37057765,0.0001096127
5,AU,Australia,3021,24982688,0.0001209237
6,ID,Indonesia,2665,267663435,9.956534e-06
7,DE,Germany,2526,82905782,3.046832e-05
8,FR,France,1725,66977107,2.575507e-05
9,ES,Spain,1545,46796540,3.301526e-05
10,AE,United Arab Emirates,1342,9630959,0.0001393423
