# Checkpoint 3: Country Analysis

- Find the top nine countries that have received the highest total funding (computed below for the `venture` funding type).

In [1]:
# Loading libraries and reading data

import numpy as np
import pandas as pd

# Load the two raw inputs with pd.read_csv, decoding both as ISO-8859-1.
# companies.txt is tab-separated; rounds2.csv uses the default comma separator.
companies = pd.read_csv(
    "companies.txt",
    sep="\t",
    encoding="ISO-8859-1",
)
rounds2 = pd.read_csv(
    "rounds2.csv",
    encoding="ISO-8859-1",
)

In [2]:
# Number of distinct companies referenced in rounds2.
# The permalink is lower-cased before counting so that spellings differing only
# in letter case are counted once. The count is printed explicitly because a
# notebook cell only auto-displays its last expression.
print("Unique companies in rounds2: {}".format(
    rounds2['company_permalink'].str.lower().nunique()
))

# Per-column non-null counts and dtypes of the companies table.
companies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 66368 entries, 0 to 66367
Data columns (total 10 columns):
permalink        66368 non-null object
name             66367 non-null object
homepage_url     61310 non-null object
category_list    63220 non-null object
status           66368 non-null object
country_code     59410 non-null object
state_code       57821 non-null object
region           58338 non-null object
city             58340 non-null object
founded_at       51147 non-null object
dtypes: object(10)
memory usage: 5.1+ MB


In [3]:
# Keep only the companies that have a category_list value.
# .copy() turns the filtered selection into an independent DataFrame, so that
# assigning to its columns afterwards does not trigger pandas'
# SettingWithCopyWarning.
companies = companies[~pd.isnull(companies['category_list'])].copy()

# Confirm the result: category_list should now have no missing entries.
companies.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 63220 entries, 0 to 66367
Data columns (total 10 columns):
permalink        63220 non-null object
name             63219 non-null object
homepage_url     59074 non-null object
category_list    63220 non-null object
status           63220 non-null object
country_code     57804 non-null object
state_code       56268 non-null object
region           56765 non-null object
city             56767 non-null object
founded_at       49711 non-null object
dtypes: object(10)
memory usage: 5.3+ MB


In [4]:
# Build master_frame by joining the companies table with the funding rounds.
# The join keys are 'permalink' (companies) and 'company_permalink' (rounds2).
# Both keys are cast to str and lower-cased first so that the match does not
# depend on letter case.
# how='inner' keeps only the rows whose key is present in both frames.

rounds2['company_permalink'] = rounds2['company_permalink'].astype(str).str.lower()
companies['permalink'] = companies['permalink'].astype(str).str.lower()

master_frame = pd.merge(
    companies,
    rounds2,
    how='inner',
    left_on='permalink',
    right_on='company_permalink',
)

In [5]:
# A considerable number of rows have no raised_amount_usd value.
# Those rows cannot contribute to an average per funding_round_type,
# so they are removed from master_frame here.
master_frame = master_frame.dropna(subset=['raised_amount_usd'])

In [6]:
# Step 1: group the funding rounds by funding_round_type.
df_by_funding_round_type = master_frame.groupby('funding_round_type')

# Step 2: mean raised_amount_usd per type, expressed in millions of USD so the
# table reads without scientific notation.
# The table is not restricted to the four funding types of interest, because
# their values can be read directly from the full table.
funding_raised_df = (
    df_by_funding_round_type['raised_amount_usd'].mean() / 1000000
).to_frame()
funding_raised_df

Unnamed: 0_level_0,raised_amount_usd
funding_round_type,Unnamed: 1_level_1
angel,0.957446
convertible_note,1.469975
debt_financing,17.028929
equity_crowdfunding,0.518977
grant,4.330614
non_equity_assistance,0.416599
post_ipo_debt,168.704572
post_ipo_equity,82.434122
private_equity,73.41846
product_crowdfunding,1.251196


In [7]:
# Top 9 countries by total amount raised in 'venture' rounds (millions of USD).

# Restrict to venture rounds, then group by country and funding type; the
# funding type is kept as a second key so it appears in the result's index.
master_frame_venture_df = master_frame.loc[master_frame['funding_round_type'] == 'venture']
df_by_country_funding_round_type = master_frame_venture_df.groupby(
    ['country_code', 'funding_round_type']
)

# Sum per group, scale to millions, sort descending and keep the first nine.
top9 = (
    (df_by_country_funding_round_type['raised_amount_usd'].sum() / 1000000)
    .to_frame()
    .sort_values('raised_amount_usd', ascending=False)
    .head(9)
)
top9


Unnamed: 0_level_0,Unnamed: 1_level_0,raised_amount_usd
country_code,funding_round_type,Unnamed: 2_level_1
USA,venture,420068.029342
CHN,venture,39338.918773
GBR,venture,20072.813004
IND,venture,14261.508718
CAN,venture,9482.217668
FRA,venture,7208.45696
ISR,venture,6854.350477
DEU,venture,6305.521981
JPN,venture,3167.647127
