# Investment Case Group Project

In [21]:
# Core dependencies for the analysis
import pandas as pd
import numpy as np
import warnings

# NOTE(review): this silences *every* warning for the rest of the notebook,
# including pandas' SettingWithCopyWarning -- keep that in mind when debugging.
warnings.filterwarnings('ignore')

# Show floats with two decimals. The fully qualified option key is used because
# abbreviated keys are resolved by pattern matching and can become ambiguous.
pd.set_option('display.float_format', '{:.2f}'.format)

## Checkpoints - Part 1

### Checkpoint 1: Data Cleaning 1

In [22]:
# Read the tab-separated companies.txt file into `companies`
# (the file is decoded as ISO-8859-1).
companies = pd.read_csv("companies.txt", sep="\t", encoding="ISO-8859-1")

# Drop every non-ASCII character from the two text columns that show
# encoding artefacts: re-encode as UTF-8, then decode as ASCII ignoring errors.
text_columns = ['permalink', 'name']
companies[text_columns] = companies[text_columns].apply(
    lambda series: series.str.encode('utf-8').str.decode('ascii', errors='ignore')
)

companies.head()

Unnamed: 0,permalink,name,homepage_url,category_list,status,country_code,state_code,region,city,founded_at
0,/Organization/-Fame,#fame,http://livfame.com,Media,operating,IND,16,Mumbai,Mumbai,
1,/Organization/-Qounter,:Qounter,http://www.qounter.com,Application Platforms|Real Time|Social Network...,operating,USA,DE,DE - Other,Delaware City,04-09-2014
2,/Organization/-The-One-Of-Them-Inc-,"(THE) ONE of THEM,Inc.",http://oneofthem.jp,Apps|Games|Mobile,operating,,,,,
3,/Organization/0-6-Com,0-6.com,http://www.0-6.com,Curated Web,operating,CHN,22,Beijing,Beijing,01-01-2007
4,/Organization/004-Technologies,004 Technologies,http://004gmbh.de/en/004-interact,Software,operating,USA,IL,"Springfield, Illinois",Champaign,01-01-2010


In [23]:
# Read rounds2.csv into the `rounds2` dataframe (decoded as ISO-8859-1).
rounds2 = pd.read_csv("rounds2.csv", encoding="ISO-8859-1")

# Drop every non-ASCII character from company_permalink:
# re-encode as UTF-8, then decode as ASCII while ignoring undecodable bytes.
permalink_bytes = rounds2['company_permalink'].str.encode('utf-8')
rounds2['company_permalink'] = permalink_bytes.str.decode('ascii', errors='ignore')
rounds2.head()

Unnamed: 0,company_permalink,funding_round_permalink,funding_round_type,funding_round_code,funded_at,raised_amount_usd
0,/organization/-fame,/funding-round/9a01d05418af9f794eebff7ace91f638,venture,B,05-01-2015,10000000.0
1,/ORGANIZATION/-QOUNTER,/funding-round/22dacff496eb7acb2b901dec1dfe5633,venture,A,14-10-2014,
2,/organization/-qounter,/funding-round/b44fbb94153f6cdef13083530bb48030,seed,,01-03-2014,700000.0
3,/ORGANIZATION/-THE-ONE-OF-THEM-INC-,/funding-round/650b8f704416801069bb178a1418776b,venture,B,30-01-2014,3406878.0
4,/organization/0-6-com,/funding-round/5727accaeaa57461bd22a9bdd945382d,venture,A,19-03-2008,2000000.0


In [24]:
# Lower-case the join keys of both frames so they compare equal regardless of case
for frame, key in ((rounds2, 'company_permalink'), (companies, 'permalink')):
    frame[key] = frame[key].str.lower()

### Results Expected: Table 1.1

In [25]:
# Number of distinct company permalinks in rounds2 (a missing value counts as one)
rounds2['company_permalink'].nunique(dropna=False)

66368

In [26]:
# Number of distinct company permalinks in companies (a missing value counts as one)
companies['permalink'].nunique(dropna=False)

66368

In [27]:
# Rows of rounds2 whose company is absent from companies.
# An empty result means the answer is "N" (no such companies).
known_company = rounds2['company_permalink'].isin(companies['permalink'])
rounds2.loc[~known_company]

Unnamed: 0,company_permalink,funding_round_permalink,funding_round_type,funding_round_code,funded_at,raised_amount_usd


### Since no rows are returned by the above execution, there are no companies in rounds2 which are not present in companies.

In [28]:
""" Merge the two data frames so that all variables (columns) in the companies frame are added to the rounds2 data frame.
Name the merged frame master_frame. """

# Left join: keep every funding round and attach the matching company's columns
master_frame = rounds2.merge(
    companies,
    how='left',
    left_on='company_permalink',
    right_on='permalink',
)
master_frame

Unnamed: 0,company_permalink,funding_round_permalink,funding_round_type,funding_round_code,funded_at,raised_amount_usd,permalink,name,homepage_url,category_list,status,country_code,state_code,region,city,founded_at
0,/organization/-fame,/funding-round/9a01d05418af9f794eebff7ace91f638,venture,B,05-01-2015,10000000.00,/organization/-fame,#fame,http://livfame.com,Media,operating,IND,16,Mumbai,Mumbai,
1,/organization/-qounter,/funding-round/22dacff496eb7acb2b901dec1dfe5633,venture,A,14-10-2014,,/organization/-qounter,:Qounter,http://www.qounter.com,Application Platforms|Real Time|Social Network...,operating,USA,DE,DE - Other,Delaware City,04-09-2014
2,/organization/-qounter,/funding-round/b44fbb94153f6cdef13083530bb48030,seed,,01-03-2014,700000.00,/organization/-qounter,:Qounter,http://www.qounter.com,Application Platforms|Real Time|Social Network...,operating,USA,DE,DE - Other,Delaware City,04-09-2014
3,/organization/-the-one-of-them-inc-,/funding-round/650b8f704416801069bb178a1418776b,venture,B,30-01-2014,3406878.00,/organization/-the-one-of-them-inc-,"(THE) ONE of THEM,Inc.",http://oneofthem.jp,Apps|Games|Mobile,operating,,,,,
4,/organization/0-6-com,/funding-round/5727accaeaa57461bd22a9bdd945382d,venture,A,19-03-2008,2000000.00,/organization/0-6-com,0-6.com,http://www.0-6.com,Curated Web,operating,CHN,22,Beijing,Beijing,01-01-2007
5,/organization/004-technologies,/funding-round/1278dd4e6a37fa4b7d7e06c21b3c1830,venture,,24-07-2014,,/organization/004-technologies,004 Technologies,http://004gmbh.de/en/004-interact,Software,operating,USA,IL,"Springfield, Illinois",Champaign,01-01-2010
6,/organization/01games-technology,/funding-round/7d53696f2b4f607a2f2a8cbb83d01839,undisclosed,,01-07-2014,41250.00,/organization/01games-technology,01Games Technology,http://www.01games.hk/,Games,operating,HKG,,Hong Kong,Hong Kong,
7,/organization/0ndine-biomedical-inc,/funding-round/2b9d3ac293d5cdccbecff5c8cb0f327d,seed,,11-09-2009,43360.00,/organization/0ndine-biomedical-inc,Ondine Biomedical Inc.,http://ondinebio.com,Biotechnology,operating,CAN,BC,Vancouver,Vancouver,01-01-1997
8,/organization/0ndine-biomedical-inc,/funding-round/954b9499724b946ad8c396a57a5f3b72,venture,,21-12-2009,719491.00,/organization/0ndine-biomedical-inc,Ondine Biomedical Inc.,http://ondinebio.com,Biotechnology,operating,CAN,BC,Vancouver,Vancouver,01-01-1997
9,/organization/0xdata,/funding-round/383a9bd2c04f7038bb543ccef5ba3eae,seed,,22-05-2013,3000000.00,/organization/0xdata,H2O.ai,http://h2o.ai/,Analytics,operating,USA,CA,SF Bay Area,Mountain View,01-01-2011


In [29]:
# Number of observations (rows) in master_frame
master_frame.shape[0]

114949

### Cleaning the master_frame to remove rows where the raised_amount_usd value is missing

In [30]:
# Count the rows that have no raised_amount_usd value
int(master_frame['raised_amount_usd'].isna().sum())

19990

In [31]:
# 19990 rows carry no raised_amount_usd; keep only the rows that do have a value.
has_amount = master_frame['raised_amount_usd'].notna()
master_frame = master_frame[has_amount]

In [32]:
# Express raised_amount_usd in millions of USD by dividing by 1,000,000.
# The division is vectorised, and `assign` returns a new frame instead of
# writing into the filtered slice (avoids pandas' chained-assignment hazard).
# NOTE: re-running this cell divides again -- run it once per kernel session.
master_frame = master_frame.assign(
    raised_amount_usd=master_frame['raised_amount_usd'] / 1000000
)

### All the amounts after this point will be in million USD.

### Checkpoint 2: Funding Type Analysis

In [33]:
# Mean raised amount per funding round type, shown only for the four
# candidate types: venture, angel, seed and private equity.
df_by_fundinground_type = master_frame.groupby('funding_round_type')
candidate_types = ['venture', 'angel', 'seed', 'private_equity']
mean_raised_by_type = df_by_fundinground_type['raised_amount_usd'].mean()
mean_raised_by_type[candidate_types]

funding_round_type
venture          11.75
angel             0.96
seed              0.72
private_equity   73.31
Name: raised_amount_usd, dtype: float64

### The chosen investment type is "Venture", as its average funding amount (11.75 million USD) falls within the range of funding preferred by Spark Funds, i.e. 5 to 15 million USD.

## Checkpoints - Part 2
### Checkpoint 3: Country Analysis

In [34]:
# Restrict master_frame to investments of the "venture" funding round type.

is_venture = master_frame['funding_round_type'].eq("venture")
df_investment_type_venture = master_frame[is_venture]
df_investment_type_venture

Unnamed: 0,company_permalink,funding_round_permalink,funding_round_type,funding_round_code,funded_at,raised_amount_usd,permalink,name,homepage_url,category_list,status,country_code,state_code,region,city,founded_at
0,/organization/-fame,/funding-round/9a01d05418af9f794eebff7ace91f638,venture,B,05-01-2015,10.00,/organization/-fame,#fame,http://livfame.com,Media,operating,IND,16,Mumbai,Mumbai,
3,/organization/-the-one-of-them-inc-,/funding-round/650b8f704416801069bb178a1418776b,venture,B,30-01-2014,3.41,/organization/-the-one-of-them-inc-,"(THE) ONE of THEM,Inc.",http://oneofthem.jp,Apps|Games|Mobile,operating,,,,,
4,/organization/0-6-com,/funding-round/5727accaeaa57461bd22a9bdd945382d,venture,A,19-03-2008,2.00,/organization/0-6-com,0-6.com,http://www.0-6.com,Curated Web,operating,CHN,22,Beijing,Beijing,01-01-2007
8,/organization/0ndine-biomedical-inc,/funding-round/954b9499724b946ad8c396a57a5f3b72,venture,,21-12-2009,0.72,/organization/0ndine-biomedical-inc,Ondine Biomedical Inc.,http://ondinebio.com,Biotechnology,operating,CAN,BC,Vancouver,Vancouver,01-01-1997
10,/organization/0xdata,/funding-round/3bb2ee4a2d89251a10aaa735b1180e44,venture,B,09-11-2015,20.00,/organization/0xdata,H2O.ai,http://h2o.ai/,Analytics,operating,USA,CA,SF Bay Area,Mountain View,01-01-2011
11,/organization/0xdata,/funding-round/ae2a174c06517c2394aed45006322a7e,venture,,03-01-2013,1.70,/organization/0xdata,H2O.ai,http://h2o.ai/,Analytics,operating,USA,CA,SF Bay Area,Mountain View,01-01-2011
12,/organization/0xdata,/funding-round/e1cfcbe1bdf4c70277c5f29a3482f24e,venture,A,19-07-2014,8.90,/organization/0xdata,H2O.ai,http://h2o.ai/,Analytics,operating,USA,CA,SF Bay Area,Mountain View,01-01-2011
22,/organization/1-mainstream,/funding-round/b952cbaf401f310927430c97b68162ea,venture,,17-03-2015,5.00,/organization/1-mainstream,1 Mainstream,http://www.1mainstream.com,Apps|Cable|Distribution|Software,acquired,USA,CA,SF Bay Area,Cupertino,01-03-2012
28,/organization/10-minutes-with,/funding-round/0faccbbcc5818dc5326469f13f5a8ac8,venture,A,09-10-2014,4.00,/organization/10-minutes-with,10 Minutes With,http://10minuteswith.com,Education,operating,GBR,H9,London,London,01-01-2013
34,/organization/1000memories,/funding-round/502bd0e50c27616995e4bdad24605ef8,venture,A,16-02-2011,2.52,/organization/1000memories,1000memories,http://1000memories.com,Curated Web,acquired,USA,CA,SF Bay Area,San Francisco,01-07-2010


In [35]:
# Nine countries with the highest total venture funding (all sectors combined)

total_raised_by_country = df_investment_type_venture.pivot_table(
    values='raised_amount_usd',
    index='country_code',
    aggfunc='sum',
)
top9 = total_raised_by_country.sort_values(by='raised_amount_usd', ascending=False).head(9)
top9

Unnamed: 0_level_0,raised_amount_usd
country_code,Unnamed: 1_level_1
USA,422510.84
CHN,39835.42
GBR,20245.63
IND,14391.86
CAN,9583.33
FRA,7259.54
ISR,6907.51
DEU,6346.96
JPN,3363.68


In [36]:
# Attach human-readable country names to the ISO alpha-3 codes in top9.
# Requires the third-party `pycountry` package. If it is missing, run
# 'pip install pycountry' from a command prompt, or from the notebook:
# import sys
# !{sys.executable} -m pip install pycountry
import pycountry

# Lookup table: one row per country known to pycountry (name + alpha-3 code)
country_rows = [[country.name, country.alpha_3] for country in pycountry.countries]
countries_df = pd.DataFrame(country_rows).rename(
    index=str,
    columns={0: "country_name", 1: "country_code"},
)

# Left join keeps all nine rows of top9 and adds the country_name column
top9_with_country_name = top9.merge(countries_df, how='left', on='country_code')
top9_with_country_name

Unnamed: 0,country_code,raised_amount_usd,country_name
0,USA,422510.84,United States
1,CHN,39835.42,China
2,GBR,20245.63,United Kingdom
3,IND,14391.86,India
4,CAN,9583.33,Canada
5,FRA,7259.54,France
6,ISR,6907.51,Israel
7,DEU,6346.96,Germany
8,JPN,3363.68,Japan


### Referring to the above dataframe and the PDF (Countries_where_English_is_an_official_language.pdf) given in the downloads section, it can be seen that the top 3 English-speaking countries among the top 9 countries by investment amount for the "Venture" investment type are the United States (USA), the United Kingdom (GBR) and India (IND).

### Checkpoint 4: Sector Analysis 1

In [38]:
# Read mapping.csv: one row per category, one 0/1 indicator column per main sector

mapping = pd.read_csv(filepath_or_buffer="mapping.csv")
mapping

Unnamed: 0,category_list,Automotive & Sports,Blanks,Cleantech / Semiconductors,Entertainment,Health,Manufacturing,"News, Search and Messaging",Others,"Social, Finance, Analytics, Advertising"
0,,0,1,0,0,0,0,0,0,0
1,3D,0,0,0,0,0,1,0,0,0
2,3D Printing,0,0,0,0,0,1,0,0,0
3,3D Technology,0,0,0,0,0,1,0,0,0
4,Accounting,0,0,0,0,0,0,0,0,1
5,Active Lifestyle,0,0,0,0,1,0,0,0,0
6,Ad Targeting,0,0,0,0,0,0,0,0,1
7,Advanced Materials,0,0,0,0,0,1,0,0,0
8,Adventure Travel,1,0,0,0,0,0,0,0,0
9,Advertising,0,0,0,0,0,0,0,0,1


In [39]:
# In mapping.csv some category names have the character sequence 'na' replaced
# by '0' (this is about the literal characters, not missing values).
# 52 rows are affected; the number can be checked with
# len(mapping[mapping.category_list.str.match("^([0]*)([a-zA-Z0 ]*)([0])([a-zA-Z ]*)$", na = False)].index)
# The pattern only accepts names made of letters, spaces and zeros, so names
# that contain other characters are never rewritten.

import re
pattern = re.compile("^([0]*)([a-zA-Z0 ]*)([0])([a-zA-Z ]*)$")


def _restore_na(label):
    """Return `label` lower-cased, with '0' turned back into 'na' when it matches `pattern`."""
    repaired = label.replace('0', 'na') if pattern.match(label) else label
    return repaired.lower()


# Only non-missing names are transformed; on assignment the missing ones stay missing.
known_categories = mapping.category_list.dropna()
mapping.category_list = known_categories.apply(_restore_na)
mapping

Unnamed: 0,category_list,Automotive & Sports,Blanks,Cleantech / Semiconductors,Entertainment,Health,Manufacturing,"News, Search and Messaging",Others,"Social, Finance, Analytics, Advertising"
0,,0,1,0,0,0,0,0,0,0
1,3d,0,0,0,0,0,1,0,0,0
2,3d printing,0,0,0,0,0,1,0,0,0
3,3d technology,0,0,0,0,0,1,0,0,0
4,accounting,0,0,0,0,0,0,0,0,1
5,active lifestyle,0,0,0,0,1,0,0,0,0
6,ad targeting,0,0,0,0,0,0,0,0,1
7,advanced materials,0,0,0,0,0,1,0,0,0
8,adventure travel,1,0,0,0,0,0,0,0,0
9,advertising,0,0,0,0,0,0,0,0,1


In [40]:
# Reshape the wide 0/1 indicator table into a long (category, main sector) lookup:
#   1. melt   -> one row per (category, sector) pair with its indicator in `value`
#   2. filter -> keep only the pairs whose indicator is not 0
#   3. drop   -> `value` carries no further information once filtered
#   4. rename -> column names expected by the later merge (index labels become str)
category_to_main_sector = (
    mapping.melt(id_vars=["category_list"])
    .loc[lambda long_form: long_form["value"] != 0]
    .drop(columns="value")
    .rename(index=str, columns={"category_list": "primary_sector", "variable": "main_sector"})
)
category_to_main_sector

Unnamed: 0,primary_sector,main_sector
8,adventure travel,Automotive & Sports
14,aerospace,Automotive & Sports
45,auto,Automotive & Sports
46,automated kiosk,Automotive & Sports
47,automotive,Automotive & Sports
57,bicycles,Automotive & Sports
69,boating industry,Automotive & Sports
87,cad,Automotive & Sports
93,cars,Automotive & Sports
188,design,Automotive & Sports


In [41]:
# The primary sector is the first entry of the '|'-separated category_list.
# `assign` builds a new frame rather than writing into the filtered slice
# (avoids pandas' chained-assignment hazard), and `.str[0]` takes the first
# piece directly, without expanding the split into a temporary dataframe.
master_frame = master_frame.assign(
    primary_sector=master_frame['category_list'].str.split("|", n=1).str[0]
)
master_frame

Unnamed: 0,company_permalink,funding_round_permalink,funding_round_type,funding_round_code,funded_at,raised_amount_usd,permalink,name,homepage_url,category_list,status,country_code,state_code,region,city,founded_at,primary_sector
0,/organization/-fame,/funding-round/9a01d05418af9f794eebff7ace91f638,venture,B,05-01-2015,10.00,/organization/-fame,#fame,http://livfame.com,Media,operating,IND,16,Mumbai,Mumbai,,Media
2,/organization/-qounter,/funding-round/b44fbb94153f6cdef13083530bb48030,seed,,01-03-2014,0.70,/organization/-qounter,:Qounter,http://www.qounter.com,Application Platforms|Real Time|Social Network...,operating,USA,DE,DE - Other,Delaware City,04-09-2014,Application Platforms
3,/organization/-the-one-of-them-inc-,/funding-round/650b8f704416801069bb178a1418776b,venture,B,30-01-2014,3.41,/organization/-the-one-of-them-inc-,"(THE) ONE of THEM,Inc.",http://oneofthem.jp,Apps|Games|Mobile,operating,,,,,,Apps
4,/organization/0-6-com,/funding-round/5727accaeaa57461bd22a9bdd945382d,venture,A,19-03-2008,2.00,/organization/0-6-com,0-6.com,http://www.0-6.com,Curated Web,operating,CHN,22,Beijing,Beijing,01-01-2007,Curated Web
6,/organization/01games-technology,/funding-round/7d53696f2b4f607a2f2a8cbb83d01839,undisclosed,,01-07-2014,0.04,/organization/01games-technology,01Games Technology,http://www.01games.hk/,Games,operating,HKG,,Hong Kong,Hong Kong,,Games
7,/organization/0ndine-biomedical-inc,/funding-round/2b9d3ac293d5cdccbecff5c8cb0f327d,seed,,11-09-2009,0.04,/organization/0ndine-biomedical-inc,Ondine Biomedical Inc.,http://ondinebio.com,Biotechnology,operating,CAN,BC,Vancouver,Vancouver,01-01-1997,Biotechnology
8,/organization/0ndine-biomedical-inc,/funding-round/954b9499724b946ad8c396a57a5f3b72,venture,,21-12-2009,0.72,/organization/0ndine-biomedical-inc,Ondine Biomedical Inc.,http://ondinebio.com,Biotechnology,operating,CAN,BC,Vancouver,Vancouver,01-01-1997,Biotechnology
9,/organization/0xdata,/funding-round/383a9bd2c04f7038bb543ccef5ba3eae,seed,,22-05-2013,3.00,/organization/0xdata,H2O.ai,http://h2o.ai/,Analytics,operating,USA,CA,SF Bay Area,Mountain View,01-01-2011,Analytics
10,/organization/0xdata,/funding-round/3bb2ee4a2d89251a10aaa735b1180e44,venture,B,09-11-2015,20.00,/organization/0xdata,H2O.ai,http://h2o.ai/,Analytics,operating,USA,CA,SF Bay Area,Mountain View,01-01-2011,Analytics
11,/organization/0xdata,/funding-round/ae2a174c06517c2394aed45006322a7e,venture,,03-01-2013,1.70,/organization/0xdata,H2O.ai,http://h2o.ai/,Analytics,operating,USA,CA,SF Bay Area,Mountain View,01-01-2011,Analytics


In [42]:
# Lower-case primary_sector so it matches the lower-cased keys of
# category_to_main_sector, then left-join to add the main_sector column.
# `assign` returns a new frame instead of writing into the filtered slice
# (avoids pandas' chained-assignment hazard).
# NOTE: re-running this cell merges again and produces suffixed main_sector columns.
master_frame = master_frame.assign(
    primary_sector=master_frame['primary_sector'].str.lower()
).merge(category_to_main_sector, how='left', on='primary_sector')
master_frame

Unnamed: 0,company_permalink,funding_round_permalink,funding_round_type,funding_round_code,funded_at,raised_amount_usd,permalink,name,homepage_url,category_list,status,country_code,state_code,region,city,founded_at,primary_sector,main_sector
0,/organization/-fame,/funding-round/9a01d05418af9f794eebff7ace91f638,venture,B,05-01-2015,10.00,/organization/-fame,#fame,http://livfame.com,Media,operating,IND,16,Mumbai,Mumbai,,media,Entertainment
1,/organization/-qounter,/funding-round/b44fbb94153f6cdef13083530bb48030,seed,,01-03-2014,0.70,/organization/-qounter,:Qounter,http://www.qounter.com,Application Platforms|Real Time|Social Network...,operating,USA,DE,DE - Other,Delaware City,04-09-2014,application platforms,"News, Search and Messaging"
2,/organization/-the-one-of-them-inc-,/funding-round/650b8f704416801069bb178a1418776b,venture,B,30-01-2014,3.41,/organization/-the-one-of-them-inc-,"(THE) ONE of THEM,Inc.",http://oneofthem.jp,Apps|Games|Mobile,operating,,,,,,apps,"News, Search and Messaging"
3,/organization/0-6-com,/funding-round/5727accaeaa57461bd22a9bdd945382d,venture,A,19-03-2008,2.00,/organization/0-6-com,0-6.com,http://www.0-6.com,Curated Web,operating,CHN,22,Beijing,Beijing,01-01-2007,curated web,"News, Search and Messaging"
4,/organization/01games-technology,/funding-round/7d53696f2b4f607a2f2a8cbb83d01839,undisclosed,,01-07-2014,0.04,/organization/01games-technology,01Games Technology,http://www.01games.hk/,Games,operating,HKG,,Hong Kong,Hong Kong,,games,Entertainment
5,/organization/0ndine-biomedical-inc,/funding-round/2b9d3ac293d5cdccbecff5c8cb0f327d,seed,,11-09-2009,0.04,/organization/0ndine-biomedical-inc,Ondine Biomedical Inc.,http://ondinebio.com,Biotechnology,operating,CAN,BC,Vancouver,Vancouver,01-01-1997,biotechnology,Cleantech / Semiconductors
6,/organization/0ndine-biomedical-inc,/funding-round/954b9499724b946ad8c396a57a5f3b72,venture,,21-12-2009,0.72,/organization/0ndine-biomedical-inc,Ondine Biomedical Inc.,http://ondinebio.com,Biotechnology,operating,CAN,BC,Vancouver,Vancouver,01-01-1997,biotechnology,Cleantech / Semiconductors
7,/organization/0xdata,/funding-round/383a9bd2c04f7038bb543ccef5ba3eae,seed,,22-05-2013,3.00,/organization/0xdata,H2O.ai,http://h2o.ai/,Analytics,operating,USA,CA,SF Bay Area,Mountain View,01-01-2011,analytics,"Social, Finance, Analytics, Advertising"
8,/organization/0xdata,/funding-round/3bb2ee4a2d89251a10aaa735b1180e44,venture,B,09-11-2015,20.00,/organization/0xdata,H2O.ai,http://h2o.ai/,Analytics,operating,USA,CA,SF Bay Area,Mountain View,01-01-2011,analytics,"Social, Finance, Analytics, Advertising"
9,/organization/0xdata,/funding-round/ae2a174c06517c2394aed45006322a7e,venture,,03-01-2013,1.70,/organization/0xdata,H2O.ai,http://h2o.ai/,Analytics,operating,USA,CA,SF Bay Area,Mountain View,01-01-2011,analytics,"Social, Finance, Analytics, Advertising"


### Checkpoint 5: Sector Analysis 2

In [43]:
# Cleaning out the rows where country_code value is missing
has_country = master_frame['country_code'].notna()
master_frame = master_frame[has_country]

In [45]:
# Creating a dataframe with funding type = "Venture", country is India, USA or GBR and raised amount is between 5 to 15 million
# Three independent row filters, combined with a logical AND:
#   * funding round type is "venture"
#   * raised amount lies between 5 and 15 (million USD, both ends inclusive)
#   * country is India, the USA or the United Kingdom
is_venture = master_frame['funding_round_type'] == 'venture'
in_funding_range = master_frame['raised_amount_usd'].between(5, 15)
in_target_country = master_frame['country_code'].isin(['IND', 'USA', 'GBR'])
frame_fundingtype_country_range = master_frame[is_venture & in_funding_range & in_target_country]
frame_fundingtype_country_range

Unnamed: 0,company_permalink,funding_round_permalink,funding_round_type,funding_round_code,funded_at,raised_amount_usd,permalink,name,homepage_url,category_list,status,country_code,state_code,region,city,founded_at,primary_sector,main_sector
0,/organization/-fame,/funding-round/9a01d05418af9f794eebff7ace91f638,venture,B,05-01-2015,10.00,/organization/-fame,#fame,http://livfame.com,Media,operating,IND,16,Mumbai,Mumbai,,media,Entertainment
10,/organization/0xdata,/funding-round/e1cfcbe1bdf4c70277c5f29a3482f24e,venture,A,19-07-2014,8.90,/organization/0xdata,H2O.ai,http://h2o.ai/,Analytics,operating,USA,CA,SF Bay Area,Mountain View,01-01-2011,analytics,"Social, Finance, Analytics, Advertising"
16,/organization/1-mainstream,/funding-round/b952cbaf401f310927430c97b68162ea,venture,,17-03-2015,5.00,/organization/1-mainstream,1 Mainstream,http://www.1mainstream.com,Apps|Cable|Distribution|Software,acquired,USA,CA,SF Bay Area,Cupertino,01-03-2012,apps,"News, Search and Messaging"
78,/organization/128-technology,/funding-round/fb6216a30cb566ede89e0bee0623a634,venture,,16-12-2014,12.00,/organization/128-technology,128 Technology,http://www.128technology.com/,Service Providers|Technology,operating,USA,MA,Boston,Burlington,07-07-2014,service providers,Others
84,/organization/1366-technologies,/funding-round/424129ce1235cfab2655ee81305f7c2b,venture,C,15-10-2013,15.00,/organization/1366-technologies,1366 Technologies,http://www.1366tech.com,Manufacturing,operating,USA,MA,Boston,Bedford,01-01-2007,manufacturing,Manufacturing
85,/organization/1366-technologies,/funding-round/6d3f3797371956ece035b8478c1441b2,venture,C,09-04-2015,5.00,/organization/1366-technologies,1366 Technologies,http://www.1366tech.com,Manufacturing,operating,USA,MA,Boston,Bedford,01-01-2007,manufacturing,Manufacturing
86,/organization/1366-technologies,/funding-round/786f61aa9866f4471151285f5c56be36,venture,A,03-02-2010,5.15,/organization/1366-technologies,1366 Technologies,http://www.1366tech.com,Manufacturing,operating,USA,MA,Boston,Bedford,01-01-2007,manufacturing,Manufacturing
87,/organization/1366-technologies,/funding-round/82ace97530965cd2be8f262836b43ff5,venture,A,27-03-2008,12.40,/organization/1366-technologies,1366 Technologies,http://www.1366tech.com,Manufacturing,operating,USA,MA,Boston,Bedford,01-01-2007,manufacturing,Manufacturing
88,/organization/1366-technologies,/funding-round/ab99fc5a53717b1b53fd6aa5687c5fa9,venture,B,16-12-2010,6.00,/organization/1366-technologies,1366 Technologies,http://www.1366tech.com,Manufacturing,operating,USA,MA,Boston,Bedford,01-01-2007,manufacturing,Manufacturing
103,/organization/170-systems,/funding-round/b84bb882ca873f5fb96535671981196d,venture,A,16-04-2002,14.00,/organization/170-systems,170 Systems,http://www.170systems.com,Software,acquired,USA,MA,Boston,Bedford,01-01-1990,software,Others


In [46]:
# Creating the dataframe D1 for United States
# D1: venture investments of 5-15 million USD made in the United States.
# `.copy()` makes D1 an independent frame, so the two column assignments below
# do not write into a slice of frame_fundingtype_country_range.
D1 = frame_fundingtype_country_range[frame_fundingtype_country_range.country_code == 'USA'].copy()
# Total number (count) of investments in each row's main sector
D1['count_main_Sector'] = D1['main_sector'].map(D1.groupby('main_sector')['main_sector'].count())
# Total amount invested in each row's main sector
D1['raised_amount_main_Sector'] = D1['main_sector'].map(D1.groupby('main_sector')['raised_amount_usd'].sum())
D1

Unnamed: 0,company_permalink,funding_round_permalink,funding_round_type,funding_round_code,funded_at,raised_amount_usd,permalink,name,homepage_url,category_list,status,country_code,state_code,region,city,founded_at,primary_sector,main_sector,count_main_Sector,raised_amount_main_Sector
10,/organization/0xdata,/funding-round/e1cfcbe1bdf4c70277c5f29a3482f24e,venture,A,19-07-2014,8.90,/organization/0xdata,H2O.ai,http://h2o.ai/,Analytics,operating,USA,CA,SF Bay Area,Mountain View,01-01-2011,analytics,"Social, Finance, Analytics, Advertising",2714.00,23807.38
16,/organization/1-mainstream,/funding-round/b952cbaf401f310927430c97b68162ea,venture,,17-03-2015,5.00,/organization/1-mainstream,1 Mainstream,http://www.1mainstream.com,Apps|Cable|Distribution|Software,acquired,USA,CA,SF Bay Area,Cupertino,01-03-2012,apps,"News, Search and Messaging",1583.00,13971.57
78,/organization/128-technology,/funding-round/fb6216a30cb566ede89e0bee0623a634,venture,,16-12-2014,12.00,/organization/128-technology,128 Technology,http://www.128technology.com/,Service Providers|Technology,operating,USA,MA,Boston,Burlington,07-07-2014,service providers,Others,2950.00,26321.01
84,/organization/1366-technologies,/funding-round/424129ce1235cfab2655ee81305f7c2b,venture,C,15-10-2013,15.00,/organization/1366-technologies,1366 Technologies,http://www.1366tech.com,Manufacturing,operating,USA,MA,Boston,Bedford,01-01-2007,manufacturing,Manufacturing,799.00,7258.55
85,/organization/1366-technologies,/funding-round/6d3f3797371956ece035b8478c1441b2,venture,C,09-04-2015,5.00,/organization/1366-technologies,1366 Technologies,http://www.1366tech.com,Manufacturing,operating,USA,MA,Boston,Bedford,01-01-2007,manufacturing,Manufacturing,799.00,7258.55
86,/organization/1366-technologies,/funding-round/786f61aa9866f4471151285f5c56be36,venture,A,03-02-2010,5.15,/organization/1366-technologies,1366 Technologies,http://www.1366tech.com,Manufacturing,operating,USA,MA,Boston,Bedford,01-01-2007,manufacturing,Manufacturing,799.00,7258.55
87,/organization/1366-technologies,/funding-round/82ace97530965cd2be8f262836b43ff5,venture,A,27-03-2008,12.40,/organization/1366-technologies,1366 Technologies,http://www.1366tech.com,Manufacturing,operating,USA,MA,Boston,Bedford,01-01-2007,manufacturing,Manufacturing,799.00,7258.55
88,/organization/1366-technologies,/funding-round/ab99fc5a53717b1b53fd6aa5687c5fa9,venture,B,16-12-2010,6.00,/organization/1366-technologies,1366 Technologies,http://www.1366tech.com,Manufacturing,operating,USA,MA,Boston,Bedford,01-01-2007,manufacturing,Manufacturing,799.00,7258.55
103,/organization/170-systems,/funding-round/b84bb882ca873f5fb96535671981196d,venture,A,16-04-2002,14.00,/organization/170-systems,170 Systems,http://www.170systems.com,Software,acquired,USA,MA,Boston,Bedford,01-01-1990,software,Others,2950.00,26321.01
109,/organization/17zuoye,/funding-round/69690484f51e15bc27ff52bfe472cd96,venture,A,01-01-2011,5.00,/organization/17zuoye,17zuoye,http://www.17zuoye.com/,Education|Language Learning,operating,USA,VA,VA - Other,Shanghai,01-01-2007,education,Others,2950.00,26321.01


In [47]:
# Creating the dataframe D2 for United Kingdom
# D2: venture investments of 5-15 million USD made in the United Kingdom.
# `.copy()` makes D2 an independent frame, so the two column assignments below
# do not write into a slice of frame_fundingtype_country_range.
D2 = frame_fundingtype_country_range[frame_fundingtype_country_range.country_code == 'GBR'].copy()
# Total number (count) of investments in each row's main sector
D2['count_main_Sector'] = D2['main_sector'].map(D2.groupby('main_sector')['main_sector'].count())
# Total amount invested in each row's main sector
D2['raised_amount_main_Sector'] = D2['main_sector'].map(D2.groupby('main_sector')['raised_amount_usd'].sum())
D2

Unnamed: 0,company_permalink,funding_round_permalink,funding_round_type,funding_round_code,funded_at,raised_amount_usd,permalink,name,homepage_url,category_list,status,country_code,state_code,region,city,founded_at,primary_sector,main_sector,count_main_Sector,raised_amount_main_Sector
309,/organization/365scores,/funding-round/48212f931f542fdef78810bc87aef086,venture,B,29-09-2014,5.50,/organization/365scores,365Scores,http://biz.365scores.com,Android|Apps|iPhone|Mobile|Sports,operating,GBR,H9,London,London,01-01-2008,android,"Social, Finance, Analytics, Advertising",133,1089.40
671,/organization/7digital,/funding-round/b5ad7ed7baddd3974bd51403f17dd88f,venture,A,01-01-2008,8.47,/organization/7digital,7digital,http://about.7digital.com,Content Creators|Content Delivery|Licensing|Mu...,acquired,GBR,H9,London,London,01-01-2004,content creators,Entertainment,56,482.78
672,/organization/7digital,/funding-round/eafacfcceb1fbc4fd605f641b603313e,venture,,19-10-2012,10.00,/organization/7digital,7digital,http://about.7digital.com,Content Creators|Content Delivery|Licensing|Mu...,acquired,GBR,H9,London,London,01-01-2004,content creators,Entertainment,56,482.78
730,/organization/90min,/funding-round/21a2cbf6f2fb2a1c2a61e04bf930dfe6,venture,,06-10-2015,15.00,/organization/90min,90min,http://www.90min.com,Media|News|Publishing|Soccer|Sports,operating,GBR,H9,London,London,01-01-2011,media,Entertainment,56,482.78
731,/organization/90min,/funding-round/bd626ed022f5c66574b1afe234f3c90d,venture,,07-05-2013,5.80,/organization/90min,90min,http://www.90min.com,Media|News|Publishing|Soccer|Sports,operating,GBR,H9,London,London,01-01-2011,media,Entertainment,56,482.78
907,/organization/abcodia,/funding-round/3d20c23d203134ed86c0d1b2bec288b2,venture,B,18-05-2015,8.26,/organization/abcodia,Abcodia,http://abcodia.com,Biotechnology,operating,GBR,H9,London,London,01-01-2010,biotechnology,Cleantech / Semiconductors,130,1163.99
1065,/organization/acacia-pharma,/funding-round/26eac8a3875e1a7bc68d36ceb7a71b05,venture,A,31-03-2011,10.00,/organization/acacia-pharma,Acacia Pharma,http://www.acaciapharma.com,Biotechnology,operating,GBR,C3,London,Cambridge,01-01-2006,biotechnology,Cleantech / Semiconductors,130,1163.99
1088,/organization/acal-energy,/funding-round/16817efd4bbef10f63cccdf158f47a41,venture,,15-06-2011,9.91,/organization/acal-energy,ACAL Energy,http://www.acalenergy.co.uk,Clean Technology,operating,GBR,C5,Runcorn,Runcorn,01-08-2004,clean technology,Cleantech / Semiconductors,130,1163.99
1091,/organization/acal-enterprise-solutions,/funding-round/0f9a693d9686330c5c2724215e0048e2,venture,,03-06-2014,10.72,/organization/acal-enterprise-solutions,Acal Enterprise Solutions,http://acalenterprisesolutions.com,Information Technology,operating,GBR,J8,Nottingham,Nottingham,01-01-1990,information technology,"Social, Finance, Analytics, Advertising",133,1089.40
1166,/organization/accent-media-ltd,/funding-round/9dc643fa45031a46ffcfaa061d94e3e3,venture,,01-07-2013,8.30,/organization/accent-media-ltd,Accent Media Limited,http://accent.media,Domains|Internet|Ticketing,operating,GBR,H9,London,London,21-02-2012,domains,"News, Search and Messaging",73,615.75


In [48]:
# Creating the dataframe D3 for India
# D3: venture investments of 5-15 million USD made in India.
# `.copy()` makes D3 an independent frame, so the two column assignments below
# do not write into a slice of frame_fundingtype_country_range.
D3 = frame_fundingtype_country_range[frame_fundingtype_country_range.country_code == 'IND'].copy()
# Total number (count) of investments in each row's main sector
D3['count_main_Sector'] = D3['main_sector'].map(D3.groupby('main_sector')['main_sector'].count())
# Total amount invested in each row's main sector
D3['raised_amount_main_Sector'] = D3['main_sector'].map(D3.groupby('main_sector')['raised_amount_usd'].sum())
D3

Unnamed: 0,company_permalink,funding_round_permalink,funding_round_type,funding_round_code,funded_at,raised_amount_usd,permalink,name,homepage_url,category_list,status,country_code,state_code,region,city,founded_at,primary_sector,main_sector,count_main_Sector,raised_amount_main_Sector
0,/organization/-fame,/funding-round/9a01d05418af9f794eebff7ace91f638,venture,B,05-01-2015,10.00,/organization/-fame,#fame,http://livfame.com,Media,operating,IND,16,Mumbai,Mumbai,,media,Entertainment,33,280.83
178,/organization/21diamonds-india,/funding-round/6de7ffef8091ba9f33821f4b861f434a,venture,C,15-11-2012,6.37,/organization/21diamonds-india,21Diamonds,http://www.21diamonds.de,E-Commerce,operating,IND,10,New Delhi,Gurgaon,01-06-2012,e-commerce,Others,110,1013.41
810,/organization/a-little-world,/funding-round/18d98f82ed392b1609975b81f3e8b3fb,venture,B,09-09-2008,6.41,/organization/a-little-world,A LITTLE WORLD,http://alittleworld.com,Finance,operating,IND,16,Mumbai,Mumbai,02-03-2000,finance,"Social, Finance, Analytics, Advertising",60,550.55
2051,/organization/adlabs-imagica,/funding-round/508d3c83daaae9fda3ba6f9682c78f6c,venture,,28-10-2014,8.18,/organization/adlabs-imagica,Adlabs Imagica,http://www.adlabsimagica.com,Entertainment|Tourism,operating,IND,16,IND - Other,Khopoli,,entertainment,Entertainment,33,280.83
2849,/organization/agile,/funding-round/cd3dd1c98ce9d0f632d8752163941674,venture,A,01-05-2011,5.74,/organization/agile,Agile,http://www.agile-ft.com,Finance|Finance Technology|FinTech|Insurance,operating,IND,16,Mumbai,Mumbai,,finance,"Social, Finance, Analytics, Advertising",60,550.55
3376,/organization/akosha,/funding-round/908a8813e2273a2bc604bf40a45c15ff,venture,A,01-07-2014,5.00,/organization/akosha,Akosha,http://www.akosha.com,Consumer Internet|Digital Media|Enterprise Sof...,operating,IND,7,New Delhi,New Delhi,01-06-2010,consumer internet,"Social, Finance, Analytics, Advertising",60,550.55
4205,/organization/amagi-media-labs,/funding-round/f244a91cc714317f6fbbc80dcc1d5135,venture,A,17-06-2013,5.50,/organization/amagi-media-labs,Amagi Media Labs,http://amagi.com,Advertising,operating,IND,19,Bangalore,Bangalore,01-01-2008,advertising,"Social, Finance, Analytics, Advertising",60,550.55
4419,/organization/ameyo,/funding-round/81b50a403d5d2293715fe9b0ce4db5d3,venture,A,03-07-2015,5.00,/organization/ameyo,Ameyo,http://www.ameyo.com/,Software,operating,IND,10,New Delhi,Gurgaon,01-01-2003,software,Others,110,1013.41
4856,/organization/ani-technologies,/funding-round/1e2b54335e2a41d8d7db25b7c11db399,venture,A,10-04-2012,5.00,/organization/ani-technologies,Ola,http://www.olacabs.com,Automotive|E-Commerce|Internet|Mobile|Mobile C...,operating,IND,28,Kolkata,Kolkata,03-12-2010,automotive,Automotive & Sports,13,136.90
4911,/organization/annapurna-microfinace,/funding-round/3f03bc9fea4ae59b1ce8c86a0782107e,venture,B,26-03-2014,5.00,/organization/annapurna-microfinace,Annapurna Microfinace,http://ampl.net.in,Finance,operating,IND,21,Bhubaneswar,Bhubaneswar,01-01-2009,finance,"Social, Finance, Analytics, Advertising",60,550.55


In [49]:
# Total number of investments (row count) per country frame,
# printed one per line in the order USA (D1), GBR (D2), IND (D3)
for country_frame in (D1, D2, D3):
    print(len(country_frame.index))

12150
628
330


In [50]:
# Total amount of investment per country frame (sum of raised_amount_usd,
# rounded to 2 decimals), printed in the order USA (D1), GBR (D2), IND (D3)
for country_frame in (D1, D2, D3):
    print(round(country_frame.raised_amount_usd.sum(), 2))

108531.35
5436.84
2976.54


In [51]:
# Top 3 main sectors for USA, ranked by number of investments
(
    D1.groupby('main_sector')['main_sector']
    .count()                        # investments per main sector
    .sort_values(ascending=False)   # largest count first
    .head(3)
)

main_sector
Others                                     2950
Social, Finance, Analytics, Advertising    2714
Cleantech / Semiconductors                 2350
Name: main_sector, dtype: int64

In [52]:
# Top 3 main sectors for GBR, ranked by number of investments
(
    D2.groupby('main_sector')['main_sector']
    .count()                        # investments per main sector
    .sort_values(ascending=False)   # largest count first
    .head(3)
)

main_sector
Others                                     147
Social, Finance, Analytics, Advertising    133
Cleantech / Semiconductors                 130
Name: main_sector, dtype: int64

In [53]:
# Top 3 main sectors for IND, ranked by number of investments
(
    D3.groupby('main_sector')['main_sector']
    .count()                        # investments per main sector
    .sort_values(ascending=False)   # largest count first
    .head(3)
)

main_sector
Others                                     110
Social, Finance, Analytics, Advertising     60
News, Search and Messaging                  52
Name: main_sector, dtype: int64

In [54]:
# Company in USA which received the highest total investment in the
# top sector (count-wise), "Others"
usa_top_sector_rows = D1[D1.main_sector == "Others"]
(
    usa_top_sector_rows
    .groupby(['company_permalink', 'name'])['raised_amount_usd']
    .sum()                          # total raised per company
    .sort_values(ascending=False)   # highest total first
    .head(1)
)

company_permalink          name       
/organization/virtustream  Virtustream   64.30
Name: raised_amount_usd, dtype: float64

In [55]:
# Company in USA which received the highest total investment in the
# second top sector (count-wise), "Social, Finance, Analytics, Advertising"
usa_second_sector_rows = D1[D1.main_sector == "Social, Finance, Analytics, Advertising"]
(
    usa_second_sector_rows
    .groupby(['company_permalink', 'name'])['raised_amount_usd']
    .sum()                          # total raised per company
    .sort_values(ascending=False)   # highest total first
    .head(1)
)

company_permalink          name                           
/organization/shotspotter  SST Inc. (Formerly ShotSpotter)   67.93
Name: raised_amount_usd, dtype: float64

In [56]:
# Company in GBR which received the highest total investment in the
# top sector (count-wise), "Others"
gbr_top_sector_rows = D2[D2.main_sector == "Others"]
(
    gbr_top_sector_rows
    .groupby(['company_permalink', 'name'])['raised_amount_usd']
    .sum()                          # total raised per company
    .sort_values(ascending=False)   # highest total first
    .head(1)
)

company_permalink             name          
/organization/electric-cloud  Electric Cloud   37.00
Name: raised_amount_usd, dtype: float64

In [57]:
# Company in GBR which received the highest total investment in the
# second top sector (count-wise), "Social, Finance, Analytics, Advertising"
gbr_second_sector_rows = D2[D2.main_sector == "Social, Finance, Analytics, Advertising"]
(
    gbr_second_sector_rows
    .groupby(['company_permalink', 'name'])['raised_amount_usd']
    .sum()                          # total raised per company
    .sort_values(ascending=False)   # highest total first
    .head(1)
)

company_permalink                    name                 
/organization/celltick-technologies  Celltick Technologies   37.50
Name: raised_amount_usd, dtype: float64

In [58]:
# Company in IND which received the highest total investment in the
# top sector (count-wise), "Others"
ind_top_sector_rows = D3[D3.main_sector == "Others"]
(
    ind_top_sector_rows
    .groupby(['company_permalink', 'name'])['raised_amount_usd']
    .sum()                          # total raised per company
    .sort_values(ascending=False)   # highest total first
    .head(1)
)

company_permalink           name        
/organization/firstcry-com  FirstCry.com   39.00
Name: raised_amount_usd, dtype: float64

In [59]:
# Company in IND which received the highest total investment in the
# second top sector (count-wise), "Social, Finance, Analytics, Advertising"
ind_second_sector_rows = D3[D3.main_sector == "Social, Finance, Analytics, Advertising"]
(
    ind_second_sector_rows
    .groupby(['company_permalink', 'name'])['raised_amount_usd']
    .sum()                          # total raised per company
    .sort_values(ascending=False)   # highest total first
    .head(1)
)

company_permalink              name           
/organization/manthan-systems  Manthan Systems   50.70
Name: raised_amount_usd, dtype: float64