## INTENTION OF THIS WORKBOOK

This next pass will:
1. gather a dataframe for the 5 year Ad Age data range

2. fix the revenue discrepancy (1/10 of revenue?) 

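3. Add a new 'Type' column (stored as `AGENCY-TIER`), classifying each agency owner into four tiers: Supernova (Big Hold + Consultant), Global (Midmarket), Burgeoning (Contenders), and Independents. Big Hold and Consultant are labelled separately, so the column holds five distinct labels.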

~~4. Simple CAGR on the tiers over 5 years~~

In [1]:
import pandas as pd
import re

In [2]:
# Report year: used both to pick the input file and to label the frame,
# so the two can no longer drift apart when another year is loaded.
year = '2010'
raw = '/Users/xavier/Documents/src/dataviz/AgencyRevenueModels/adage-data/adage-900_' + year + '.csv'
adage = pd.read_csv(raw)
# Tag the frame with its year as a plain Python attribute; later cells read
# adage.name to build column names such as '<year>_count'.
adage.name = year
adage

Unnamed: 0,AGENCY-COMPANY,HEADQUARTERS,2010-REVENUE,% CHG
0,Acxiom Corp.,"Little Rock, Ark.",623000,-2.4
1,Epsilon [Alliance Data Systems Corp.],"Irving, Texas",584400,19.8
2,DraftFCB* [Interpublic (DraftFCB)],Chicago/New York,530000,7.1
3,McCann Erickson Worldwide* [Interpublic (McCann)],New York,456863,1.5
4,BBDO Worldwide* [Omnicom (BBDO)],New York,450000,3.2
5,Leo Burnett Worldwide/Arc* [Publicis (Leo Burn...,Chicago,441013,7.5
6,Rapp* [Omnicom (DDB {Rapp})],New York,376500,10.7
7,JWT* [WPP (JWT)],New York,356245,9.6
8,SapientNitro [Sapient Corp.],Boston,350000,35.1
9,Edelman [Daniel J. Edelman],Chicago,338090,17.9


In [3]:
# Split one AGENCY-COMPANY entry into its (agency name, owner) pair
def indySubstituteSimple(x):
    """Split an 'Agency [Owner]' string into the agency name and its owner.

    Parameters
    ----------
    x : str
        Raw AGENCY-COMPANY value, e.g. 'Epsilon [Alliance Data Systems Corp.]'.

    Returns
    -------
    tuple of (str, str)
        ``(name, owner)``. ``name`` is the text in front of the bracket with
        surrounding whitespace removed. ``owner`` is the bracketed text; the
        match is greedy, so nested parentheses/braces stay inside it.
        When ``x`` does not have the 'name [owner]' shape (no brackets, or
        nothing in front of the bracket) the whole stripped string is returned
        as the name and the owner is 'Independent'.
    """
    # Raw string: '\[' must reach the regex engine as an escaped bracket,
    # not be interpreted (and warned about) as a Python string escape.
    owned = re.search(r"(.+?)\[(.*)\]", x)
    if owned:
        # Strip the space that separates the name from the '[' so the name
        # column does not carry invisible trailing whitespace.
        return owned.group(1).strip(), owned.group(2)
    return x.strip(), "Independent"

In [4]:
# Create two new columns, AGENCY-NAME and AGENCY-OWNER, from the pair that
# indySubstituteSimple returns for each AGENCY-COMPANY value.
# The pairs are collected first instead of unpacking zip(*...), because that
# unpacking raises ValueError when the frame has no rows.
name_owner_pairs = [indySubstituteSimple(entry) for entry in adage['AGENCY-COMPANY']]
adage['AGENCY-NAME'] = [pair[0] for pair in name_owner_pairs]
adage['AGENCY-OWNER'] = [pair[1] for pair in name_owner_pairs]
adage

Unnamed: 0,AGENCY-COMPANY,HEADQUARTERS,2010-REVENUE,% CHG,AGENCY-NAME,AGENCY-OWNER
0,Acxiom Corp.,"Little Rock, Ark.",623000,-2.4,Acxiom Corp.,Independent
1,Epsilon [Alliance Data Systems Corp.],"Irving, Texas",584400,19.8,Epsilon,Alliance Data Systems Corp.
2,DraftFCB* [Interpublic (DraftFCB)],Chicago/New York,530000,7.1,DraftFCB*,Interpublic (DraftFCB)
3,McCann Erickson Worldwide* [Interpublic (McCann)],New York,456863,1.5,McCann Erickson Worldwide*,Interpublic (McCann)
4,BBDO Worldwide* [Omnicom (BBDO)],New York,450000,3.2,BBDO Worldwide*,Omnicom (BBDO)
5,Leo Burnett Worldwide/Arc* [Publicis (Leo Burn...,Chicago,441013,7.5,Leo Burnett Worldwide/Arc*,Publicis (Leo Burnett)
6,Rapp* [Omnicom (DDB {Rapp})],New York,376500,10.7,Rapp*,Omnicom (DDB {Rapp})
7,JWT* [WPP (JWT)],New York,356245,9.6,JWT*,WPP (JWT)
8,SapientNitro [Sapient Corp.],Boston,350000,35.1,SapientNitro,Sapient Corp.
9,Edelman [Daniel J. Edelman],Chicago,338090,17.9,Edelman,Daniel J. Edelman


In [5]:
# Keep only the leading owner name in AGENCY-OWNER: drop every parenthesised
# part, e.g. 'Omnicom (BBDO)' -> 'Omnicom ', then remove the whitespace that
# is left at the end of the string.
parenthesised_part = re.compile(r'\([^)]*\)')
trailing_whitespace = re.compile(r'\s+$')


def _clean_owner(owner):
    """Remove parenthesised text, then trailing whitespace, from one owner string."""
    without_parens = parenthesised_part.sub('', owner)
    return trailing_whitespace.sub('', without_parens)


adage.loc[:, 'AGENCY-OWNER'] = adage['AGENCY-OWNER'].apply(_clean_owner)
adage

Unnamed: 0,AGENCY-COMPANY,HEADQUARTERS,2010-REVENUE,% CHG,AGENCY-NAME,AGENCY-OWNER
0,Acxiom Corp.,"Little Rock, Ark.",623000,-2.4,Acxiom Corp.,Independent
1,Epsilon [Alliance Data Systems Corp.],"Irving, Texas",584400,19.8,Epsilon,Alliance Data Systems Corp.
2,DraftFCB* [Interpublic (DraftFCB)],Chicago/New York,530000,7.1,DraftFCB*,Interpublic
3,McCann Erickson Worldwide* [Interpublic (McCann)],New York,456863,1.5,McCann Erickson Worldwide*,Interpublic
4,BBDO Worldwide* [Omnicom (BBDO)],New York,450000,3.2,BBDO Worldwide*,Omnicom
5,Leo Burnett Worldwide/Arc* [Publicis (Leo Burn...,Chicago,441013,7.5,Leo Burnett Worldwide/Arc*,Publicis
6,Rapp* [Omnicom (DDB {Rapp})],New York,376500,10.7,Rapp*,Omnicom
7,JWT* [WPP (JWT)],New York,356245,9.6,JWT*,WPP
8,SapientNitro [Sapient Corp.],Boston,350000,35.1,SapientNitro,Sapient Corp.
9,Edelman [Daniel J. Edelman],Chicago,338090,17.9,Edelman,Daniel J. Edelman


In [6]:
# Transform revenue to the literal number, not 'in thousands', and remove the
# '*' marker that follows some agency names.
# Columns are addressed by label instead of position (iloc 2 and 4), so a
# change in column order cannot silently redirect these updates.
# NOTE: this cell is not idempotent -- running it a second time multiplies
# the revenue by 1000 again. Re-run from the load cell if in doubt.
adage['2010-REVENUE'] = adage['2010-REVENUE'] * 1000
adage['AGENCY-NAME'] = adage['AGENCY-NAME'].map(lambda name: name.replace('*', ''))
adage

Unnamed: 0,AGENCY-COMPANY,HEADQUARTERS,2010-REVENUE,% CHG,AGENCY-NAME,AGENCY-OWNER
0,Acxiom Corp.,"Little Rock, Ark.",623000000,-2.4,Acxiom Corp.,Independent
1,Epsilon [Alliance Data Systems Corp.],"Irving, Texas",584400000,19.8,Epsilon,Alliance Data Systems Corp.
2,DraftFCB* [Interpublic (DraftFCB)],Chicago/New York,530000000,7.1,DraftFCB,Interpublic
3,McCann Erickson Worldwide* [Interpublic (McCann)],New York,456863000,1.5,McCann Erickson Worldwide,Interpublic
4,BBDO Worldwide* [Omnicom (BBDO)],New York,450000000,3.2,BBDO Worldwide,Omnicom
5,Leo Burnett Worldwide/Arc* [Publicis (Leo Burn...,Chicago,441013000,7.5,Leo Burnett Worldwide/Arc,Publicis
6,Rapp* [Omnicom (DDB {Rapp})],New York,376500000,10.7,Rapp,Omnicom
7,JWT* [WPP (JWT)],New York,356245000,9.6,JWT,WPP
8,SapientNitro [Sapient Corp.],Boston,350000000,35.1,SapientNitro,Sapient Corp.
9,Edelman [Daniel J. Edelman],Chicago,338090000,17.9,Edelman,Daniel J. Edelman


In [7]:
# Number of rows (agencies) listed under each AGENCY-OWNER value
owner_groups = adage.groupby('AGENCY-OWNER')
owner_groups.size()

AGENCY-OWNER
Ad Venture Interactive                 1
Aegis Group                            5
Alliance Data Systems Corp.            1
Alloy Media & Marketing                1
Asatsu-DK                              1
Cheil Worldwide                        1
Chime Communications                   1
Creston                                1
D.L. Ryan Cos.                         4
Daniel J. Edelman                      3
Dentsu                                 5
Emak Worldwide                         2
Engine Group                           2
FTI Consulting                         1
FullSix Group                          1
GSI Commerce Inc.                      1
Groupe Aeroplan                        1
Grupo ABC                              2
Hakuhodo DY Holdings                   1
Harte-Hanks                            1
Havas                                  8
Hawkeye Group                          1
HealthStar Communications              2
Hearst Corp.                           1
Hun

In [8]:
# Preview: per-owner agency counts as a two-column frame
# (AGENCY-OWNER, '<year>_count'), with the year taken from adage.name
count_label = adage.name + '_count'
adage.groupby('AGENCY-OWNER').size().rename(count_label).reset_index()

Unnamed: 0,AGENCY-OWNER,2010_count
0,Ad Venture Interactive,1
1,Aegis Group,5
2,Alliance Data Systems Corp.,1
3,Alloy Media & Marketing,1
4,Asatsu-DK,1
5,Cheil Worldwide,1
6,Chime Communications,1
7,Creston,1
8,D.L. Ryan Cos.,4
9,Daniel J. Edelman,3


In [9]:
# Per-owner agency counts, kept for the merge with the revenue totals
agencies_per_owner = adage.groupby('AGENCY-OWNER').size()
count_2010 = agencies_per_owner.reset_index(name=adage.name + '_count')
count_2010

Unnamed: 0,AGENCY-OWNER,2010_count
0,Ad Venture Interactive,1
1,Aegis Group,5
2,Alliance Data Systems Corp.,1
3,Alloy Media & Marketing,1
4,Asatsu-DK,1
5,Cheil Worldwide,1
6,Chime Communications,1
7,Creston,1
8,D.L. Ryan Cos.,4
9,Daniel J. Edelman,3


In [15]:
# Total the numeric columns per owner. numeric_only=True states explicitly
# that the text columns (AGENCY-COMPANY, HEADQUARTERS, AGENCY-NAME) are left
# out of the sum; without it, recent pandas versions concatenate the strings
# of each group instead of dropping those columns.
financials_2010 = adage.groupby('AGENCY-OWNER').sum(numeric_only=True).reset_index()
# Keep the revenue total only; the summed '% CHG' column is discarded
rev2010 = financials_2010.drop('% CHG', axis = 1)
rev2010

Unnamed: 0,AGENCY-OWNER,2010-REVENUE
0,Ad Venture Interactive,68466000
1,Aegis Group,189900000
2,Alliance Data Systems Corp.,584400000
3,Alloy Media & Marketing,39310000
4,Asatsu-DK,4988000
5,Cheil Worldwide,3000000
6,Chime Communications,3185000
7,Creston,14089000
8,D.L. Ryan Cos.,105848000
9,Daniel J. Edelman,361691000


In [16]:
# Join the per-owner agency counts with the per-owner revenue totals
# (inner join on the shared AGENCY-OWNER column)
count_2010.merge(rev2010, on='AGENCY-OWNER')

Unnamed: 0,AGENCY-OWNER,2010_count,2010-REVENUE
0,Ad Venture Interactive,1,68466000
1,Aegis Group,5,189900000
2,Alliance Data Systems Corp.,1,584400000
3,Alloy Media & Marketing,1,39310000
4,Asatsu-DK,1,4988000
5,Cheil Worldwide,1,3000000
6,Chime Communications,1,3185000
7,Creston,1,14089000
8,D.L. Ryan Cos.,4,105848000
9,Daniel J. Edelman,3,361691000


In [None]:
# adage.groupby('AGENCY-OWNER').size().to_csv('/Users/xavier/Documents/src/dataviz/AgencyRevenueModels/adage-data/adage-900_4tier_2015.csv', encoding='utf-8')

In [None]:
# Create lists which define 'The Four Tiers' of marketing companies.
# Each list holds AGENCY-OWNER names; typer() labels any owner that is in
# none of these lists as 'INDY'.
# NOTE(review): 'BlueFocus Communication Group' appears in both the midmarket
# and the contender list. typer() checks midmarket first, so that owner is
# always labelled 'MIDMARKET' -- confirm which tier is intended.
agency_holding_companies = ['Omnicom','Interpublic','WPP','Publicis','Dentsu','Havas']
consultant_holding_companies = ['Alliance Data Systems Corp.','Accenture','Advance Publications','Deloitte','Experian','IBM Corp.','PwC']
midmarket_holding_companies = ['MDC Partners','Project WorldWide','BlueFocus Communication Group','Cheil Worldwide','Next Fifteen Communications Group','Huntsworth','Hakuhodo DY Holdings']
contender_holding_companies = ['DJE Holdings','Engine Group','Asatsu-DK','ASM','BlueFocus Communication Group','Creston','FullSix Group','Hearst Corp.','Iris Worldwide','Klick Inc.','Marc USA','Matomy Media Group','Meredith Corp.','Mother Holdings','TMP Worldwide','Viad Corp.']

In [None]:
# Boolean mask: True for rows whose AGENCY-OWNER is one of agency_holding_companies
adage['AGENCY-OWNER'].isin(agency_holding_companies)

In [None]:
"""
agency_holding_companies
consultant_holding_companies
midmarket_holding_companies
contender_holding_companies
"""
def typer(item):
    """Return the tier label for an agency owner name.

    The owner is looked up in the four module-level tier lists in priority
    order (agency, consultant, midmarket, contender); the label of the first
    list that contains it is returned. An owner found in none of them is
    labelled "INDY".
    """
    tiers_in_priority_order = (
        ("BIGHOLD", agency_holding_companies),
        ("CONSULTANT", consultant_holding_companies),
        ("MIDMARKET", midmarket_holding_companies),
        ("CONTENDERS", contender_holding_companies),
    )
    for label, members in tiers_in_priority_order:
        if item in members:
            return label
    return "INDY"

In [None]:
# Spot check: 'Accenture' is listed in consultant_holding_companies, so typer returns "CONSULTANT"
typer('Accenture')

In [None]:
# Spot check: 'Viad Corp.' is listed only in contender_holding_companies, so typer returns "CONTENDERS"
typer('Viad Corp.')

In [None]:
# Label every row with the tier of its owner, as decided by typer()
owner_tiers = adage['AGENCY-OWNER'].map(typer)
adage['AGENCY-TIER'] = owner_tiers

In [None]:
# Display the frame, which now includes the AGENCY-TIER column
adage

In [None]:
# Number of rows (agencies) in each AGENCY-TIER
adage.groupby('AGENCY-TIER').size()

In [None]:
# Total the numeric columns per tier. numeric_only=True keeps the text
# columns out of the aggregation; without it, recent pandas versions
# concatenate the strings of each group instead of dropping those columns.
# NOTE(review): the result also contains a summed '% CHG', which is a sum of
# per-agency percentages rather than a tier growth rate.
adage.groupby('AGENCY-TIER').sum(numeric_only=True)

In [None]:
## adage.to_csv('/Users/xavier/Documents/src/dataviz/AgencyRevenueModels/adage-data/adage-900_cleaned_2010.csv', encoding='utf-8')