In [None]:
import numpy as np
import pandas as pd
import plotnine as gg

In [None]:
# Load the cleaned campaign CSV; the announcement date column is parsed as a date on read.
df_clean = pd.read_csv(
    '../data/clean_factset_campaign_data.csv',
    parse_dates=['campaign_announcement_date'],
)

In [None]:
# Show the first record as a Series so every column's value is visible at once.
df_clean.iloc[0, :]

In [None]:
# Column dtypes, shown as a one-column frame labelled 'type'.
df_clean.dtypes.to_frame(name='type')

In [None]:
# Total number of rows in the cleaned data.
df_clean.shape[0]

# Campaigns

Keyed by `(campaign_id, activist_id, company_id)`; the table built below keeps one row per `campaign_id`.

In [None]:
# Number of distinct campaigns in the cleaned data.
df_clean['campaign_id'].nunique()

In [None]:
# Collapse the cleaned data to one row per campaign_id.
# GroupBy.last() takes, for each column, the last entry within the group
# (skipping missing values by default), so a row may combine values that
# originated in different source rows of the same campaign.
df_campaign = (
    df_clean
    .groupby(by='campaign_id')
    .last()
    .reset_index(drop=False)  # bring campaign_id back as a regular column
)

In [None]:
# Preview the first five campaigns.
df_campaign.iloc[:5]

In [None]:
# Campaign counts per primary objective, most frequent first.
(
    df_campaign
    .groupby('campaign_objective_primary')['campaign_id']
    .count()
    .sort_values(ascending=False)
    .to_frame(name='count')
)

In [None]:
# Campaign counts per value demand, most frequent first.
(
    df_campaign
    .groupby('value_demand')['campaign_id']
    .count()
    .sort_values(ascending=False)
    .to_frame(name='count')
)

In [None]:
# Campaign counts per governance demand, most frequent first.
(
    df_campaign
    .groupby('governance_demand')['campaign_id']
    .count()
    .sort_values(ascending=False)
    .to_frame(name='count')
)

In [None]:
# Summary statistics for every column whose name contains 'return'.
df_campaign.filter(like='return').describe()

# Tactics

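Keyed by `(campaign_id, activist_id, company_id, activist_campaign_tactic)`; built from one row per `campaign_id`, with the comma-separated tactic list split into one row per tactic.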

In [None]:
# Long-format tactic table: start from one row per campaign_id, then emit one
# row per tactic listed in the comma-separated `activist_campaign_tactic` field.
df_tactic = (
    df_clean
    .groupby('campaign_id')
    # Select the columns with a list: subscripting a GroupBy with a bare tuple
    # of keys was deprecated in pandas 1.0 and raises in pandas >= 2.0.
    [['activist_id', 'company_id', 'activist_campaign_tactic']]
    .last()
    .reset_index()
    .assign(
        # Campaigns without any recorded tactic get an explicit placeholder
        # label before the string is split into a list of tactics.
        activist_campaign_tactic=lambda df: (
            df.activist_campaign_tactic
            .fillna('No or Unknown')
            .str.split(', ')
        )
    )
    # One output row per list element; the original row index label is
    # repeated for each tactic of the same campaign.
    .explode('activist_campaign_tactic')
    # Constant 1 marking that the campaign used this tactic.
    .assign(activist_campaign_tactic_indicator=1)
)

In [None]:
# Preview the first five rows of the tactic table.
df_tactic.head(n=5)

In [None]:
# Number of tactic rows per tactic label, most frequent first.
(
    df_tactic
    .groupby('activist_campaign_tactic')['campaign_id']
    .count()
    .sort_values(ascending=False)
    .to_frame(name='count')
)

# Activists

Keyed by `(activist_id)`.

In [None]:
# Number of distinct activists in the cleaned data.
df_clean['activist_id'].nunique()

In [None]:
# One row per activist_id carrying the activist's name and group.
# GroupBy.last() takes the last entry per column within each group
# (missing values are skipped by default).
df_activist = (
    df_clean
    .groupby('activist_id')
    # Select the columns with a list: subscripting a GroupBy with a bare tuple
    # of keys was deprecated in pandas 1.0 and raises in pandas >= 2.0.
    [['activist_name', 'activist_group']]
    .last()
    .reset_index()
)

df_activist.head(5)

In [None]:
# Ten activists with the most campaigns. The per-activist campaign counts are
# indexed by activist_id and left-joined onto the activist table, so every
# activist row is kept even when no count is available for it.
(
    df_activist
    .merge(
        df_campaign
        .groupby('activist_id')['campaign_id']
        .count()
        .to_frame(name='campaign_count'),
        how='left',
        on='activist_id',
    )
    .sort_values(by='campaign_count', ascending=False)
    .head(10)
)

# Targets

Keyed by `(company_id)`.

In [None]:
# Number of distinct target companies in the cleaned data.
df_clean['company_id'].nunique()

In [None]:
# One row per company_id carrying descriptive attributes of the target company.
# GroupBy.last() takes the last entry per column within each group
# (missing values are skipped by default).
df_company = (
    df_clean
    .groupby('company_id')
    # Select the columns with a list: subscripting a GroupBy with a bare tuple
    # of keys was deprecated in pandas 1.0 and raises in pandas >= 2.0.
    [[
        'company_name',
        'sector',
        'current_entity_status',
        'current_entity_detail',
    ]]
    .last()
    .reset_index()
)

In [None]:
# Preview the first ten target companies.
df_company.iloc[:10]