In [1]:
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm

import bokeh
from bokeh.plotting import figure, show
from bokeh.palettes import Spectral5

from ipywidgets import interact, interact_manual
from ipywidgets import RadioButtons

In [2]:
from bokeh.io import output_notebook
output_notebook()

In [3]:
path = r'C:\Users\16302\Documents\Coding Samples\vaccine-job-applies'

In [4]:
def load_job_activity(path, fname):
    job_activity = pd.read_csv(os.path.join(path, fname))
    job_activity = job_activity.drop(['Date', 'Enterprise ID', 'Currency', 'Employer ID'], axis=1)  
    job_activity = job_activity.loc[(job_activity['Body ID'] != '00000000-0000-0000-0000-000000000000')]
    return job_activity

def load_vaccine_ref(path, fname):
    vaccine_ref = pd.read_csv(os.path.join(path, fname))
    vaccine_ref = vaccine_ref.loc[(vaccine_ref['Tag'] != '2')]
    vaccine_ref = vaccine_ref.dropna(subset=['Tag'])
    return vaccine_ref

def concatenate(fnames):
    long = pd.concat(fnames, ignore_index=True)
    return long

def merge(job_activity, vaccine_ref):
    df = job_activity.merge(vaccine_ref, on='Body ID', how='outer')
    df = df.drop(['Matches'], axis=1)
    df['Tag'] = df['Tag'].fillna('Not Applicable') #change to 'Not Applicable' or 'No Vaccine Mentioned' depending on separation
    return df

def calc_cta(df):
    df['Click to Apply (%)'] = (df['Paid Applies']/df['Paid Clicks'])*100
    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    df = df.dropna(subset=['Click to Apply (%)'])
    return df
    
def diff_cta(df, on):
    df_na = df.query('Tag=="Not Applicable"').rename(columns={'Click to Apply (%)':'CTA NA'})
    df_covid = df.query('Tag=="COVID Vaccine Required"').rename(columns={'Click to Apply (%)':'CTA COVID'})
    df_diff = df_na[[on, 'CTA NA']].merge(df_covid[[on, 'CTA COVID']], on=on, how='inner')
    df_diff['Difference in CTA (%)'] = df_diff['CTA NA']-df_diff['CTA COVID']
    return df_diff

def aggregate_cta(df):
    agg_cta = df.groupby(['Tag'])['Paid Clicks', 'Paid Applies', 'Count of Jobs'].apply(lambda x : x.sum()).reset_index()
    agg_cta = calc_cta(agg_cta)
    return agg_cta

def by_category(df, category):
    result = df.groupby([category, 'Tag'])['Paid Clicks', 'Paid Applies', 'Count of Jobs'].apply(lambda x : x.sum()).reset_index()
    result = calc_cta(result)
    return result

In [5]:
#load
sept_job_activity = load_job_activity(path, 'September Job Activity Data-edited.csv')
sept_vaccine_ref = load_vaccine_ref(path, 'September Vaccine Reference Data.csv')
oct_job_activity = load_job_activity(path, 'October Job Activity Data-edited.csv')
oct_vaccine_ref = load_vaccine_ref(path, 'October Vaccine Reference Data.csv')

#concatenate & merge
job_activity = concatenate([sept_job_activity, oct_job_activity])
job_activity = job_activity.drop(['Discipline ID'], axis=1)  
vaccine_ref = concatenate([sept_vaccine_ref, oct_vaccine_ref])

df = merge(job_activity, vaccine_ref)

In [6]:
#industry level
df_industry = by_category(df, 'Discipline Name')

#create list of unique industries
industries = [each for each in df_industry['Discipline Name'].unique()]

#job function level, interested job categories
df_job_cat = by_category(df, 'Tagged Category')

#create list of job categories of interest
my_cat = ['29-1141.00 - Registered Nurses',
                 '35-3031.00 - Waiters and Waitresses',
                 '41-2031.00 - Retail Salespersons',
                 '43-4051.00 - Customer Service Representatives',
                 '49-3023.00 - Automotive Service Technicians and Mechanics',
                 '49-9071.00 - Maintenance and Repair Workers, General',
                 '53-3031.00 - Driver/Sales Workers',
                 '53-3032.00 - Heavy and Tractor-Trailer Truck Drivers',
                 '53-3033.00 - Light Truck or Delivery Services Drivers']

#subset based on those categories and clean titles
df_my_cat = df_job_cat[df_job_cat['Tagged Category'].isin(my_cat)]
df_my_cat['Tagged Category'] = df_my_cat['Tagged Category'].map(lambda x: x[13:])

#create list of unique job categories
categories = [each for each in df_my_cat['Tagged Category'].unique()]

  result = df.groupby([category, 'Tag'])['Paid Clicks', 'Paid Applies', 'Count of Jobs'].apply(lambda x : x.sum()).reset_index()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_my_cat['Tagged Category'] = df_my_cat['Tagged Category'].map(lambda x: x[13:])


In [7]:
@interact(Industry=industries, Metric=RadioButtons(options=['Clicks', 'Applies', 'Click to Apply (%)']))
def interact_plot_industry(Industry='Administration', Metric='Clicks'):
          
    if Metric == 'Clicks':
        col='Paid Clicks'
    elif Metric == 'Applies':
        col='Paid Applies'
    elif Metric == 'Click to Apply (%)':
        col='Click to Apply (%)'
    
    subset = df_industry.loc[df_industry['Discipline Name'] == Industry]
    tags = [each for each in subset['Tag'].unique()]
    values = [each for each in subset[col]]
    
    plot = figure(x_range=tags, title=f'{col} in {Industry}', y_axis_label=Metric)
    plot.vbar(x=tags, top=values, width=0.8, color=Spectral5)
    show(plot)

interactive(children=(Dropdown(description='Industry', options=('Administration', 'Animal Care', 'Business & C…

In [9]:
@interact(Category=categories, Metric=RadioButtons(options=['Clicks', 'Applies', 'Click to Apply (%)']))
def interact_plot_job_function(Category='Registered Nurses', Metric='Clicks'):
          
    if Metric == 'Clicks':
        col='Paid Clicks'
    elif Metric == 'Applies':
        col='Paid Applies'
    elif Metric == 'Click to Apply (%)':
        col='Click to Apply (%)'
    
    subset = df_my_cat.loc[df_my_cat['Tagged Category'] == Category]
    tags = [each for each in subset['Tag'].unique()]
    values = [each for each in subset[col]]
    
    plot = figure(x_range=tags, title=f'{col} for {Category}', y_axis_label=Metric)
    plot.vbar(x=tags, top=values, width=0.8, color=Spectral5)
    show(plot)

interactive(children=(Dropdown(description='Category', options=('Registered Nurses', 'Waiters and Waitresses',…