In [1]:
import pandas as pd
import klib as kl

import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px


### Load files from the `dashboard_data` directory

In [None]:
# Read all six dashboard CSVs in one pass. Each file's first column becomes
# the DataFrame index. The targets are bound in the same order as the paths.
(
    retained_industry_codes,
    dropped_industry_codes,
    industry_counts,
    quarterly_employment,
    employment_pct_change,
    wage_change,
) = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        'dashboard_data/retained_industry_codes.csv',
        'dashboard_data/dropped_industry_codes.csv',
        'dashboard_data/industry_counts.csv',
        'dashboard_data/quarterly_employment.csv',
        'dashboard_data/employment_pct_change.csv',
        'dashboard_data/wage_change.csv',
    )
)

### Visualizations

In [4]:
custom_colors = px.colors.qualitative.Vivid  # Vibrant qualitative palette

# Plotly Express colours a numeric column on a continuous scale and then
# ignores color_discrete_sequence. employment_status appears to be coded 0/1
# (the pie chart cell maps exactly those codes), so it is converted to text
# labels on a plotting copy; quarterly_employment itself is not modified.
# Values other than 0/1 are kept as-is and simply rendered as strings.
fig = px.bar(
    quarterly_employment.assign(
        employment_status=lambda frame: frame["employment_status"]
        .replace({0: "Not Employed", 1: "Employed"})
        .astype(str)
    ),
    x="quarter",
    y="count",
    color="employment_status",  # One colour per status, like `hue` in Seaborn
    barmode="stack",  # Stack the statuses on top of each other within a quarter
    title="Employment Status by Quarter",
    labels={"quarter": "Quarter", "count": "Count", "employment_status": "Employment Status"},
    color_discrete_sequence=custom_colors,
)

# Render the figure
fig.show()

In [51]:
# Total `count` per employment status across every row of quarterly_employment
# (i.e. summed over all quarters), flattened back to a two-column frame.
pie_chart = (
    quarterly_employment
    .groupby("employment_status")["count"]
    .sum()
    .reset_index()
)

In [None]:

# Turn the 0/1 status codes into readable labels. `replace` leaves values that
# are not keys of the mapping untouched, so re-running this cell keeps the
# labels; a second `.map` call would instead turn every label into NaN and
# produce an empty chart.
pie_chart["employment_status"] = pie_chart["employment_status"].replace(
    {0: "Not Employed", 1: "Employed"}
)

# NOTE(review): pie_chart is built by summing `count` over every quarter, while
# the title below refers to the 4th quarter only — confirm which is intended.
fig = px.pie(
    pie_chart,
    names="employment_status",
    values="count",
    title="Employment Status Distribution at 4th Quarter After Program Exit",
    color="employment_status",
    # Fixed colour per label so the slices stay consistent between runs
    color_discrete_map={"Not Employed": "red", "Employed": "blue"},
    labels={"employment_status": "Employment Status", "count": "Count"},
)

# Render the figure
fig.show()


In [5]:

# Mean wage change per industry, ordered high-to-low; the final 20 rows are
# therefore the 20 industries with the lowest average wage change.
df_plot = (
    wage_change
    .groupby("industry_code")["wage_change"]
    .mean()
    .sort_values(ascending=False)
    .tail(20)
    .reset_index()
)

# Cap industry names at 75 characters so long labels do not crowd the axis
df_plot["industry_code"] = df_plot["industry_code"].astype(str).str[:75]

# Horizontal bars, one per industry, annotated with the plotted value
fig = px.bar(
    df_plot,
    x="wage_change",
    y="industry_code",
    orientation="h",
    title="Bottom 20 Industries by Average Wage Change",
    labels={"wage_change": "Average Wage Change", "industry_code": "Industry Code"},
    text="wage_change",
)

# Two-decimal value labels outside each bar
fig.update_traces(texttemplate="%{text:.2f}", textposition="outside")
# Order the bars by value and make the figure tall enough for 20 labels
fig.update_layout(yaxis={"categoryorder": "total ascending"}, height=800)

# Render the figure
fig.show()

In [8]:

# Mean wage change per industry, ordered high-to-low; the first 20 rows are
# the 20 industries with the highest average wage change.
df_plot = (
    wage_change
    .groupby("industry_code")["wage_change"]
    .mean()
    .sort_values(ascending=False)
    .head(20)
    .reset_index()
)

# Cap industry names at 75 characters so long labels do not crowd the axis
df_plot["industry_code"] = df_plot["industry_code"].astype(str).str[:75]

# Horizontal bars, one per industry, annotated with the plotted value
fig = px.bar(
    df_plot,
    x="wage_change",
    y="industry_code",
    orientation="h",
    title="Top 20 Industries by Average Wage Change",
    labels={"wage_change": "Average Wage Change", "industry_code": "Industry Code"},
    text="wage_change",
)

# Two-decimal value labels outside each bar
fig.update_traces(texttemplate="%{text:.2f}", textposition="outside")
# Order the bars by value and make the figure tall enough for 20 labels
fig.update_layout(yaxis={"categoryorder": "total ascending"}, height=800)

# Render the figure
fig.show()

In [9]:

# Plot the last 20 rows of employment_pct_change as horizontal bars.
# NOTE(review): no sorting happens here, so "bottom 20" presumably relies on
# the CSV already being ordered by pct_change, highest first — TODO confirm.
fig = px.bar(
    employment_pct_change.iloc[-20:],
    x="pct_change",
    y="industry_code",
    orientation="h",
    title="Bottom 20 Industries by Employment Count % Change",
    labels={"pct_change": "Average Employment % Change", "industry_code": "Industry Code"},
    text="pct_change",
)

# Two-decimal value labels outside each bar
fig.update_traces(texttemplate="%{text:.2f}", textposition="outside")
# Order the bars by value and make the figure tall enough for 20 labels
fig.update_layout(yaxis={"categoryorder": "total ascending"}, height=800)

# Render the figure
fig.show()

In [10]:

# Plot the first 20 rows of employment_pct_change as horizontal bars.
# NOTE(review): no sorting happens here, so "top 20" presumably relies on
# the CSV already being ordered by pct_change, highest first — TODO confirm.
fig = px.bar(
    employment_pct_change.iloc[:20],
    x="pct_change",
    y="industry_code",
    orientation="h",
    title="Top 20 Industries by Employment Count % Change",
    labels={"pct_change": "Average Employment % Change", "industry_code": "Industry Code"},
    text="pct_change",
)

# Two-decimal value labels outside each bar
fig.update_traces(texttemplate="%{text:.2f}", textposition="outside")
# Order the bars by value and make the figure tall enough for 20 labels
fig.update_layout(yaxis={"categoryorder": "total ascending"}, height=800)

# Render the figure
fig.show()

In [12]:
# Name the two columns of industry_counts so the chart code can refer to them.
industry_counts = industry_counts.set_axis(["industry_code", "count"], axis=1)

In [17]:
# Largest counts first, so positional slices pick the most frequent industries.
industry_counts = industry_counts.sort_values("count", ascending=False)

In [None]:
# Cap industry names at 75 characters so the rotated tick labels stay readable
industry_counts["industry_code"] = industry_counts["industry_code"].astype(str).str[:75]

# Vertical bars for rows 1 through 20 (by position) of industry_counts.
# NOTE(review): the slice starts at position 1, so the first row is left out
# even though the axis is titled "Top 20" — presumably deliberate (e.g. a
# catch-all or missing-industry row); confirm.
fig = px.bar(
    industry_counts.iloc[1:21],
    x='industry_code',
    y='count',
    title='Industry Code Counts',
    labels={'industry_code': 'Industry Code', 'count': 'Count'},
    text=industry_counts.iloc[1:21]['count'],  # Show each bar's count as its label
    height=800,
)

# Let Plotly decide where each value label fits best
fig.update_traces(textposition='auto')
# Axis titles, slanted tick labels for readability, and a small gap between bars
fig.update_layout(
    xaxis_title="Top 20 Industry Code",
    yaxis_title="Count",
    xaxis_tickangle=-45,
    bargap=0.2,
)

# Render the figure
fig.show()

In [27]:
# Name the three columns of retained_industry_codes for the steps that follow.
retained_industry_codes = retained_industry_codes.set_axis(
    ["industry_code", "retained_flag", "count_of_retained_employment"],
    axis=1,
)

In [34]:
# Preview the first five rows (head() defaults to n=5)
retained_industry_codes.head()

Unnamed: 0,industry_code,retained_flag,count_of_retained_employment
1531,Temporary Help Services,1,2931
611,General Medical and Surgical Hospitals,1,1446
463,Elementary and Secondary Schools,1,1247
615,General Warehousing and Storage,1,1210
589,Full-Service Restaurants,1,1175


In [35]:
# Cap industry names at 75 characters so axis labels stay readable
retained_industry_codes["industry_code"] = (
    retained_industry_codes["industry_code"].astype(str).str[:75]
)


In [41]:
# Vertical bars for the first 30 rows of retained_industry_codes, one colour
# per industry.
# NOTE(review): no sorting happens here; the preview output suggests the file
# is already ordered by count, highest first — confirm.
fig = px.bar(
    retained_industry_codes.iloc[:30],
    x="industry_code",
    y="count_of_retained_employment",
    title="Retained Employment by Industry",
    labels={"industry_code": "Industry Code", "count_of_retained_employment": "Count of Retained Employment"},
    text="count_of_retained_employment",
    color="industry_code",
)

# Value labels above the bars
fig.update_traces(textposition="outside")
# Slanted tick labels and a large canvas so 30 bars remain legible
fig.update_layout(xaxis_tickangle=-45, height=800, width=1500)

# Render the figure
fig.show()