In [17]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
import plotly.express as px
import plotly.graph_objects as go
import altair as alt

In [18]:
# Load the employee performance KPI report into a DataFrame. The path is
# relative, so it resolves against the notebook's working directory.
kpi_report_path = '../data_engineering/data_warehouse/report/employee_performance_kpi.csv'
employee_performance = pd.read_csv(kpi_report_path)

In [19]:
# Display the column labels of the loaded KPI report so the fields used by the
# analyses below can be checked against what the file actually contains.
employee_performance.columns

Index(['user_id', 'name', 'designation', 'course_id', 'course_title',
       'course_tag', 'course_duration', 'modules_completed', 'total_modules',
       'completion_percentage', 'course_score', 'performance_score'],
      dtype='object')

HYPOTHESIS

Hypothesis: Employees with a higher completion percentage in courses achieve higher performance scores.

In [20]:
# Collapse the report to one row per employee (user_id, name, designation),
# averaging that employee's completion percentage and performance score over
# all of their rows (presumably one row per course taken -- TODO confirm).
performance_summary = employee_performance.groupby(['user_id', 'name', 'designation']).agg({
    'completion_percentage': 'mean',
    'performance_score': 'mean'
}).reset_index()


# One point per employee: average completion on x, average performance on y.
# The tooltip carries the grouping keys as well as the two averages so a point
# can be traced back to a specific employee, matching the tooltip used by the
# course-score scatter plot elsewhere in this notebook.
chart1 = alt.Chart(performance_summary).mark_circle(size=60).encode(
    x=alt.X('completion_percentage:Q', title='Average Completion Percentage (%)'),
    y=alt.Y('performance_score:Q', title='Average Performance Score'),
    tooltip=['user_id', 'name', 'designation', 'completion_percentage', 'performance_score']
).properties(
    title='Course Completion Percentage vs. Performance Score',
    width=600,
    height=400
).interactive()  # bind the axes to pan/zoom

chart1.show()

Hypothesis: Employees who achieve higher course scores in their completed courses will have higher overall performance scores.

In [21]:
# Plot every row of the KPI report as its own point, with no aggregation:
# course score on the x-axis against performance score on the y-axis.
# Hovering a point shows which employee and course it belongs to.
scatter_plot = (
    alt.Chart(
        employee_performance,
        title='Relationship Between Course Scores and Performance Scores',
        width=600,
        height=400,
    )
    .mark_circle(size=60, opacity=0.6)
    .encode(
        alt.X('course_score:Q', title='Course Score'),
        alt.Y('performance_score:Q', title='Performance Score'),
        tooltip=['user_id', 'name', 'course_title', 'course_score', 'performance_score'],
    )
    .interactive()  # bind the axes to pan/zoom
)

scatter_plot.show()

Hypothesis: Courses with longer durations are associated with higher average performance scores.

In [22]:
# Mean performance score for each distinct course duration value.
duration_performance = (
    employee_performance
    .groupby('course_duration')[['performance_score']]
    .mean()
    .reset_index()
)

# One point per distinct duration: duration on x, mean performance score on y.
chart2 = (
    alt.Chart(
        duration_performance,
        title='Course Duration vs. Performance Score',
        width=600,
        height=400,
    )
    .mark_circle(size=60)
    .encode(
        alt.X('course_duration:Q', title='Course Duration (Minutes)'),
        alt.Y('performance_score:Q', title='Average Performance Score'),
        tooltip=['course_duration', 'performance_score'],
    )
    .interactive()  # bind the axes to pan/zoom
)

chart2.show()


Hypothesis: There is a positive relationship between the number of modules completed in a course and the course score achieved by the employee.

In [23]:
# Mean course score for each distinct count of completed modules.
modules_performance = (
    employee_performance
    .groupby('modules_completed')[['course_score']]
    .mean()
    .reset_index()
)

# One point per modules-completed count: count on x, mean course score on y.
chart4 = (
    alt.Chart(
        modules_performance,
        title='Modules Completed vs. Course Score',
        width=600,
        height=400,
    )
    .mark_circle(size=60)
    .encode(
        alt.X('modules_completed:Q', title='Modules Completed'),
        alt.Y('course_score:Q', title='Average Course Score'),
        tooltip=['modules_completed', 'course_score'],
    )
    .interactive()  # bind the axes to pan/zoom
)

chart4.show()


Hypothesis: Average course scores differ across course tags.

In [24]:
# This cell works on the `employee_performance` frame loaded near the top of
# the notebook; the same CSV is not read a second time here.

# Mean course score for each course tag.
tag_score_summary = employee_performance.groupby('course_tag').agg({
    'course_score': 'mean'
}).reset_index()


# Cut-off that splits tags into two colour groups. The legend labels, the
# colour-scale domain and the chart title are all derived from this one value
# so they cannot drift apart if the threshold is changed.
threshold_value = 60
above_label = f'Above {threshold_value}'
below_label = f'Below {threshold_value}'


# Vectorised labelling. A mean exactly equal to the threshold falls in the
# "Above" group (the comparison is >=); a missing mean compares False and so
# falls in the "Below" group.
tag_score_summary['course_tag_grouped'] = (
    tag_score_summary['course_score']
    .ge(threshold_value)
    .map({True: above_label, False: below_label})
)


# Bar per course tag, sorted by descending mean score and coloured by group.
chart = alt.Chart(tag_score_summary).mark_bar().encode(
    x=alt.X('course_tag:O', title='Course Tag', sort='-y'),
    y=alt.Y('course_score:Q', title='Average Course Score'),
    color=alt.Color('course_tag_grouped:N',
                    scale=alt.Scale(domain=[above_label, below_label],
                                    range=['#FFA500', '#FFDDC1']),
                    title='Tag Group'),
    tooltip=['course_tag', 'course_score']
).properties(
    title=f'Impact of Course Tags on Course Scores (Above and Below {threshold_value})',
    width=600,
    height=400
).interactive()

chart.show()