In [3]:
import pandas as pd

# Read the four cleaned source tables from the prep directory (paths are
# relative to the current working directory). The files are read in the
# order listed and unpacked into one DataFrame per table.
employees_df, courses_df, skills_df, assessments_df = [
    pd.read_csv(csv_path)
    for csv_path in (
        '../prep/employees_cleaned.csv',
        '../prep/courses_cleaned.csv',
        '../prep/skills_cleaned.csv',
        '../prep/employee_skill_assessments_cleaned.csv',
    )
]

# Step 1: attach employee attributes to every assessment row.
# The join key is spelled 'employee_Id' on the assessments side and
# 'employee_id' on the employees side, so each side's key is named
# explicitly; both key columns are present in the result.
employee_assessment_df = assessments_df.merge(
    employees_df,
    left_on='employee_Id',
    right_on='employee_id',
    how='inner',
)

# Step 2: bring in course details via the shared 'course_id' column.
employee_assessment_course_df = employee_assessment_df.merge(
    courses_df,
    on='course_id',
    how='inner',
)

# Step 3: bring in skill details via the shared 'skill_id' column.
# All three joins are inner joins, so a row survives only if its key is
# found in both tables at every step.
curated_df = employee_assessment_course_df.merge(
    skills_df,
    on='skill_id',
    how='inner',
)

# Keep only the reporting columns, in presentation order, and expose
# 'department_x' under the plain name 'department'.
#
# The selection and the rename are done in one expression that returns a
# new DataFrame, rather than renaming the column subset with inplace=True.
#
# NOTE(review): 'department_x' is the name pandas gives the left-hand
# 'department' column when both sides of a merge contain a column of that
# name (default suffixes '_x'/'_y'). Presumably one of the upstream merges
# produced it, and the matching '_y' column is dropped here on purpose --
# confirm which table's department is the intended one.
curated_df = curated_df[[
    'assessment_id', 'employee_id', 'employee_name', 'email', 'role', 'department_x',
    'course_id', 'course_name', 'competencyLevel', 'rating', 'skill_id', 'skill_name',
    'marks', 'date', 'approvalStatus',
]].rename(columns={'department_x': 'department'})

# Write the curated table to 'curated_report.csv' in the current working
# directory, omitting the DataFrame index from the file.
curated_df.to_csv('curated_report.csv', index=False)

# Sanity check: print a heading, then the leading rows of the table on the
# following line.
print("Curated Reporting Table:", curated_df.head(), sep="\n")


Curated Reporting Table:
   assessment_id  employee_id     employee_name                       email  \
0              1        17995       Tony Castro          fgates@example.org   
1              2        15754   Heather Stanley        joseph06@example.org   
2              3        49440      William Ryan       patrick46@example.net   
3              4         9008  Jennifer Stewart  kristinaparker@example.org   
4              5        12551     Thomas Garcia         oconner@example.com   

                role          department  course_id  \
0   Business Analyst   Quality Assurance         46   
1     Data Scientist   Quality Assurance        156   
2  Software Engineer        Data Science        120   
3     Data Scientist              DevOps         26   
4     Data Scientist  Product Management         26   

                          course_name competencyLevel  rating  skill_id  \
0                        Cloud Devops        advanced       2        31   
1  Data Pipelines W