In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import json

In [2]:
# Workers: read the JSON, transpose it so the original columns become rows, and
# turn the resulting index into an ordinary column before naming the columns.
workers_df = (
    pd.read_json('extracted/workers.json')
    .T
    .reset_index()
)
workers_df.columns = ['worker_id', 'name', 'base_salary']

In [3]:
# Preview the first five rows of the workers table.
workers_df.head(5)

Unnamed: 0,worker_id,name,base_salary
0,w_eb5ca7e7-197b-4128-9cdd-17b8d7d07803,Efren Selva,10119
1,w_ad84fb4e-5229-4c19-91e7-5e5cf8d3f20c,Alan Brown,10715
2,w_653b3a89-c5fa-466b-a477-f0c09a724cdd,Douglas Case,8259
3,w_7fbf0deb-0c65-449a-91ee-0e9e8cff4d0c,Christa Scott,11672
4,w_cf0ff121-da11-4b5c-b191-4d895ce97512,Christopher Greer,9408


In [4]:
# Load prices.json as a dictionary and convert it to a two-column DataFrame
# (one row per key/value pair of the JSON object).
# JSON is UTF-8 by specification, so the encoding is pinned explicitly instead of
# inheriting the platform default, which differs between operating systems.
with open('prices.json', encoding='utf-8') as f:
    prices_data = json.load(f)
prices_df = pd.DataFrame(list(prices_data.items()), columns=['technical_problem', 'price'])

In [5]:
# Preview the first five rows of the price table.
prices_df.head(5)

Unnamed: 0,technical_problem,price
0,browser_and_web_based_support,650
1,teams_problems,665
2,internet_problems,680
3,cloud_and_storage_solutions,690
4,zoom_problems,695


In [6]:
# Load the feature-calls batch calls_11.json and flatten it to one row per call.
# The loops below treat the file as nested {location: {call_id: call_info}}.
# JSON is UTF-8 by specification, so the encoding is pinned explicitly instead of
# inheriting the platform default.
with open('extracted/feature_calls/calls_11.json', encoding='utf-8') as f:
    feature_calls_data = json.load(f)

# Tag every call record with its id and location, then build the frame in one go.
# Note: the tags are written into the loaded dicts themselves, so
# feature_calls_data also carries 'call_id' and 'location' afterwards.
feature_records = []
for location, calls in feature_calls_data.items():
    for call_id, call_info in calls.items():
        call_info['call_id'] = call_id
        call_info['location'] = location
        feature_records.append(call_info)
feature_calls_df = pd.DataFrame(feature_records)

In [7]:
# Preview the first five flattened feature calls.
feature_calls_df.head(5)

Unnamed: 0,date,tlf_number,technical_problem,difficulty,commission,call_id,location
0,2024-10-23 16:54:39.515508,72345741,teams_problems,medium,665.0,c_5e239e86-20f4-4053-a77b-a01dd3971aad,bangalore
1,2024-10-23 12:18:18.515647,53336166,basic_hardware_troubleshooting,easy,576.0,c_1cbe224b-804b-4b30-bf78-198feea748f8,bangalore
2,2024-10-20 01:42:41.515693,81959025,email_related_issues,medium,700.0,c_ca2f35cc-47fb-446f-a8ad-3c0360edb98c,bangalore
3,2024-10-24 12:14:40.515734,96315388,device_and_peripheral_setup,medium,705.0,c_59b1c1cc-6362-4edd-aed8-720536d733e6,bangalore
4,2024-10-24 19:24:07.515775,54054590,internet_problems,medium,680.0,c_69e87302-cad8-42a3-9fb2-e64c0a4b14e4,bangalore


In [8]:
# Load the previous-calls batch calls_0.json and flatten it to one row per call.
# The loops below treat the file as nested {location: {call_id: call_info}}.
# JSON is UTF-8 by specification, so the encoding is pinned explicitly instead of
# inheriting the platform default.
with open('extracted/previous_calls/calls_0.json', encoding='utf-8') as f:
    previous_calls_data = json.load(f)

# Tag every call record with its id and location, then build the frame in one go.
# Note: the tags are written into the loaded dicts themselves, so
# previous_calls_data also carries 'call_id' and 'location' afterwards.
previous_records = []
for location, calls in previous_calls_data.items():
    for call_id, call_info in calls.items():
        call_info['call_id'] = call_id
        call_info['location'] = location
        previous_records.append(call_info)
previous_calls_df = pd.DataFrame(previous_records)


In [9]:
# Preview the first five flattened previous calls.
previous_calls_df.head(5)

Unnamed: 0,date,tlf_number,technical_problem,difficulty,commission,call_id,location
0,2024-10-19 07:28:55.254686,85062036,zoom_problems,medium,695.0,c_a317abc5-a7e1-40c3-b486-c4eefdf8be05,bangalore
1,2024-10-22 22:08:18.254841,73320532,browser_and_web_based_support,medium,650.0,c_78d3cde6-0da9-49f1-91b8-907b180b30a2,bangalore
2,2024-10-26 08:29:09.254885,97493770,browser_and_web_based_support,easy,520.0,c_130313ec-a141-4306-825b-826c63096cff,bangalore
3,2024-10-25 07:23:24.254929,64380401,teams_problems,easy,532.0,c_4027388e-7996-4fc2-aef6-c99d66e22b09,bangalore
4,2024-10-22 19:47:52.254979,43520611,account_and_security_issues,hard,852.0,c_2d36d2d7-d07f-4431-a699-e0ac84a0b7ce,bangalore


In [10]:
# Previous call reports: read the JSON file straight into a DataFrame.
reports_df = pd.read_json(
    path_or_buf='extracted/previous_reports/call_report_0.json',
)


In [11]:
# Load the previous schedule and flatten it to one (worker_id, call_id) row per
# scheduled call. The file maps each worker id to an iterable of call ids
# (presumably a JSON array -- confirm against the data).
# JSON is UTF-8 by specification, so the encoding is pinned explicitly instead of
# inheriting the platform default.
with open('extracted/previous_schedules/call_shedule_0.json', encoding='utf-8') as f:
    schedules_data = json.load(f)

# Build all records first and construct the DataFrame once.
schedule_records = [
    {'worker_id': worker_id, 'call_id': call_id}
    for worker_id, calls in schedules_data.items()
    for call_id in calls
]
schedules_df = pd.DataFrame(schedule_records)


In [12]:
# Preview the first five worker/call assignments.
schedules_df.head(5)

Unnamed: 0,worker_id,call_id
0,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,c_a317abc5-a7e1-40c3-b486-c4eefdf8be05
1,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,c_256844cb-b710-4da7-8f95-238a7fdd5261
2,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,c_46bfdc38-5130-4e08-9918-4851d84930ce
3,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,c_497ac5ca-8ff3-4773-968c-2e300486c35d
4,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,c_97384374-b420-4259-aaa9-f2e1571928cb


In [13]:
# Attach each technical problem's price to both call tables.
# A left join keeps every call; calls whose technical_problem has no entry in
# prices_df end up with a missing price.
# Any 'price' column already present is dropped first, so re-running this cell
# gives the same result instead of accumulating price_x / price_y columns.
feature_calls_df = (
    feature_calls_df
    .drop(columns='price', errors='ignore')
    .merge(prices_df, on='technical_problem', how='left')
)

previous_calls_df = (
    previous_calls_df
    .drop(columns='price', errors='ignore')
    .merge(prices_df, on='technical_problem', how='left')
)

In [14]:
# Guarantee a 'call_time' column exists: when the data has none, create it as an
# all-NaN placeholder (replace with actual values if they become available).
if 'call_time' not in feature_calls_df:
    feature_calls_df['call_time'] = np.nan


In [15]:
# Preview the feature calls again, now including the price and call_time columns.
feature_calls_df.head(5)

Unnamed: 0,date,tlf_number,technical_problem,difficulty,commission,call_id,location,price,call_time
0,2024-10-23 16:54:39.515508,72345741,teams_problems,medium,665.0,c_5e239e86-20f4-4053-a77b-a01dd3971aad,bangalore,665,
1,2024-10-23 12:18:18.515647,53336166,basic_hardware_troubleshooting,easy,576.0,c_1cbe224b-804b-4b30-bf78-198feea748f8,bangalore,720,
2,2024-10-20 01:42:41.515693,81959025,email_related_issues,medium,700.0,c_ca2f35cc-47fb-446f-a8ad-3c0360edb98c,bangalore,700,
3,2024-10-24 12:14:40.515734,96315388,device_and_peripheral_setup,medium,705.0,c_59b1c1cc-6362-4edd-aed8-720536d733e6,bangalore,705,
4,2024-10-24 19:24:07.515775,54054590,internet_problems,medium,680.0,c_69e87302-cad8-42a3-9fb2-e64c0a4b14e4,bangalore,680,


In [16]:
# Expected commission is the price scaled by a per-difficulty multiplier; the
# profit discrepancy is the recorded commission minus that expectation.
# Difficulties missing from the map produce NaN in both derived columns.
difficulty_commission_map = {
    'hard': 1.2,
    'medium': 1.0,
    'easy': 0.8,
}
feature_calls_df['expected_commission'] = (
    feature_calls_df['difficulty']
    .map(difficulty_commission_map)
    .mul(feature_calls_df['price'])
)
feature_calls_df['profit_discrepancy'] = feature_calls_df['commission'].sub(
    feature_calls_df['expected_commission']
)


In [17]:
# Show recorded vs. expected commission side by side for the first 100 calls.
feature_calls_df.loc[
    :, ['commission', 'expected_commission', 'profit_discrepancy', 'difficulty']
].head(100)

Unnamed: 0,commission,expected_commission,profit_discrepancy,difficulty
0,665.0,665.0,0.0,medium
1,576.0,576.0,0.0,easy
2,700.0,700.0,0.0,medium
3,705.0,705.0,0.0,medium
4,680.0,680.0,0.0,medium
...,...,...,...,...
95,564.0,564.0,0.0,easy
96,588.0,588.0,0.0,easy
97,840.0,840.0,0.0,hard
98,798.0,798.0,0.0,hard
