In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import json

In [2]:
# Load the worker roster. transpose() flips the frame returned by read_json so
# that the original column labels become rows, and reset_index() moves those
# labels into an ordinary column, which is then named 'worker_id'.
workers_df = pd.read_json('extracted/workers.json').transpose().reset_index()
workers_df.columns = ['worker_id', 'name', 'base_salary']
# Transposing a frame that holds both text and numbers leaves every resulting
# column with dtype `object`. infer_objects() performs a soft conversion that
# restores a numeric dtype wherever a column's values allow it and leaves the
# remaining columns untouched, so numeric work on base_salary behaves normally.
workers_df = workers_df.infer_objects()

In [3]:
# Preview the first five worker rows to check the column layout.
workers_df.head(n=5)

Unnamed: 0,worker_id,name,base_salary
0,w_eb5ca7e7-197b-4128-9cdd-17b8d7d07803,Efren Selva,10119
1,w_ad84fb4e-5229-4c19-91e7-5e5cf8d3f20c,Alan Brown,10715
2,w_653b3a89-c5fa-466b-a477-f0c09a724cdd,Douglas Case,8259
3,w_7fbf0deb-0c65-449a-91ee-0e9e8cff4d0c,Christa Scott,11672
4,w_cf0ff121-da11-4b5c-b191-4d895ce97512,Christopher Greer,9408


In [4]:
# Parse prices.json into a dict, then turn its key/value pairs into a
# two-column DataFrame (one row per pair).
with open('prices.json') as prices_file:
    prices_data = json.load(prices_file)
prices_df = pd.DataFrame(
    [(problem, price) for problem, price in prices_data.items()],
    columns=['technical_problem', 'price'],
)

In [5]:
# Preview the first five price rows.
prices_df.head(n=5)

Unnamed: 0,technical_problem,price
0,browser_and_web_based_support,650
1,teams_problems,665
2,internet_problems,680
3,cloud_and_storage_solutions,690
4,zoom_problems,695


In [6]:
# Read calls_11.json, a two-level mapping that this cell walks as
# {location: {call_id: call_info}}.
with open('extracted/feature_calls/calls_11.json') as calls_file:
    feature_calls_data = json.load(calls_file)

# Flatten to one record per call. Each call_info dict is tagged in place with
# the two keys it was nested under, so the loaded data is modified as well.
feature_records = []
for site, site_calls in feature_calls_data.items():
    for cid, info in site_calls.items():
        info.update(call_id=cid, location=site)
    feature_records.extend(site_calls.values())
feature_calls_df = pd.DataFrame(feature_records)

In [7]:
# Display the flattened calls frame as this cell's output.
feature_calls_df

Unnamed: 0,date,tlf_number,technical_problem,difficulty,commission,call_id,location
0,2024-10-23 16:54:39.515508,72345741,teams_problems,medium,665.0,c_5e239e86-20f4-4053-a77b-a01dd3971aad,bangalore
1,2024-10-23 12:18:18.515647,53336166,basic_hardware_troubleshooting,easy,576.0,c_1cbe224b-804b-4b30-bf78-198feea748f8,bangalore
2,2024-10-20 01:42:41.515693,81959025,email_related_issues,medium,700.0,c_ca2f35cc-47fb-446f-a8ad-3c0360edb98c,bangalore
3,2024-10-24 12:14:40.515734,96315388,device_and_peripheral_setup,medium,705.0,c_59b1c1cc-6362-4edd-aed8-720536d733e6,bangalore
4,2024-10-24 19:24:07.515775,54054590,internet_problems,medium,680.0,c_69e87302-cad8-42a3-9fb2-e64c0a4b14e4,bangalore
...,...,...,...,...,...,...,...
25311,2024-10-22 08:22:30.583100,34729150,device_and_peripheral_setup,hard,846.0,c_7f67de4f-76c1-40de-be23-ebdd0a636019,hyderabad
25312,2024-10-21 03:25:25.583138,76118231,operating_system_support,hard,882.0,c_8c4dc8eb-161a-4c60-a68e-2f54595a2756,hyderabad
25313,2024-10-25 13:25:00.583176,17245434,account_and_security_issues,easy,568.0,c_45767486-edd2-4207-a5df-3c35e15038db,hyderabad
25314,2024-10-20 17:25:45.583213,34434026,internet_problems,easy,544.0,c_603ffae3-4d8e-4b7f-ba53-e382dfdfce32,hyderabad


In [8]:
# Read calls_0.json, a two-level mapping that this cell walks as
# {location: {call_id: call_info}}, and flatten it to one record per call.
with open('extracted/previous_calls/calls_0.json') as calls_file:
    previous_calls_data = json.load(calls_file)
previous_records = []
for site, site_calls in previous_calls_data.items():
    for cid, info in site_calls.items():
        # Tag the record in place with the keys it was nested under.
        info.update(call_id=cid, location=site)
    previous_records.extend(site_calls.values())
previous_calls_df = pd.DataFrame(previous_records)


In [9]:
# Display the flattened previous-calls frame as this cell's output.
previous_calls_df

Unnamed: 0,date,tlf_number,technical_problem,difficulty,commission,call_id,location
0,2024-10-19 07:28:55.254686,85062036,zoom_problems,medium,695.0,c_a317abc5-a7e1-40c3-b486-c4eefdf8be05,bangalore
1,2024-10-22 22:08:18.254841,73320532,browser_and_web_based_support,medium,650.0,c_78d3cde6-0da9-49f1-91b8-907b180b30a2,bangalore
2,2024-10-26 08:29:09.254885,97493770,browser_and_web_based_support,easy,520.0,c_130313ec-a141-4306-825b-826c63096cff,bangalore
3,2024-10-25 07:23:24.254929,64380401,teams_problems,easy,532.0,c_4027388e-7996-4fc2-aef6-c99d66e22b09,bangalore
4,2024-10-22 19:47:52.254979,43520611,account_and_security_issues,hard,852.0,c_2d36d2d7-d07f-4431-a699-e0ac84a0b7ce,bangalore
...,...,...,...,...,...,...,...
24995,2024-10-20 15:11:04.283462,41357813,device_and_peripheral_setup,medium,705.0,c_4455ccef-6dfd-4936-b16b-79d890242ca9,hyderabad
24996,2024-10-18 14:29:33.283586,72919664,internet_problems,easy,544.0,c_ff9696d1-023d-423a-9e97-738677e798a2,hyderabad
24997,2024-10-22 04:10:16.283643,51128256,device_and_peripheral_setup,medium,705.0,c_2214af0d-af07-4dfa-be04-d2ca8b32864d,hyderabad
24998,2024-10-26 01:50:42.283683,09704394,internet_problems,easy,544.0,c_3dd511d5-1e14-48cc-a556-9b9422ca245b,hyderabad


In [10]:
# Read the call report file straight into a DataFrame.
reports_df = pd.read_json(
    'extracted/previous_reports/call_report_0.json',
)


In [11]:
# Read the schedule file, which this cell walks as a mapping of
# worker_id -> iterable of call ids, and flatten it to one row per
# (worker_id, call_id) pair.
with open('extracted/previous_schedules/call_shedule_0.json') as schedule_file:
    schedules_data = json.load(schedule_file)
schedule_records = [
    {'worker_id': worker_id, 'call_id': call_id}
    for worker_id, calls in schedules_data.items()
    for call_id in calls
]
# Pin the column names so an empty schedule still yields a frame that has
# 'worker_id' and 'call_id'; without them, a later groupby('worker_id') on an
# empty frame raises KeyError.
schedules_df = pd.DataFrame(schedule_records, columns=['worker_id', 'call_id'])


In [25]:
# Count the non-null call ids per worker and expose the result as a flat
# two-column frame (worker_id, call_count).
worker_performance = (
    schedules_df.groupby('worker_id')['call_id']
    .count()
    .reset_index(name='call_count')
)
worker_performance


Unnamed: 0,worker_id,call_count
0,w_00020787-2ccf-492e-bae4-0d04a4d7ec8a,28
1,w_006be806-a7bb-4a8e-b08b-3a00a117f15a,40
2,w_010ff3f2-78e3-41fa-95e2-5443dcc2aacd,33
3,w_018e899e-f64b-41e3-9f44-c25d14cd8660,40
4,w_01abe3b4-af73-4521-b5c8-bd582d9d2d89,23
...,...,...
750,w_fdfc2986-f7d5-45b1-8ce9-d0a68b8ae16e,34
751,w_fe2a2048-30d3-4f02-8921-069584e541c3,24
752,w_fe89bc24-7071-4e47-89cd-f4b180817d55,28
753,w_ff59e406-e6df-458b-a609-d8515eb511ef,45


In [13]:
# Attach the list price of each call's technical problem. A left join keeps
# every call row; calls whose problem has no entry in prices_df get NaN.
#
# Any 'price' column left over from a previous run of this cell is dropped
# first. Without that, re-running the cell would produce 'price_x'/'price_y'
# and break later cells that read the 'price' column.
feature_calls_df = feature_calls_df.drop(columns='price', errors='ignore').merge(
    prices_df, on='technical_problem', how='left'
)

previous_calls_df = previous_calls_df.drop(columns='price', errors='ignore').merge(
    prices_df, on='technical_problem', how='left'
)

In [14]:
# Guarantee that a 'call_time' column exists: when the frame does not have
# one, add it filled with NaN as a placeholder (swap in real values if they
# become available).
if 'call_time' not in set(feature_calls_df.columns):
    feature_calls_df['call_time'] = np.nan


In [15]:
# Preview the first five call rows after the price merge and placeholder step.
feature_calls_df.head(n=5)

Unnamed: 0,date,tlf_number,technical_problem,difficulty,commission,call_id,location,price,call_time
0,2024-10-23 16:54:39.515508,72345741,teams_problems,medium,665.0,c_5e239e86-20f4-4053-a77b-a01dd3971aad,bangalore,665,
1,2024-10-23 12:18:18.515647,53336166,basic_hardware_troubleshooting,easy,576.0,c_1cbe224b-804b-4b30-bf78-198feea748f8,bangalore,720,
2,2024-10-20 01:42:41.515693,81959025,email_related_issues,medium,700.0,c_ca2f35cc-47fb-446f-a8ad-3c0360edb98c,bangalore,700,
3,2024-10-24 12:14:40.515734,96315388,device_and_peripheral_setup,medium,705.0,c_59b1c1cc-6362-4edd-aed8-720536d733e6,bangalore,705,
4,2024-10-24 19:24:07.515775,54054590,internet_problems,medium,680.0,c_69e87302-cad8-42a3-9fb2-e64c0a4b14e4,bangalore,680,


In [16]:
# Expected commission = list price scaled by a per-difficulty multiplier.
# Difficulty labels missing from the map yield NaN for that row.
difficulty_commission_map = {'easy': 0.8, 'medium': 1.0, 'hard': 1.2}
feature_calls_df['expected_commission'] = (
    feature_calls_df['difficulty']
    .map(difficulty_commission_map)
    .mul(feature_calls_df['price'])
)
# Discrepancy = recorded commission minus the expected one (0.0 when they agree).
feature_calls_df['profit_discrepancy'] = feature_calls_df['commission'].sub(
    feature_calls_df['expected_commission']
)


In [17]:
# Show the commission columns side by side for the first 100 calls.
feature_calls_df[
    ['commission', 'expected_commission', 'profit_discrepancy', 'difficulty']
].head(100)

Unnamed: 0,commission,expected_commission,profit_discrepancy,difficulty
0,665.0,665.0,0.0,medium
1,576.0,576.0,0.0,easy
2,700.0,700.0,0.0,medium
3,705.0,705.0,0.0,medium
4,680.0,680.0,0.0,medium
...,...,...,...,...
95,564.0,564.0,0.0,easy
96,588.0,588.0,0.0,easy
97,840.0,840.0,0.0,hard
98,798.0,798.0,0.0,hard


In [18]:
# Inner-join the reports with the worker roster on worker_id, so only report
# rows with a matching worker are kept.
worker_recommendation = pd.merge(
    reports_df,
    workers_df,
    on='worker_id',
    how='inner',
)


In [19]:
# Replace the full timestamp with its calendar date (time of day is dropped).
feature_calls_df['date'] = feature_calls_df['date'].pipe(pd.to_datetime).dt.date

# Number of calls per calendar date, as a flat (date, call_volume) frame.
call_volume_by_date_corrected = (
    feature_calls_df.groupby('date')
    .size()
    .rename('call_volume')
    .reset_index()
)

In [22]:
# Display the calls frame again, now with 'date' reduced to the calendar date.
feature_calls_df

Unnamed: 0,date,tlf_number,technical_problem,difficulty,commission,call_id,location,price,call_time,expected_commission,profit_discrepancy
0,2024-10-23,72345741,teams_problems,medium,665.0,c_5e239e86-20f4-4053-a77b-a01dd3971aad,bangalore,665,,665.0,0.0
1,2024-10-23,53336166,basic_hardware_troubleshooting,easy,576.0,c_1cbe224b-804b-4b30-bf78-198feea748f8,bangalore,720,,576.0,0.0
2,2024-10-20,81959025,email_related_issues,medium,700.0,c_ca2f35cc-47fb-446f-a8ad-3c0360edb98c,bangalore,700,,700.0,0.0
3,2024-10-24,96315388,device_and_peripheral_setup,medium,705.0,c_59b1c1cc-6362-4edd-aed8-720536d733e6,bangalore,705,,705.0,0.0
4,2024-10-24,54054590,internet_problems,medium,680.0,c_69e87302-cad8-42a3-9fb2-e64c0a4b14e4,bangalore,680,,680.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
25311,2024-10-22,34729150,device_and_peripheral_setup,hard,846.0,c_7f67de4f-76c1-40de-be23-ebdd0a636019,hyderabad,705,,846.0,0.0
25312,2024-10-21,76118231,operating_system_support,hard,882.0,c_8c4dc8eb-161a-4c60-a68e-2f54595a2756,hyderabad,735,,882.0,0.0
25313,2024-10-25,17245434,account_and_security_issues,easy,568.0,c_45767486-edd2-4207-a5df-3c35e15038db,hyderabad,710,,568.0,0.0
25314,2024-10-20,34434026,internet_problems,easy,544.0,c_603ffae3-4d8e-4b7f-ba53-e382dfdfce32,hyderabad,680,,544.0,0.0


In [21]:
# Show the recommendation flag next to the worker id for the first 100 rows.
worker_recommendation[['likely_to_recommend', 'worker_id']].head(100)

Unnamed: 0,likely_to_recommend,worker_id
0,1.0,w_3cbcf16f-2173-4281-bad0-b5579ba0f123
1,1.0,w_3cbcf16f-2173-4281-bad0-b5579ba0f123
2,1.0,w_3cbcf16f-2173-4281-bad0-b5579ba0f123
3,1.0,w_3cbcf16f-2173-4281-bad0-b5579ba0f123
4,1.0,w_3cbcf16f-2173-4281-bad0-b5579ba0f123
...,...,...
95,1.0,w_112137ec-c6c2-482a-8475-acf04ddfc22d
96,1.0,w_112137ec-c6c2-482a-8475-acf04ddfc22d
97,1.0,w_112137ec-c6c2-482a-8475-acf04ddfc22d
98,1.0,w_112137ec-c6c2-482a-8475-acf04ddfc22d
