In [3]:
import pandas as pd
import numpy as np
import json

In [4]:
# Load workers data and format correctly
workers_df = pd.read_json('extracted/workers.json').transpose().reset_index()
workers_df.columns = ['worker_id', 'name', 'base_salary']

In [5]:
workers_df.head()

Unnamed: 0,worker_id,name,base_salary
0,w_eb5ca7e7-197b-4128-9cdd-17b8d7d07803,Efren Selva,10119
1,w_ad84fb4e-5229-4c19-91e7-5e5cf8d3f20c,Alan Brown,10715
2,w_653b3a89-c5fa-466b-a477-f0c09a724cdd,Douglas Case,8259
3,w_7fbf0deb-0c65-449a-91ee-0e9e8cff4d0c,Christa Scott,11672
4,w_cf0ff121-da11-4b5c-b191-4d895ce97512,Christopher Greer,9408


In [6]:
# Load prices.json as a dictionary and convert to DataFrame
with open('Week1/prices.json') as f:
    prices_data = json.load(f)
prices_df = pd.DataFrame(list(prices_data.items()), columns=['technical_problem', 'price'])

In [4]:
prices_df.head()

Unnamed: 0,technical_problem,price
0,browser_and_web_based_support,650
1,teams_problems,665
2,internet_problems,680
3,cloud_and_storage_solutions,690
4,zoom_problems,695


In [7]:
# Load and flatten future_calls.json
with open('Week3/calls/calls_31.json') as f:
    feature_calls_data = json.load(f)
    
# Flatten feature_calls_data
feature_records = []
for location, calls in feature_calls_data.items():
    for call_id, call_info in calls.items():
        call_info['call_id'] = call_id
        call_info['location'] = location
        feature_records.append(call_info)
feature_calls_df = pd.DataFrame(feature_records)

In [8]:
feature_calls_df

Unnamed: 0,date,tlf_number,technical_problem,difficulty,commission,call_id,location
0,1970-01-01 00:00:00+00:00,30134029,basic_hardware_troubleshooting,hard,864.0,c_3d390740-68df-4579-b437-fcd1a25b0ca8,bangalore
1,1970-01-01 00:00:00+00:00,60496609,software_installation_and_configuration,hard,900.0,c_80affc8f-bf2c-4c0f-9460-9c95b2e007fa,bangalore
2,1970-01-01 00:00:00+00:00,91681812,operating_system_support,hard,882.0,c_b3278b67-74c0-42d0-9007-5b62e2f0c72e,bangalore
3,1970-01-01 00:00:00+00:00,04938996,zoom_problems,hard,834.0,c_d93e9286-1775-4e83-9711-17fa8011be6d,bangalore
4,1970-01-01 00:00:00+00:00,59536017,software_installation_and_configuration,easy,600.0,c_a7598d3e-8046-4fff-91e5-9b3eb6c52ed2,bangalore
...,...,...,...,...,...,...,...
26411,1970-01-01 00:00:00+00:00,40744548,operating_system_support,easy,588.0,c_dc439a5d-b162-4091-9a17-ac43f13d039a,hyderabad
26412,1970-01-01 00:00:00+00:00,91778025,software_installation_and_configuration,medium,750.0,c_d0264873-e288-4f28-a5d1-0074960600c4,hyderabad
26413,1970-01-01 00:00:00+00:00,49754083,zoom_problems,medium,695.0,c_08adad2e-709a-4748-828f-ddb1ec2913e5,hyderabad
26414,1970-01-01 00:00:00+00:00,83618760,device_and_peripheral_setup,hard,846.0,c_68d52beb-ba6e-48da-a847-0865d26a4ffa,hyderabad


In [9]:
# Load and flatten previous_calls.json
with open('Week2/calls/calls_21.json') as f:
    previous_calls_data = json.load(f)
previous_records = []
for location, calls in previous_calls_data.items():
    for call_id, call_info in calls.items():
        call_info['call_id'] = call_id
        call_info['location'] = location
        previous_records.append(call_info)
previous_calls_df = pd.DataFrame(previous_records)


In [10]:
previous_calls_df

Unnamed: 0,date,tlf_number,technical_problem,difficulty,commission,call_id,location
0,1970-01-01 00:00:00+00:00,20962728,device_and_peripheral_setup,medium,705.0,c_0195b1db-9c43-4a1b-a0ab-241d46b65ccc,bangalore
1,1970-01-01 00:00:00+00:00,93884912,device_and_peripheral_setup,hard,846.0,c_82aa1a0f-5bf6-49a8-9a32-ceda4b0bea08,bangalore
2,1970-01-01 00:00:00+00:00,34997608,email_related_issues,medium,700.0,c_f26adcb6-1fbd-4c5d-a844-e861dd6088e7,bangalore
3,1970-01-01 00:00:00+00:00,08890010,software_installation_and_configuration,easy,600.0,c_a930dc43-76d4-43fe-a6c4-af82427c65b5,bangalore
4,1970-01-01 00:00:00+00:00,41021032,internet_problems,hard,816.0,c_ee2e7e49-799b-48c0-a0fd-f89a02d7c463,bangalore
...,...,...,...,...,...,...,...
25517,1970-01-01 00:00:00+00:00,85406010,account_and_security_issues,medium,710.0,c_d777b5cb-66d4-4e2c-aeaf-96fc075c9b9a,hyderabad
25518,1970-01-01 00:00:00+00:00,23185048,software_installation_and_configuration,medium,750.0,c_cc3c5b8a-2f51-415a-9979-a2d8db08caec,hyderabad
25519,1970-01-01 00:00:00+00:00,17929997,internet_problems,hard,816.0,c_9bcd486e-9448-4745-9ff4-cdb9ca04bcc7,hyderabad
25520,1970-01-01 00:00:00+00:00,26338837,email_related_issues,hard,840.0,c_be9b8b36-7134-4eb4-94f4-9fa42c98ccee,hyderabad


In [1]:
# Load reports data
reports_df = pd.read_json('Week2/call_report_week2/call_report_21.json')
reports_df
previous_calls_df = previous_calls_df.merge(reports_df, on='call_id', how='left').reset_index()
previous_calls_df   

NameError: name 'pd' is not defined

In [13]:
# Load and flatten schedules data
with open('Week2/call_schedule_Uke2/call_schedule_21.json') as f:
    schedules_data = json.load(f)
schedule_records = []
for worker_id, calls in schedules_data.items():
    for call_id in calls:
        schedule_records.append({'worker_id': worker_id, 'call_id': call_id})
schedules_df = pd.DataFrame(schedule_records)
schedules_df


Unnamed: 0,worker_id,call_id
0,w_b36ab6fb-e94c-4aa3-84a5-ba2992dd4960,c_0195b1db-9c43-4a1b-a0ab-241d46b65ccc
1,w_b36ab6fb-e94c-4aa3-84a5-ba2992dd4960,c_1cc0ef8a-4014-46ea-a8a5-9fb2d2b996bf
2,w_b36ab6fb-e94c-4aa3-84a5-ba2992dd4960,c_cf015bea-77f5-478c-81c4-168e98ac0a35
3,w_b36ab6fb-e94c-4aa3-84a5-ba2992dd4960,c_f7786fe6-39e3-47c5-8539-f7e716189ef4
4,w_b36ab6fb-e94c-4aa3-84a5-ba2992dd4960,c_edc1511a-d8af-4984-be9c-28bed6dfae93
...,...,...
25517,w_f5400776-c6cc-4563-b835-b5ee56242aa1,c_574a7ee7-8d94-4132-99e1-39339faebf6a
25518,w_f5400776-c6cc-4563-b835-b5ee56242aa1,c_82059e16-0ced-41af-95c5-d838301ed91d
25519,w_f5400776-c6cc-4563-b835-b5ee56242aa1,c_32ca9096-88ea-442e-8ebb-8a4a66d557e8
25520,w_f5400776-c6cc-4563-b835-b5ee56242aa1,c_e25e72c1-2140-47bc-8a3a-729f01317166


In [1]:
worker_performance = schedules_df.groupby('worker_id').agg(
        call_count=('call_id', 'count')
    ).reset_index()
worker_performance


NameError: name 'schedules_df' is not defined

In [19]:
# Combine prices with calls based on technical problems
feature_calls_df = feature_calls_df.merge(prices_df, on='technical_problem', how='left').reset_index()

previous_calls_df = previous_calls_df.merge(prices_df, on='technical_problem', how='left').reset_index()

In [20]:
# Ensure call_time is available by adding a dummy column if missing
if 'call_time' not in feature_calls_df.columns:
    feature_calls_df['call_time'] = np.nan  # Or set this with actual values if available


In [21]:
feature_calls_df.head()

Unnamed: 0,index,date,tlf_number,technical_problem,difficulty,commission,call_id,location,price_x,price_y,price,call_time
0,0,1970-01-01 00:00:00+00:00,30134029,basic_hardware_troubleshooting,hard,864.0,c_3d390740-68df-4579-b437-fcd1a25b0ca8,bangalore,720,720,720,
1,1,1970-01-01 00:00:00+00:00,60496609,software_installation_and_configuration,hard,900.0,c_80affc8f-bf2c-4c0f-9460-9c95b2e007fa,bangalore,750,750,750,
2,2,1970-01-01 00:00:00+00:00,91681812,operating_system_support,hard,882.0,c_b3278b67-74c0-42d0-9007-5b62e2f0c72e,bangalore,735,735,735,
3,3,1970-01-01 00:00:00+00:00,4938996,zoom_problems,hard,834.0,c_d93e9286-1775-4e83-9711-17fa8011be6d,bangalore,695,695,695,
4,4,1970-01-01 00:00:00+00:00,59536017,software_installation_and_configuration,easy,600.0,c_a7598d3e-8046-4fff-91e5-9b3eb6c52ed2,bangalore,750,750,750,


In [22]:
# Calculate Expected Commission and Profit Discrepancy
difficulty_commission_map = {'hard': 1.2, 'medium': 1.0, 'easy': 0.8}
feature_calls_df['expected_commission'] = feature_calls_df['difficulty'].map(difficulty_commission_map) * feature_calls_df['price']
feature_calls_df['profit_discrepancy'] = feature_calls_df['commission'] - feature_calls_df['expected_commission']


In [23]:
feature_calls_df[['commission', 'expected_commission', 'profit_discrepancy', 'difficulty']][:100]

Unnamed: 0,commission,expected_commission,profit_discrepancy,difficulty
0,864.0,864.0,0.0,hard
1,900.0,900.0,0.0,hard
2,882.0,882.0,0.0,hard
3,834.0,834.0,0.0,hard
4,600.0,600.0,0.0,easy
...,...,...,...,...
95,588.0,588.0,0.0,easy
96,690.0,690.0,0.0,medium
97,665.0,665.0,0.0,medium
98,520.0,520.0,0.0,easy


In [24]:
worker_recommendation = reports_df.merge(workers_df, on='worker_id')


In [25]:
feature_calls_df['date'] = pd.to_datetime(feature_calls_df['date']).dt.date

# Group by 'date' (now containing only date component) and count call volume
call_volume_by_date_corrected = feature_calls_df.groupby('date').size().reset_index(name='call_volume')

In [26]:
feature_calls_df

Unnamed: 0,index,date,tlf_number,technical_problem,difficulty,commission,call_id,location,price_x,price_y,price,call_time,expected_commission,profit_discrepancy
0,0,1970-01-01,30134029,basic_hardware_troubleshooting,hard,864.0,c_3d390740-68df-4579-b437-fcd1a25b0ca8,bangalore,720,720,720,,864.0,0.0
1,1,1970-01-01,60496609,software_installation_and_configuration,hard,900.0,c_80affc8f-bf2c-4c0f-9460-9c95b2e007fa,bangalore,750,750,750,,900.0,0.0
2,2,1970-01-01,91681812,operating_system_support,hard,882.0,c_b3278b67-74c0-42d0-9007-5b62e2f0c72e,bangalore,735,735,735,,882.0,0.0
3,3,1970-01-01,04938996,zoom_problems,hard,834.0,c_d93e9286-1775-4e83-9711-17fa8011be6d,bangalore,695,695,695,,834.0,0.0
4,4,1970-01-01,59536017,software_installation_and_configuration,easy,600.0,c_a7598d3e-8046-4fff-91e5-9b3eb6c52ed2,bangalore,750,750,750,,600.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26411,26411,1970-01-01,40744548,operating_system_support,easy,588.0,c_dc439a5d-b162-4091-9a17-ac43f13d039a,hyderabad,735,735,735,,588.0,0.0
26412,26412,1970-01-01,91778025,software_installation_and_configuration,medium,750.0,c_d0264873-e288-4f28-a5d1-0074960600c4,hyderabad,750,750,750,,750.0,0.0
26413,26413,1970-01-01,49754083,zoom_problems,medium,695.0,c_08adad2e-709a-4748-828f-ddb1ec2913e5,hyderabad,695,695,695,,695.0,0.0
26414,26414,1970-01-01,83618760,device_and_peripheral_setup,hard,846.0,c_68d52beb-ba6e-48da-a847-0865d26a4ffa,hyderabad,705,705,705,,846.0,0.0


In [27]:
worker_recommendation[['likely_to_recommend', 'worker_id']][:100]

Unnamed: 0,likely_to_recommend,worker_id
0,1.0,w_b36ab6fb-e94c-4aa3-84a5-ba2992dd4960
1,1.0,w_b36ab6fb-e94c-4aa3-84a5-ba2992dd4960
2,1.0,w_b36ab6fb-e94c-4aa3-84a5-ba2992dd4960
3,1.0,w_b36ab6fb-e94c-4aa3-84a5-ba2992dd4960
4,7.6,w_b36ab6fb-e94c-4aa3-84a5-ba2992dd4960
...,...,...
95,6.6,w_374c6f15-8fb5-4504-9bac-cf9127fe5b2f
96,5.2,w_374c6f15-8fb5-4504-9bac-cf9127fe5b2f
97,1.0,w_374c6f15-8fb5-4504-9bac-cf9127fe5b2f
98,1.0,w_374c6f15-8fb5-4504-9bac-cf9127fe5b2f
