In [2]:
import pandas as pd
import json
import glob

In [3]:
# Read the workers file and reshape it to one row per worker.
# presumably the JSON is keyed by worker id, which is why it is flipped — verify against the file
workers_raw = pd.read_json('extracted/workers.json')
workers_df = workers_raw.T.reset_index()
# Positional rename: the former index first, then the two remaining columns.
# NOTE(review): transposing a mixed-type frame may leave columns as object dtype —
# confirm base_salary is numeric before doing arithmetic on it.
workers_df.columns = ['worker_id', 'name', 'base_salary']

In [4]:
# Read prices.json (a flat mapping) and turn each key/value pair into one row.
with open('Week1/prices.json') as price_file:
    prices_data = json.load(price_file)
price_rows = [(problem, price) for problem, price in prices_data.items()]
prices_df = pd.DataFrame(price_rows, columns=['technical_problem', 'price'])

In [5]:
# Function to load and flatten data from multiple JSON files in a directory
def load_and_flatten_data(directory, key_field):
    """Flatten every ``*.json`` file in ``directory`` into one DataFrame.

    Each file is read as a mapping ``location -> {call_id -> call_info}``.
    One row is produced per call, carrying the fields of ``call_info`` plus
    a ``call_id`` and a ``location`` column.

    Parameters
    ----------
    directory : str
        Folder searched (non-recursively) for ``*.json`` files.
    key_field : str
        Not used by the function body; kept so existing call sites keep working.

    Returns
    -------
    pandas.DataFrame
        One row per call. Empty (no columns) when no file matches.
    """
    records = []
    # glob yields paths in arbitrary order; sorting makes the row order reproducible.
    for file_path in sorted(glob.glob(f'{directory}/*.json')):
        # Read as UTF-8 explicitly instead of relying on the platform's locale default.
        with open(file_path, encoding='utf-8') as f:
            data = json.load(f)
        for location, calls in data.items():
            for call_id, call_info in calls.items():
                # Build a new dict so the parsed JSON structure is not mutated.
                records.append({**call_info, 'call_id': call_id, 'location': location})
    return pd.DataFrame(records)

In [6]:
# Merge the per-location call dictionaries of every Week2 calls file into one mapping.
calls_data = {}
for calls_path in glob.glob('Week2/calls/*.json'):
    with open(calls_path, 'r') as calls_file:
        per_location = json.load(calls_file)
    for location, calls in per_location.items():
        # A call id seen again for the same location is overwritten by the later file.
        calls_data.setdefault(location, {}).update(calls)

calls_data

{'bangalore': {'c_0195b1db-9c43-4a1b-a0ab-241d46b65ccc': {'date': '1970-01-01 00:00:00+00:00',
   'tlf_number': '20962728',
   'technical_problem': 'device_and_peripheral_setup',
   'difficulty': 'medium',
   'commission': 705},
  'c_82aa1a0f-5bf6-49a8-9a32-ceda4b0bea08': {'date': '1970-01-01 00:00:00+00:00',
   'tlf_number': '93884912',
   'technical_problem': 'device_and_peripheral_setup',
   'difficulty': 'hard',
   'commission': 846.0},
  'c_f26adcb6-1fbd-4c5d-a844-e861dd6088e7': {'date': '1970-01-01 00:00:00+00:00',
   'tlf_number': '34997608',
   'technical_problem': 'email_related_issues',
   'difficulty': 'medium',
   'commission': 700},
  'c_a930dc43-76d4-43fe-a6c4-af82427c65b5': {'date': '1970-01-01 00:00:00+00:00',
   'tlf_number': '08890010',
   'technical_problem': 'software_installation_and_configuration',
   'difficulty': 'easy',
   'commission': 600.0},
  'c_ee2e7e49-799b-48c0-a0fd-f89a02d7c463': {'date': '1970-01-01 00:00:00+00:00',
   'tlf_number': '41021032',
   'tec

In [8]:
# Flatten the call files: Week2 as the previous calls, Week3 as the future calls.
previous_calls_df = load_and_flatten_data('Week2/calls', 'call_id')
future_calls_df = load_and_flatten_data('Week3/calls', 'call_id')
previous_calls_df

Unnamed: 0,date,tlf_number,technical_problem,difficulty,commission,call_id,location
0,1970-01-01 00:00:00+00:00,20962728,device_and_peripheral_setup,medium,705.0,c_0195b1db-9c43-4a1b-a0ab-241d46b65ccc,bangalore
1,1970-01-01 00:00:00+00:00,93884912,device_and_peripheral_setup,hard,846.0,c_82aa1a0f-5bf6-49a8-9a32-ceda4b0bea08,bangalore
2,1970-01-01 00:00:00+00:00,34997608,email_related_issues,medium,700.0,c_f26adcb6-1fbd-4c5d-a844-e861dd6088e7,bangalore
3,1970-01-01 00:00:00+00:00,08890010,software_installation_and_configuration,easy,600.0,c_a930dc43-76d4-43fe-a6c4-af82427c65b5,bangalore
4,1970-01-01 00:00:00+00:00,41021032,internet_problems,hard,816.0,c_ee2e7e49-799b-48c0-a0fd-f89a02d7c463,bangalore
...,...,...,...,...,...,...,...
260288,1970-01-01 00:00:00+00:00,21482224,email_related_issues,easy,560.0,c_592e71fd-a512-4a6d-ac83-46779c378d42,hyderabad
260289,1970-01-01 00:00:00+00:00,54350588,browser_and_web_based_support,easy,520.0,c_fe0eb66a-f7de-4db0-b421-f98b6a1e387c,hyderabad
260290,1970-01-01 00:00:00+00:00,78885138,software_installation_and_configuration,easy,600.0,c_4e7ca62b-18d8-45fa-918c-b3da7fa5436b,hyderabad
260291,1970-01-01 00:00:00+00:00,48763576,teams_problems,hard,798.0,c_fc9f040e-99f1-4785-8fbd-e261632d5ae2,hyderabad


In [7]:
# Inspect the flattened future calls frame.
# NOTE(review): this cell's recorded execution count (7) is lower than that of the
# cell defining future_calls_df (8) — re-run the notebook top to bottom to confirm.
future_calls_df

Unnamed: 0,date,tlf_number,technical_problem,difficulty,commission,call_id,location
0,1970-01-01 00:00:00+00:00,30134029,basic_hardware_troubleshooting,hard,864.0,c_3d390740-68df-4579-b437-fcd1a25b0ca8,bangalore
1,1970-01-01 00:00:00+00:00,60496609,software_installation_and_configuration,hard,900.0,c_80affc8f-bf2c-4c0f-9460-9c95b2e007fa,bangalore
2,1970-01-01 00:00:00+00:00,91681812,operating_system_support,hard,882.0,c_b3278b67-74c0-42d0-9007-5b62e2f0c72e,bangalore
3,1970-01-01 00:00:00+00:00,04938996,zoom_problems,hard,834.0,c_d93e9286-1775-4e83-9711-17fa8011be6d,bangalore
4,1970-01-01 00:00:00+00:00,59536017,software_installation_and_configuration,easy,600.0,c_a7598d3e-8046-4fff-91e5-9b3eb6c52ed2,bangalore
...,...,...,...,...,...,...,...
260052,1970-01-01 00:00:00+00:00,60822405,account_and_security_issues,medium,710.0,c_089dc1ec-ca09-4d9c-82eb-9a4a1802fe23,hyderabad
260053,1970-01-01 00:00:00+00:00,62836494,operating_system_support,easy,588.0,c_984b60ae-efed-4533-b6b2-4dcaf9cf551b,hyderabad
260054,1970-01-01 00:00:00+00:00,17220135,teams_problems,easy,532.0,c_2e9a9bef-b496-4c12-9cdb-9646547d0954,hyderabad
260055,1970-01-01 00:00:00+00:00,47242825,zoom_problems,hard,834.0,c_cd0f66f1-52df-40f4-b1d2-5d26b92183f6,hyderabad


In [1]:

# Load the previous reports: every JSON file is read with pandas and its rows are
# gathered into one list of records before building a single frame.
# Requires the imports cell (glob, pd) to have been run first.
# NOTE(review): this pattern targets the same Week2/call_report_week2 folder as the
# new-reports cell — confirm the intended source of the *previous* reports.
previous_reports = [
    record
    for report_path in glob.glob('Week2/call_report_week2/*.json')
    for record in pd.read_json(report_path).to_dict(orient='records')
]
previous_reports_df = pd.DataFrame(previous_reports)
previous_reports_df


NameError: name 'glob' is not defined

In [9]:
# Load the new reports into one frame.
# The pattern uses forward slashes, which work on every platform; a backslash
# before 'c' is an invalid escape sequence and is not a path separator outside Windows.
new_reports = []
# Sorted so the row order does not depend on the file system's listing order.
for file_path in sorted(glob.glob('Week2/call_report_week2/*.json')):
    new_reports.extend(pd.read_json(file_path).to_dict(orient='records'))
new_reports_df = pd.DataFrame(new_reports)



In [10]:
# Inspect the new reports frame (one row per reported call).
new_reports_df

Unnamed: 0,call_id,worker_id,call_time,likely_to_recommend,professional_score,call_profit
0,c_0195b1db-9c43-4a1b-a0ab-241d46b65ccc,w_b36ab6fb-e94c-4aa3-84a5-ba2992dd4960,45.522797,1.0,,705
1,c_1cc0ef8a-4014-46ea-a8a5-9fb2d2b996bf,w_b36ab6fb-e94c-4aa3-84a5-ba2992dd4960,30.887813,1.0,,520
2,c_cf015bea-77f5-478c-81c4-168e98ac0a35,w_b36ab6fb-e94c-4aa3-84a5-ba2992dd4960,64.506471,1.0,,840
3,c_f7786fe6-39e3-47c5-8539-f7e716189ef4,w_b36ab6fb-e94c-4aa3-84a5-ba2992dd4960,64.682155,1.0,,834
4,c_edc1511a-d8af-4984-be9c-28bed6dfae93,w_b36ab6fb-e94c-4aa3-84a5-ba2992dd4960,15.946662,7.6,,600
...,...,...,...,...,...,...
67138,c_33239abc-6fd2-44b4-bb54-1c5d45c1644f,w_f5400776-c6cc-4563-b835-b5ee56242aa1,15.521120,10.0,,552
67139,c_9b38a1a3-a837-4590-9577-42edaf0ccd3a,w_f5400776-c6cc-4563-b835-b5ee56242aa1,15.965120,9.8,,690
67140,c_2659df8a-b775-4904-8d45-cf8e9cf0a009,w_f5400776-c6cc-4563-b835-b5ee56242aa1,15.867863,8.5,,564
67141,c_f9ac151b-9bcc-4327-99cd-7468bcecfa0d,w_f5400776-c6cc-4563-b835-b5ee56242aa1,29.894669,4.9,,700


In [11]:
# Inspect the previous reports frame (one row per reported call).
previous_reports_df

Unnamed: 0,call_id,worker_id,call_time,likely_to_recommend,professional_score,call_profit
0,c_a317abc5-a7e1-40c3-b486-c4eefdf8be05,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,57.057332,1.0,,695
1,c_256844cb-b710-4da7-8f95-238a7fdd5261,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,69.294334,1.0,,735
2,c_46bfdc38-5130-4e08-9918-4851d84930ce,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,70.794334,1.0,,735
3,c_497ac5ca-8ff3-4773-968c-2e300486c35d,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,54.379299,1.0,,532
4,c_97384374-b420-4259-aaa9-f2e1571928cb,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,77.457332,1.0,,834
...,...,...,...,...,...,...
72511,c_282338ea-b4a3-4d55-801c-1935c08be259,w_ff6501e8-b49a-4512-a9eb-e06565d21355,37.453295,1.0,,705
72512,c_9f076c7e-aa1f-44e2-a904-ec7daaec52c4,w_ff6501e8-b49a-4512-a9eb-e06565d21355,62.093346,1.0,,780
72513,c_bece22d1-0ba0-4208-91f8-366c8267177c,w_ff6501e8-b49a-4512-a9eb-e06565d21355,61.482360,1.0,,864
72514,c_ac55b479-d0ad-4c68-a637-87b3c360ceb9,w_ff6501e8-b49a-4512-a9eb-e06565d21355,32.353295,1.0,,564


In [12]:
# Combine prices with calls based on technical problems.
# A 'price' column left over from an earlier run of this cell is dropped first, so
# re-running the cell does not produce duplicated price_x / price_y columns.
future_calls_df = (
    future_calls_df
    .drop(columns='price', errors='ignore')
    .merge(prices_df, on='technical_problem', how='left')
)
previous_calls_df = (
    previous_calls_df
    .drop(columns='price', errors='ignore')
    .merge(prices_df, on='technical_problem', how='left')
)



In [13]:
# Load previous schedules: each file maps a worker id to the call ids assigned to
# that worker; every (worker, call) pair becomes one row.
schedule_records = []
for schedule_path in glob.glob('extracted/previous_schedules/*.json'):
    with open(schedule_path) as schedule_file:
        assignments = json.load(schedule_file)
    schedule_records.extend(
        {'worker_id': worker_id, 'call_id': call_id}
        for worker_id, call_ids in assignments.items()
        for call_id in call_ids
    )
previous_schedules_df = pd.DataFrame(schedule_records)


In [14]:
# Inspect the previous schedules frame (one worker_id / call_id pair per row).
previous_schedules_df

Unnamed: 0,worker_id,call_id
0,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,c_a317abc5-a7e1-40c3-b486-c4eefdf8be05
1,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,c_256844cb-b710-4da7-8f95-238a7fdd5261
2,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,c_46bfdc38-5130-4e08-9918-4851d84930ce
3,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,c_497ac5ca-8ff3-4773-968c-2e300486c35d
4,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,c_97384374-b420-4259-aaa9-f2e1571928cb
...,...,...
283110,w_ff6501e8-b49a-4512-a9eb-e06565d21355,c_bf99f2ec-a582-4123-8b11-28d6a1c39551
283111,w_ff6501e8-b49a-4512-a9eb-e06565d21355,c_31d6c622-ad70-4772-908c-99768c628e43
283112,w_ff6501e8-b49a-4512-a9eb-e06565d21355,c_9b26aba2-542f-4ba4-931a-1feb8ffacb8f
283113,w_ff6501e8-b49a-4512-a9eb-e06565d21355,c_541e5944-02c7-4297-a66a-6f5949bd2688


In [15]:
# Load future schedules: each file maps a worker id to the call ids assigned to
# that worker; every (worker, call) pair becomes one row.
# The pattern uses forward slashes, which work on every platform; a backslash
# before 'c' is an invalid escape sequence and is not a path separator outside Windows.
future_schedules = []
# Sorted so the row order does not depend on the file system's listing order.
for file_path in sorted(glob.glob('Week2/call_schedule_Uke2/*.json')):
    with open(file_path) as f:
        schedules_data = json.load(f)
    for worker_id, calls in schedules_data.items():
        for call_id in calls:
            future_schedules.append({'worker_id': worker_id, 'call_id': call_id})
future_schedules_df = pd.DataFrame(future_schedules)


In [16]:
# Inspect the future schedules frame (one worker_id / call_id pair per row).
future_schedules_df

Unnamed: 0,worker_id,call_id
0,w_b36ab6fb-e94c-4aa3-84a5-ba2992dd4960,c_0195b1db-9c43-4a1b-a0ab-241d46b65ccc
1,w_b36ab6fb-e94c-4aa3-84a5-ba2992dd4960,c_1cc0ef8a-4014-46ea-a8a5-9fb2d2b996bf
2,w_b36ab6fb-e94c-4aa3-84a5-ba2992dd4960,c_cf015bea-77f5-478c-81c4-168e98ac0a35
3,w_b36ab6fb-e94c-4aa3-84a5-ba2992dd4960,c_f7786fe6-39e3-47c5-8539-f7e716189ef4
4,w_b36ab6fb-e94c-4aa3-84a5-ba2992dd4960,c_edc1511a-d8af-4984-be9c-28bed6dfae93
...,...,...
260288,w_f5400776-c6cc-4563-b835-b5ee56242aa1,c_a703151a-8894-4fe4-b208-514d92c5052c
260289,w_f5400776-c6cc-4563-b835-b5ee56242aa1,c_0e7226d4-d988-4ee6-a55a-5cf2e6818de3
260290,w_f5400776-c6cc-4563-b835-b5ee56242aa1,c_a3492ea4-3a2c-4d4a-a915-4503d4312672
260291,w_f5400776-c6cc-4563-b835-b5ee56242aa1,c_170c7c3a-4228-4192-93a7-c942c8b54828


In [17]:
# Calculate key performance metrics
def calculate_metrics(report_df):
    """Return the column means of a report frame as a labelled dict.

    ``report_df`` must provide the columns ``call_profit``, ``call_time`` and
    ``likely_to_recommend``. The result maps "Average Profit",
    "Average Call Time" and "Average Recommendation" to the respective mean.
    """
    # (label, source column) pairs, in the order the labels appear in the result.
    metric_columns = (
        ("Average Profit", 'call_profit'),
        ("Average Call Time", 'call_time'),
        ("Average Recommendation", 'likely_to_recommend'),
    )
    return {label: report_df[column].mean() for label, column in metric_columns}



In [18]:
# Inspect future_calls_df after the price merge (a 'price' column is now present).
# NOTE(review): the recorded output lists the same call ids as the Week2 frame shown
# earlier in the notebook — confirm future_calls_df held Week3 data when this ran.
future_calls_df

Unnamed: 0,date,tlf_number,technical_problem,difficulty,commission,call_id,location,price
0,1970-01-01 00:00:00+00:00,20962728,device_and_peripheral_setup,medium,705.0,c_0195b1db-9c43-4a1b-a0ab-241d46b65ccc,bangalore,705
1,1970-01-01 00:00:00+00:00,93884912,device_and_peripheral_setup,hard,846.0,c_82aa1a0f-5bf6-49a8-9a32-ceda4b0bea08,bangalore,705
2,1970-01-01 00:00:00+00:00,34997608,email_related_issues,medium,700.0,c_f26adcb6-1fbd-4c5d-a844-e861dd6088e7,bangalore,700
3,1970-01-01 00:00:00+00:00,08890010,software_installation_and_configuration,easy,600.0,c_a930dc43-76d4-43fe-a6c4-af82427c65b5,bangalore,750
4,1970-01-01 00:00:00+00:00,41021032,internet_problems,hard,816.0,c_ee2e7e49-799b-48c0-a0fd-f89a02d7c463,bangalore,680
...,...,...,...,...,...,...,...,...
260288,1970-01-01 00:00:00+00:00,21482224,email_related_issues,easy,560.0,c_592e71fd-a512-4a6d-ac83-46779c378d42,hyderabad,700
260289,1970-01-01 00:00:00+00:00,54350588,browser_and_web_based_support,easy,520.0,c_fe0eb66a-f7de-4db0-b421-f98b6a1e387c,hyderabad,650
260290,1970-01-01 00:00:00+00:00,78885138,software_installation_and_configuration,easy,600.0,c_4e7ca62b-18d8-45fa-918c-b3da7fa5436b,hyderabad,750
260291,1970-01-01 00:00:00+00:00,48763576,teams_problems,hard,798.0,c_fc9f040e-99f1-4785-8fbd-e261632d5ae2,hyderabad,665


In [19]:
# Summarise both report sets with the same metric function.
previous_metrics, new_metrics = (
    calculate_metrics(previous_reports_df),
    calculate_metrics(new_reports_df),
)

In [20]:
# Attach the call details to each report via call_id; the left join keeps every
# report row even when no matching call is found.
previous_reports_df = previous_reports_df.merge(
    load_and_flatten_data('extracted/previous_calls', 'call_id'),
    on='call_id',
    how='left',
)
# Forward slash in the directory: a backslash before 'c' is an invalid escape
# sequence and is not a path separator outside Windows, so no file would match there.
new_reports_df = new_reports_df.merge(
    load_and_flatten_data('Week2/calls', 'call_id'),
    on='call_id',
    how='left',
)


In [21]:
# Inspect the previous reports after the call details were merged in on call_id.
previous_reports_df

Unnamed: 0,call_id,worker_id,call_time,likely_to_recommend,professional_score,call_profit,date,tlf_number,technical_problem,difficulty,commission,location
0,c_a317abc5-a7e1-40c3-b486-c4eefdf8be05,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,57.057332,1.0,,695,2024-10-19 07:28:55.254686,85062036,zoom_problems,medium,695.0,bangalore
1,c_256844cb-b710-4da7-8f95-238a7fdd5261,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,69.294334,1.0,,735,2024-10-25 08:14:21.255716,56920356,operating_system_support,medium,735.0,bangalore
2,c_46bfdc38-5130-4e08-9918-4851d84930ce,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,70.794334,1.0,,735,2024-10-25 08:05:02.256263,71277349,operating_system_support,medium,735.0,bangalore
3,c_497ac5ca-8ff3-4773-968c-2e300486c35d,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,54.379299,1.0,,532,2024-10-25 16:49:01.261099,26307674,teams_problems,easy,532.0,bangalore
4,c_97384374-b420-4259-aaa9-f2e1571928cb,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,77.457332,1.0,,834,2024-10-25 13:12:40.265332,44681209,zoom_problems,hard,834.0,bangalore
...,...,...,...,...,...,...,...,...,...,...,...,...
72511,c_282338ea-b4a3-4d55-801c-1935c08be259,w_ff6501e8-b49a-4512-a9eb-e06565d21355,37.453295,1.0,,705,2024-10-24 18:24:02.185299,68091458,device_and_peripheral_setup,medium,705.0,hyderabad
72512,c_9f076c7e-aa1f-44e2-a904-ec7daaec52c4,w_ff6501e8-b49a-4512-a9eb-e06565d21355,62.093346,1.0,,780,2024-10-24 03:11:06.185759,46370406,browser_and_web_based_support,hard,780.0,hyderabad
72513,c_bece22d1-0ba0-4208-91f8-366c8267177c,w_ff6501e8-b49a-4512-a9eb-e06565d21355,61.482360,1.0,,864,2024-10-22 12:55:45.188029,38466912,basic_hardware_troubleshooting,hard,864.0,hyderabad
72514,c_ac55b479-d0ad-4c68-a637-87b3c360ceb9,w_ff6501e8-b49a-4512-a9eb-e06565d21355,32.353295,1.0,,564,2024-10-20 07:32:03.190272,91733895,device_and_peripheral_setup,easy,564.0,hyderabad


In [22]:
# Build the previous-vs-new comparison, one row per metric.
metric_names = ["Average Profit", "Average Call Time", "Average Recommendation"]
comparison = {
    "Metric": list(metric_names),
    "Previous": [previous_metrics[name] for name in metric_names],
    "New": [new_metrics[name] for name in metric_names],
    # Relative change in percent; None when the previous value is falsy (e.g. 0),
    # which avoids dividing by zero.
    "Change (%)": [
        (new_metrics[name] - previous_metrics[name]) / previous_metrics[name] * 100
        if previous_metrics[name] else None
        for name in metric_names
    ],
}

# Tabulate the comparison for display.
comparison_df = pd.DataFrame(comparison)

In [24]:
# Show the previous-vs-new metric comparison table.
comparison_df

Unnamed: 0,Metric,Previous,New,Change (%)
0,Average Profit,698.655442,698.894464,0.034212
1,Average Call Time,44.539292,43.545245,-2.231845
2,Average Recommendation,1.546179,1.544043,-0.138112
