In [3]:
import pandas as pd
import json
import glob

In [4]:
# Load workers data and format correctly
workers_df = pd.read_json('extracted/workers.json').transpose().reset_index()
workers_df.columns = ['worker_id', 'name', 'base_salary']



In [5]:
# Load prices.json as a dictionary and convert to DataFrame
with open('prices.json') as f:
    prices_data = json.load(f)
prices_df = pd.DataFrame(list(prices_data.items()), columns=['technical_problem', 'price'])



In [6]:
# Function to load and flatten data from multiple JSON files in a directory
def load_and_flatten_data(directory, key_field):
    records = []
    for file_path in glob.glob(f'{directory}/*.json'):
        with open(file_path) as f:
            data = json.load(f)
        for location, calls in data.items():
            for call_id, call_info in calls.items():
                call_info['call_id'] = call_id
                call_info['location'] = location
                records.append(call_info)
    return pd.DataFrame(records)



In [7]:
# Load feature calls, previous calls, previous reports, and schedules
feature_calls_df = load_and_flatten_data('extracted/feature_calls', 'call_id')
previous_calls_df = load_and_flatten_data('extracted/previous_calls', 'call_id')


In [20]:
feature_calls_df

Unnamed: 0,date,tlf_number,technical_problem,difficulty,commission,call_id,location,price
0,2024-10-23 16:54:39.515508,72345741,teams_problems,medium,665.0,c_5e239e86-20f4-4053-a77b-a01dd3971aad,bangalore,665
1,2024-10-23 12:18:18.515647,53336166,basic_hardware_troubleshooting,easy,576.0,c_1cbe224b-804b-4b30-bf78-198feea748f8,bangalore,720
2,2024-10-20 01:42:41.515693,81959025,email_related_issues,medium,700.0,c_ca2f35cc-47fb-446f-a8ad-3c0360edb98c,bangalore,700
3,2024-10-24 12:14:40.515734,96315388,device_and_peripheral_setup,medium,705.0,c_59b1c1cc-6362-4edd-aed8-720536d733e6,bangalore,705
4,2024-10-24 19:24:07.515775,54054590,internet_problems,medium,680.0,c_69e87302-cad8-42a3-9fb2-e64c0a4b14e4,bangalore,680
...,...,...,...,...,...,...,...,...
257096,2024-10-22 17:11:21.910960,35675011,zoom_problems,hard,834.0,c_2b052e28-f48f-4c85-8b75-f73f27ef75f0,hyderabad,695
257097,2024-10-23 09:14:03.910999,22767682,internet_problems,medium,680.0,c_65dc01b2-5091-40d1-9838-c8a749b3f766,hyderabad,680
257098,2024-10-19 17:31:44.911038,24957531,email_related_issues,medium,700.0,c_827e75d4-1948-433a-ad5d-f2bdf083ec71,hyderabad,700
257099,2024-10-20 17:10:37.911077,00563223,internet_problems,medium,680.0,c_68b59f6b-00e7-4529-a353-d9ded93226ce,hyderabad,680


In [9]:

# Load previous and new reports
previous_reports = []
for file_path in glob.glob('extracted/previous_reports/*.json'):
    previous_reports.extend(pd.read_json(file_path).to_dict(orient='records'))
previous_reports_df = pd.DataFrame(previous_reports)



In [10]:
new_reports = []
for file_path in glob.glob('ikkeheltimal_call_reports_11_20/future_call_reports/*.json'):
    new_reports.extend(pd.read_json(file_path).to_dict(orient='records'))
new_reports_df = pd.DataFrame(new_reports)



In [11]:
# Combine prices with calls based on technical problems
feature_calls_df = feature_calls_df.merge(prices_df, on='technical_problem', how='left')
previous_calls_df = previous_calls_df.merge(prices_df, on='technical_problem', how='left')



In [21]:
previous_reports_df

Unnamed: 0,call_id,worker_id,call_time,likely_to_recommend,professional_score,call_profit
0,c_a317abc5-a7e1-40c3-b486-c4eefdf8be05,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,57.057332,1.0,,695
1,c_256844cb-b710-4da7-8f95-238a7fdd5261,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,69.294334,1.0,,735
2,c_46bfdc38-5130-4e08-9918-4851d84930ce,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,70.794334,1.0,,735
3,c_497ac5ca-8ff3-4773-968c-2e300486c35d,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,54.379299,1.0,,532
4,c_97384374-b420-4259-aaa9-f2e1571928cb,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,77.457332,1.0,,834
...,...,...,...,...,...,...
72511,c_282338ea-b4a3-4d55-801c-1935c08be259,w_ff6501e8-b49a-4512-a9eb-e06565d21355,37.453295,1.0,,705
72512,c_9f076c7e-aa1f-44e2-a904-ec7daaec52c4,w_ff6501e8-b49a-4512-a9eb-e06565d21355,62.093346,1.0,,780
72513,c_bece22d1-0ba0-4208-91f8-366c8267177c,w_ff6501e8-b49a-4512-a9eb-e06565d21355,61.482360,1.0,,864
72514,c_ac55b479-d0ad-4c68-a637-87b3c360ceb9,w_ff6501e8-b49a-4512-a9eb-e06565d21355,32.353295,1.0,,564


In [26]:
# Load previous schedules
schedule_records = []
for file_path in glob.glob('extracted/previous_schedules/*.json'):
    with open(file_path) as f:
        schedules_data = json.load(f)
    for worker_id, calls in schedules_data.items():
        for call_id in calls:
            schedule_records.append({'worker_id': worker_id, 'call_id': call_id})
previous_schedules_df = pd.DataFrame(schedule_records)


In [27]:
previous_schedules_df

Unnamed: 0,worker_id,call_id
0,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,c_a317abc5-a7e1-40c3-b486-c4eefdf8be05
1,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,c_256844cb-b710-4da7-8f95-238a7fdd5261
2,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,c_46bfdc38-5130-4e08-9918-4851d84930ce
3,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,c_497ac5ca-8ff3-4773-968c-2e300486c35d
4,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,c_97384374-b420-4259-aaa9-f2e1571928cb
...,...,...
283110,w_ff6501e8-b49a-4512-a9eb-e06565d21355,c_bf99f2ec-a582-4123-8b11-28d6a1c39551
283111,w_ff6501e8-b49a-4512-a9eb-e06565d21355,c_31d6c622-ad70-4772-908c-99768c628e43
283112,w_ff6501e8-b49a-4512-a9eb-e06565d21355,c_9b26aba2-542f-4ba4-931a-1feb8ffacb8f
283113,w_ff6501e8-b49a-4512-a9eb-e06565d21355,c_541e5944-02c7-4297-a66a-6f5949bd2688


In [38]:
# Load future schedules
feature_schedules = []
for file_path in glob.glob('extracted/feature_schedules/*.json'):
    with open(file_path) as f:
        schedules_data = json.load(f)
    for worker_id, calls in schedules_data.items():
        for call_id in calls:
            feature_schedules.append({'worker_id': worker_id, 'call_id': call_id})
future_schedules_df = pd.DataFrame(feature_schedules)


In [39]:
future_schedules_df

Unnamed: 0,worker_id,call_id
0,w_eb5ca7e7-197b-4128-9cdd-17b8d7d07803,c_718780d2-05b4-45be-9685-cd37583049ab
1,w_ad84fb4e-5229-4c19-91e7-5e5cf8d3f20c,c_cd9c31f6-2c9c-4b41-95c2-0d6cfa5d7c52
2,w_ad84fb4e-5229-4c19-91e7-5e5cf8d3f20c,c_b5cce9b3-82f6-4c90-9d15-a27088eff079
3,w_ad84fb4e-5229-4c19-91e7-5e5cf8d3f20c,c_9bd813d0-005e-412d-b4aa-505d9282d6eb
4,w_ad84fb4e-5229-4c19-91e7-5e5cf8d3f20c,c_98a0c19f-aece-47a5-b8db-81914795e10f
...,...,...
257096,w_f5400776-c6cc-4563-b835-b5ee56242aa1,c_09797d53-5ccc-4054-8d43-5ef5ac3abba5
257097,w_f5400776-c6cc-4563-b835-b5ee56242aa1,c_f746aa7f-47b7-47b0-bf38-46fb1bc53baa
257098,w_f5400776-c6cc-4563-b835-b5ee56242aa1,c_1995c8af-691a-47b1-81d1-f736aa914171
257099,w_f5400776-c6cc-4563-b835-b5ee56242aa1,c_dc884591-ee3c-42d2-a0dc-7dd5ef9b3cf1


In [14]:
# Calculate key performance metrics
def calculate_metrics(report_df):
    return {
        "Average Profit": report_df['call_profit'].mean(),
        "Average Call Time": report_df['call_time'].mean(),
        "Average Recommendation": report_df['likely_to_recommend'].mean(),
        
    }



In [15]:
new_reports_df

Unnamed: 0,call_id,worker_id,call_time,likely_to_recommend,professional_score,call_profit
0,c_670f54f4-9041-4ae2-85b9-05d30a4dbf23,w_eb5ca7e7-197b-4128-9cdd-17b8d7d07803,33.017543,1.0,,780
1,c_a4b4e43f-5939-4992-b576-c844f90a8131,w_eb5ca7e7-197b-4128-9cdd-17b8d7d07803,26.193912,1.0,,900
2,c_e2a563ff-802e-4f56-9d08-47fadc3270b6,w_eb5ca7e7-197b-4128-9cdd-17b8d7d07803,26.551302,1.0,,864
3,c_c2f70a56-60d8-45b8-b90c-73a6c24d6cc8,w_eb5ca7e7-197b-4128-9cdd-17b8d7d07803,28.051302,1.0,,864
4,c_17d86ada-c033-460e-9205-47b5c1e1f248,w_eb5ca7e7-197b-4128-9cdd-17b8d7d07803,31.855709,1.0,,846
...,...,...,...,...,...,...
22062,c_8905e65e-ef82-419c-a0c3-96e74589791e,w_dd684140-be47-455f-a00f-0c21fb54752e,33.134185,1.0,,852
22063,c_b80b05f3-cf5b-47f3-a25f-0219e06dea0c,w_dd684140-be47-455f-a00f-0c21fb54752e,44.738654,1.0,,864
22064,c_593c7020-4e2f-4e01-875f-a156134d02db,w_dd684140-be47-455f-a00f-0c21fb54752e,47.352038,1.0,,834
22065,c_2aa0b6d7-9b7c-4d80-bc35-547021944abf,w_dd684140-be47-455f-a00f-0c21fb54752e,46.770240,1.0,,816


In [16]:
feature_calls_df

Unnamed: 0,date,tlf_number,technical_problem,difficulty,commission,call_id,location,price
0,2024-10-23 16:54:39.515508,72345741,teams_problems,medium,665.0,c_5e239e86-20f4-4053-a77b-a01dd3971aad,bangalore,665
1,2024-10-23 12:18:18.515647,53336166,basic_hardware_troubleshooting,easy,576.0,c_1cbe224b-804b-4b30-bf78-198feea748f8,bangalore,720
2,2024-10-20 01:42:41.515693,81959025,email_related_issues,medium,700.0,c_ca2f35cc-47fb-446f-a8ad-3c0360edb98c,bangalore,700
3,2024-10-24 12:14:40.515734,96315388,device_and_peripheral_setup,medium,705.0,c_59b1c1cc-6362-4edd-aed8-720536d733e6,bangalore,705
4,2024-10-24 19:24:07.515775,54054590,internet_problems,medium,680.0,c_69e87302-cad8-42a3-9fb2-e64c0a4b14e4,bangalore,680
...,...,...,...,...,...,...,...,...
257096,2024-10-22 17:11:21.910960,35675011,zoom_problems,hard,834.0,c_2b052e28-f48f-4c85-8b75-f73f27ef75f0,hyderabad,695
257097,2024-10-23 09:14:03.910999,22767682,internet_problems,medium,680.0,c_65dc01b2-5091-40d1-9838-c8a749b3f766,hyderabad,680
257098,2024-10-19 17:31:44.911038,24957531,email_related_issues,medium,700.0,c_827e75d4-1948-433a-ad5d-f2bdf083ec71,hyderabad,700
257099,2024-10-20 17:10:37.911077,00563223,internet_problems,medium,680.0,c_68b59f6b-00e7-4529-a353-d9ded93226ce,hyderabad,680


In [17]:
# Calculate metrics for previous and new reports
previous_metrics = calculate_metrics(previous_reports_df)
new_metrics = calculate_metrics(new_reports_df)



In [18]:
previous_reports_df = previous_reports_df.merge(load_and_flatten_data('extracted/previous_calls'), on='call_id', how='left')
new_reports_df = new_reports_df.merge(load_and_flatten_data('extracted/feature_calls'), on='call_id', how='left')


TypeError: load_and_flatten_data() missing 1 required positional argument: 'key_field'

In [None]:
# Calculate percentage changes
comparison = {
    "Metric": ["Average Profit", "Average Call Time", "Average Recommendation"],
    "Previous": [previous_metrics["Average Profit"], previous_metrics["Average Call Time"], previous_metrics["Average Recommendation"]],
    "New": [new_metrics["Average Profit"], new_metrics["Average Call Time"], new_metrics["Average Recommendation"]],
    "Change (%)": [
        (new_metrics["Average Profit"] - previous_metrics["Average Profit"]) / previous_metrics["Average Profit"] * 100 if previous_metrics["Average Profit"] else None,
        (new_metrics["Average Call Time"] - previous_metrics["Average Call Time"]) / previous_metrics["Average Call Time"] * 100 if previous_metrics["Average Call Time"] else None,
        (new_metrics["Average Recommendation"] - previous_metrics["Average Recommendation"]) / previous_metrics["Average Recommendation"] * 100 if previous_metrics["Average Recommendation"] else None
    ]
}

# Create a DataFrame for comparison
comparison_df = pd.DataFrame(comparison)

In [None]:
comparison_df

Unnamed: 0,Metric,Previous,New,Change (%)
0,Average Profit,698.655442,814.616305,16.597718
1,Average Call Time,44.539292,51.851774,16.418046
2,Average Recommendation,1.546179,1.459705,-5.592771
