In [1]:
import pandas as pd
import json
import glob
import logging
import time

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

start_time = time.time()
logging.info("Loading workers data and calculating recommendation scores...")


2024-10-30 19:04:10,373 - INFO - Loading workers data and calculating recommendation scores...


In [17]:

# Load workers data and format correctly
workers_df = pd.read_json('extracted/workers.json').transpose().reset_index()
workers_df.columns = ['worker_id', 'name', 'base_salary']



In [18]:
workers_df

Unnamed: 0,worker_id,name,base_salary
0,w_eb5ca7e7-197b-4128-9cdd-17b8d7d07803,Efren Selva,10119
1,w_ad84fb4e-5229-4c19-91e7-5e5cf8d3f20c,Alan Brown,10715
2,w_653b3a89-c5fa-466b-a477-f0c09a724cdd,Douglas Case,8259
3,w_7fbf0deb-0c65-449a-91ee-0e9e8cff4d0c,Christa Scott,11672
4,w_cf0ff121-da11-4b5c-b191-4d895ce97512,Christopher Greer,9408
...,...,...,...
750,w_972693d7-5edb-4499-abcb-98861d5a72e8,Anthony Nagel,10873
751,w_4d6a56fd-1d07-477f-b86a-2ced18e96076,Roger Doubet,9881
752,w_62aff065-2e41-4d62-a64e-a6c5783ef77a,Randall Clark,9610
753,w_46c17e47-9b03-482a-9d91-eeabb5eb1b84,Eleanor Williamson,10455


In [19]:
# Load and flatten reports data for worker performance
reports_records = []
for file_path in glob.glob('extracted/previous_reports/*.json'):
    reports_records.extend(pd.read_json(file_path).to_dict(orient='records'))
reports_df = pd.DataFrame(reports_records)



In [20]:
reports_df

Unnamed: 0,call_id,worker_id,call_time,likely_to_recommend,professional_score,call_profit
0,c_a317abc5-a7e1-40c3-b486-c4eefdf8be05,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,57.057332,1.0,,695
1,c_256844cb-b710-4da7-8f95-238a7fdd5261,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,69.294334,1.0,,735
2,c_46bfdc38-5130-4e08-9918-4851d84930ce,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,70.794334,1.0,,735
3,c_497ac5ca-8ff3-4773-968c-2e300486c35d,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,54.379299,1.0,,532
4,c_97384374-b420-4259-aaa9-f2e1571928cb,w_3cbcf16f-2173-4281-bad0-b5579ba0f123,77.457332,1.0,,834
...,...,...,...,...,...,...
72511,c_282338ea-b4a3-4d55-801c-1935c08be259,w_ff6501e8-b49a-4512-a9eb-e06565d21355,37.453295,1.0,,705
72512,c_9f076c7e-aa1f-44e2-a904-ec7daaec52c4,w_ff6501e8-b49a-4512-a9eb-e06565d21355,62.093346,1.0,,780
72513,c_bece22d1-0ba0-4208-91f8-366c8267177c,w_ff6501e8-b49a-4512-a9eb-e06565d21355,61.482360,1.0,,864
72514,c_ac55b479-d0ad-4c68-a637-87b3c360ceb9,w_ff6501e8-b49a-4512-a9eb-e06565d21355,32.353295,1.0,,564


In [21]:
# Calculate average recommendation score for each worker
worker_performance = reports_df.groupby('worker_id')['likely_to_recommend'].mean().reset_index()
worker_performance.columns = ['worker_id', 'avg_recommendation_score']

# Merge performance data with workers
workers_df = workers_df.merge(worker_performance, on='worker_id', how='left')


In [22]:
workers_df

Unnamed: 0,worker_id,name,base_salary,avg_recommendation_score
0,w_eb5ca7e7-197b-4128-9cdd-17b8d7d07803,Efren Selva,10119,1.816522
1,w_ad84fb4e-5229-4c19-91e7-5e5cf8d3f20c,Alan Brown,10715,1.512500
2,w_653b3a89-c5fa-466b-a477-f0c09a724cdd,Douglas Case,8259,1.486364
3,w_7fbf0deb-0c65-449a-91ee-0e9e8cff4d0c,Christa Scott,11672,1.641053
4,w_cf0ff121-da11-4b5c-b191-4d895ce97512,Christopher Greer,9408,1.622093
...,...,...,...,...
750,w_972693d7-5edb-4499-abcb-98861d5a72e8,Anthony Nagel,10873,1.524211
751,w_4d6a56fd-1d07-477f-b86a-2ced18e96076,Roger Doubet,9881,1.368132
752,w_62aff065-2e41-4d62-a64e-a6c5783ef77a,Randall Clark,9610,1.747778
753,w_46c17e47-9b03-482a-9d91-eeabb5eb1b84,Eleanor Williamson,10455,1.510465


In [23]:

# Define thresholds based on recommendation score
def assign_difficulty_threshold(score):
    if score > 2.2:
        return 'hard'
    elif 1.8 <= score <= 2.2:
        return 'medium'
    else:
        return 'easy'


In [24]:

# Assign difficulty preference to each worker
workers_df['assigned_difficulty'] = workers_df['avg_recommendation_score'].apply(assign_difficulty_threshold)

logging.info("Loading and flattening feature calls data...")


2024-10-30 19:09:28,361 - INFO - Loading and flattening feature calls data...


In [25]:
workers_df

Unnamed: 0,worker_id,name,base_salary,avg_recommendation_score,assigned_difficulty
0,w_eb5ca7e7-197b-4128-9cdd-17b8d7d07803,Efren Selva,10119,1.816522,medium
1,w_ad84fb4e-5229-4c19-91e7-5e5cf8d3f20c,Alan Brown,10715,1.512500,easy
2,w_653b3a89-c5fa-466b-a477-f0c09a724cdd,Douglas Case,8259,1.486364,easy
3,w_7fbf0deb-0c65-449a-91ee-0e9e8cff4d0c,Christa Scott,11672,1.641053,easy
4,w_cf0ff121-da11-4b5c-b191-4d895ce97512,Christopher Greer,9408,1.622093,easy
...,...,...,...,...,...
750,w_972693d7-5edb-4499-abcb-98861d5a72e8,Anthony Nagel,10873,1.524211,easy
751,w_4d6a56fd-1d07-477f-b86a-2ced18e96076,Roger Doubet,9881,1.368132,easy
752,w_62aff065-2e41-4d62-a64e-a6c5783ef77a,Randall Clark,9610,1.747778,easy
753,w_46c17e47-9b03-482a-9d91-eeabb5eb1b84,Eleanor Williamson,10455,1.510465,easy


In [27]:

# Load feature calls data
feature_records = []
for file_path in glob.glob('extracted/feature_calls/*.json'):
    with open(file_path) as f:
        data = json.load(f)
    for location, calls in data.items():
        for call_id, call_info in calls.items():
            call_info['call_id'] = call_id
            call_info['location'] = location
            feature_records.append(call_info)
feature_calls_df = pd.DataFrame(feature_records)

logging.info("Starting call assignments to workers based on difficulty and performance...")




2024-10-30 19:10:15,453 - INFO - Starting call assignments to workers based on difficulty and performance...


In [31]:
feature_calls_df


Unnamed: 0,date,tlf_number,technical_problem,difficulty,commission,call_id,location
0,2024-10-23 16:54:39.515508,72345741,teams_problems,medium,665.0,c_5e239e86-20f4-4053-a77b-a01dd3971aad,bangalore
1,2024-10-23 12:18:18.515647,53336166,basic_hardware_troubleshooting,easy,576.0,c_1cbe224b-804b-4b30-bf78-198feea748f8,bangalore
2,2024-10-20 01:42:41.515693,81959025,email_related_issues,medium,700.0,c_ca2f35cc-47fb-446f-a8ad-3c0360edb98c,bangalore
3,2024-10-24 12:14:40.515734,96315388,device_and_peripheral_setup,medium,705.0,c_59b1c1cc-6362-4edd-aed8-720536d733e6,bangalore
4,2024-10-24 19:24:07.515775,54054590,internet_problems,medium,680.0,c_69e87302-cad8-42a3-9fb2-e64c0a4b14e4,bangalore
...,...,...,...,...,...,...,...
257096,2024-10-22 17:11:21.910960,35675011,zoom_problems,hard,834.0,c_2b052e28-f48f-4c85-8b75-f73f27ef75f0,hyderabad
257097,2024-10-23 09:14:03.910999,22767682,internet_problems,medium,680.0,c_65dc01b2-5091-40d1-9838-c8a749b3f766,hyderabad
257098,2024-10-19 17:31:44.911038,24957531,email_related_issues,medium,700.0,c_827e75d4-1948-433a-ad5d-f2bdf083ec71,hyderabad
257099,2024-10-20 17:10:37.911077,00563223,internet_problems,medium,680.0,c_68b59f6b-00e7-4529-a353-d9ded93226ce,hyderabad


In [8]:
# Assign calls to workers based on difficulty and performance
def assign_calls_to_workers(calls_df, workers_df):
    schedule = {worker_id: [] for worker_id in workers_df['worker_id']}
    total_calls = len(calls_df)
    
    for i, (_, call) in enumerate(calls_df.iterrows(), 1):
        eligible_workers = workers_df[workers_df['assigned_difficulty'] == call['difficulty']]
        eligible_workers = eligible_workers.sort_values(by='avg_recommendation_score', ascending=False)
        top_worker = eligible_workers.iloc[0]['worker_id']
        schedule[top_worker].append(call['call_id'])
        
        # Calculate and log progress
        if i % 100 == 0 or i == total_calls:
            elapsed = time.time() - start_time
            remaining = (elapsed / i) * (total_calls - i)
            logging.info(f"Assigned {i}/{total_calls} calls. Estimated time remaining: {remaining:.2f} seconds.")
    
    return schedule

call_schedule = assign_calls_to_workers(feature_calls_df, workers_df)

logging.info("Saving generated schedule to 'generated_schedule.json'...")


2024-10-30 19:05:51,106 - INFO - Assigned 100/257101 calls. Estimated time remaining: 258883.16 seconds.
2024-10-30 19:05:51,156 - INFO - Assigned 200/257101 calls. Estimated time remaining: 129455.82 seconds.
2024-10-30 19:05:51,277 - INFO - Assigned 300/257101 calls. Estimated time remaining: 86373.99 seconds.
2024-10-30 19:05:51,317 - INFO - Assigned 400/257101 calls. Estimated time remaining: 64781.15 seconds.
2024-10-30 19:05:51,367 - INFO - Assigned 500/257101 calls. Estimated time remaining: 51830.13 seconds.
2024-10-30 19:05:51,426 - INFO - Assigned 600/257101 calls. Estimated time remaining: 43200.16 seconds.
2024-10-30 19:05:51,480 - INFO - Assigned 700/257101 calls. Estimated time remaining: 37034.20 seconds.
2024-10-30 19:05:51,528 - INFO - Assigned 800/257101 calls. Estimated time remaining: 32407.62 seconds.
2024-10-30 19:05:51,586 - INFO - Assigned 900/257101 calls. Estimated time remaining: 28812.14 seconds.
2024-10-30 19:05:51,641 - INFO - Assigned 1000/257101 calls. E

KeyboardInterrupt: 

In [9]:

# Save the schedule in the required format
output_schedule = {worker_id: calls for worker_id, calls in call_schedule.items()}

with open('generated_schedule.json', 'w') as outfile:
    json.dump(output_schedule, outfile, indent=4)

total_time = time.time() - start_time
logging.info(f"Call schedule generated and saved to 'generated_schedule.json' in {total_time:.2f} seconds.")

NameError: name 'call_schedule' is not defined