In [None]:
%pip install -r requirements.txt

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime
from scipy.stats import pearsonr
from sklearn.linear_model import LinearRegression





In [None]:
# Read the customer-rating workbook and show the first rows as a quick preview.
cr = pd.read_excel("customer_rating.xlsx")
print(cr.head())

# --- Cleaning / preparation ---
# Parse TIMESTAMP with unit="s", i.e. interpret the stored numbers as seconds
# since the Unix epoch, then preview the converted column as a sanity check.
cr["TIMESTAMP"] = pd.to_datetime(cr["TIMESTAMP"], unit="s")
print(cr.loc[:, ["TIMESTAMP"]].head())

# Missing values per column before the cleanup ...
print(cr.isna().sum())

# ... then drop every row that has at least one missing value and count again;
# the second report is the check that nothing is left.
cr = cr.dropna(axis=0, how="any")
print(cr.isna().sum())

In [None]:

# --- Resolution success rate per region ---
# Purpose: encode the resolution outcome as 0/1, compute the mean outcome per
# REGION, and report how many regions / cases sit at exactly 100% and 0%.

# Both "Yes" outcomes count as resolved (1); "Not resolved" counts as 0.
RESOLUTION_CODES = {
    'Yes - after single contact': 1,
    'Yes - after multiple contacts': 1,
    'Not resolved': 0,
}

# Values that are not keys of RESOLUTION_CODES pass through unchanged, so
# re-running this cell on already encoded data is a no-op. pd.to_numeric then
# guarantees a numeric dtype for the means below and raises if an unexpected
# text label is still present, instead of relying on implicit downcasting of
# an object column.
cr['ISSUE_RESOLVED_ENUM'] = pd.to_numeric(
    cr['ISSUE_RESOLVED_ENUM'].map(lambda label: RESOLUTION_CODES.get(label, label))
)

total_cases = len(cr)

# Mean of the 0/1 outcome per region = share of resolved cases in that region.
reg_res_rate = cr.groupby('REGION')['ISSUE_RESOLVED_ENUM'].mean().reset_index()

# A mean of exactly 1 means every case of the region is coded 1.
regions_sc1 = reg_res_rate[reg_res_rate['ISSUE_RESOLVED_ENUM'] == 1]
count_regions_sc1 = len(regions_sc1)
print(f"Number of regions with a resolution success rate of 100%: {count_regions_sc1}")

# Regions ordered by resolution rate, best first, capped at 1200 rows.
# NOTE(review): 1200 looks like "large enough to keep every region" - confirm.
# This name is reassigned (to the ten most frequent regions) in the next cell.
top_regions = reg_res_rate.sort_values(by='ISSUE_RESOLVED_ENUM', ascending=False).head(1200)

cases_regions_sc1 = cr[cr['REGION'].isin(regions_sc1['REGION'])]
n_cases_1 = len(cases_regions_sc1)
print(f"N cases in regions with a 100% resolution success rate: {n_cases_1}")

frac_tot_1 = n_cases_1 / total_cases
print(f"Fraction of cases from regions with a 100% resolution success rate: {frac_tot_1:.2%}")

# Same breakdown for regions in which no case at all was resolved (mean == 0).
regions_sc0 = reg_res_rate[reg_res_rate['ISSUE_RESOLVED_ENUM'] == 0]
casesregions_sc0 = cr[cr['REGION'].isin(regions_sc0['REGION'])]
n_cases_0 = len(casesregions_sc0)
print(f"N cases in regions with a 0% resolution success rate: {n_cases_0}")

frac_total_0 = n_cases_0 / total_cases
print(f"Fraction of cases from regions with a 0% resolution success rate: {frac_total_0:.2%}")

# Overall share of resolved cases, once as an explicit count ratio ...
resolved_cases = cr[cr["ISSUE_RESOLVED_ENUM"] == 1].shape[0]
frac_resolved = resolved_cases / total_cases
print(f"Total fraction of resolved cases: {frac_resolved:.2%}")

# ... and once as the column mean; for a 0/1 column without missing values
# the two figures coincide.
avg_res_rate = cr['ISSUE_RESOLVED_ENUM'].mean()
print(f"Average resolution rate: {avg_res_rate:.2%}")

In [None]:

# --- Resolution rate in the ten regions with the most submissions ---
# Purpose: pick the ten most frequent regions, compute each one's resolution
# rate and submission count, plot the rates as horizontal bars annotated with
# the counts, and print the summary table.

# Number of submissions per region; value_counts orders by count, largest first.
anz_regions = cr['REGION'].value_counts()
u_regions = cr['REGION'].unique()
n_regions = len(u_regions)
print(f"Total number of unique regions: {n_regions}")
print(f"Top 10 regions by count:\n{anz_regions.head(10)}")

top_regions = anz_regions.head(10).index

# Work on an explicit copy: writing into a boolean-mask slice of `cr` triggers
# SettingWithCopyWarning and leaves it unclear which frame is modified.
cr_tregions = cr[cr['REGION'].isin(top_regions)].copy()

# Encode the outcome as 1 (resolved) / 0 (not resolved). Values that are not
# keys of the mapping (e.g. already encoded 0/1) pass through unchanged, and
# pd.to_numeric ensures a numeric dtype for the group means below.
outcome_codes = {
    'Yes - after single contact': 1,
    'Yes - after multiple contacts': 1,
    'Not resolved': 0,
}
cr_tregions['ISSUE_RESOLVED_ENUM'] = pd.to_numeric(
    cr_tregions['ISSUE_RESOLVED_ENUM'].map(lambda label: outcome_codes.get(label, label))
)

# Submission count per region within the top-10 subset.
submission_counts = cr_tregions['REGION'].value_counts().reset_index()
submission_counts.columns = ['REGION', 'Submission_Count']

# Mean of the 0/1 outcome per region = share of resolved cases.
region_resolution_rate = cr_tregions.groupby('REGION')['ISSUE_RESOLVED_ENUM'].mean().reset_index()

# Combine resolution rate and submission count into one row per region.
region_summary = pd.merge(region_resolution_rate, submission_counts, on='REGION')

# Order by number of submissions first, then by resolution rate (both descending).
region_summary_sorted = region_summary.sort_values(
    by=['Submission_Count', 'ISSUE_RESOLVED_ENUM'],
    ascending=[False, False],
)

# Resolution rate as a percentage, rounded to two decimals (shown in the table below).
region_summary_sorted['Resolution_Rate_%'] = (region_summary_sorted['ISSUE_RESOLVED_ENUM'] * 100).round(2)

fig, ax = plt.subplots(figsize=(8, 8))
bars = ax.barh(
    region_summary_sorted['REGION'],
    region_summary_sorted['ISSUE_RESOLVED_ENUM'],
    color='xkcd:magenta'
)
ax.set_xlabel('Resolution Rate')
ax.set_ylabel('Region')
ax.set_title('Resolution Rate in Top 10 Regions by Submission Count')
# barh draws the first row at the bottom; invert the axis so the region with
# the most submissions (first row after sorting) ends up at the top.
ax.invert_yaxis()

# Annotate every bar with its submission count.
for bar, count in zip(bars, region_summary_sorted['Submission_Count']):
    ax.text(
        bar.get_width() + 0.01,  # slightly to the right of the bar end
        bar.get_y() + bar.get_height() / 2,  # vertically centred on the bar
        f'{count}',
        va='center',
        fontsize=10,
        color='black'
    )

plt.show()

print(region_summary_sorted)
