In [3]:
#Importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path 

#Importing libraries to connect to the database
import auth
import getpass

# Credentials: the local account name plus the token returned by auth.get_valid_token()
user = getpass.getuser()
token = auth.get_valid_token()

# Connecting to Datalake
import os
import trino

# All connection parameters gathered in one mapping so they can be read at a glance;
# the host comes from the PRESTO_HOST environment variable.
connection_settings = {
    "host": os.environ['PRESTO_HOST'],
    "port": 443,
    "user": user,
    "catalog": "hive",
    "schema": "dwh_insider",
    "http_scheme": 'https',
    "auth": trino.auth.BasicAuthentication(user, token),
}
connection = trino.dbapi.connect(**connection_settings)

In [4]:
# Taking the claim information from the datalake
#
# The query starts from dwh_confidential.f_clm_claim_shipments (fccs) and LEFT JOINs
# carrier liability, network carriers, claim types, claim categories, manual refund
# reasons, destinations, shipment order items and customer value segments.
# - Only rows whose carrier_country_code is 'AT' or 'CH' are kept.
# - The value-segment join matches on the customer and requires the shipment date to lie
#   between SK_VALID_FROM and SK_VALID_UNTIL of the segment row.
# - GROUP BY 1..16 lists every selected column and there are no aggregates, so it
#   only removes duplicate rows.
# - `aviailable_claim_amount_euro` is spelled exactly like this in later cells as well;
#   presumably that is the warehouse column name -- do not "correct" it without checking.
# NOTE(review): most selected columns are unqualified, so this cell alone does not show
# which joined table supplies each of them.
query = """
SELECT
       carrier_tracking_number
      , fccs.sk_shipment_id 
      , claim_shipment_number
      , sk_claim_report_date
      , aviailable_claim_amount_euro
      , carrier_name
      , carrier_name_short
      , carrier_country_code
      , claimtype
      , claimsubtype
      , claim_category
      , dd.ZIP
      , dd.city
      , TS_LAST_COT_EVENT
      , TS_LAST_CRT_EVENT
      , value_segment 
FROM
    dwh_confidential.f_clm_claim_shipments as fccs
LEFT JOIN
          dwh_insider.d_clm_carrier_liability as dccl
          ON dccl.sk_clm_carrier_liability = fccs.sk_clm_carrier_liability
LEFT JOIN
          dwh_insider.d_network_carriers as dnc
          ON dnc.sk_network_carrier = dccl.sk_network_carrier
LEFT JOIN
          dwh_insider.d_sf_carrier_claim_types as dscct
          ON dscct.sk_claim_type = fccs.sk_claim_type
LEFT JOIN
          dwh_insider.d_clm_claim_categories as dccc
          ON dccc.sk_claim_category = fccs.sk_claim_category
LEFT JOIN
          dwh_insider.d_manual_refund_reason as dmrr
          ON dmrr.sk_manual_refund_reason = fccs.sk_manual_refund_reason
LEFT JOIN
          dwh_confidential.D_DESTINATIONS dd
          ON fccs.SK_DESTINATION = dd.SK_DESTINATION
LEFT JOIN 
          dwh_confidential.f_fos_shipment_order_items AS ffsoi
          ON ffsoi.sk_shipment_id =fccs.sk_shipment_id 
          AND ffsoi.sk_shipment_date =fccs.sk_shipment_date 
LEFT JOIN 
          dwh_confidential.f_customer_value_segments AS fcvs 
          ON fcvs.sk_customer = ffsoi.sk_customer 
          AND ffsoi.sk_shipment_date  BETWEEN fcvs.SK_VALID_FROM 
          AND fcvs.SK_VALID_UNTIL
Where
      carrier_country_code in ('AT','CH') 
GROUP BY 
      1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
"""
# Run the query over the `connection` opened in the first cell and load the result into `df`.
df = pd.read_sql_query(query, connection)
# Show the first 10 rows as the cell output.
df.head(10)

  df = pd.read_sql_query(query, connection)


Unnamed: 0,carrier_tracking_number,sk_shipment_id,claim_shipment_number,sk_claim_report_date,aviailable_claim_amount_euro,carrier_name,carrier_name_short,carrier_country_code,claimtype,claimsubtype,claim_category,ZIP,city,TS_LAST_COT_EVENT,TS_LAST_CRT_EVENT,value_segment
0,996008376522612525,11786361605,1041020237434763,20220610,60.74,SWISS POST,PCH,CH,Delivery,Receipt denied,Delivered - Original Customer,8143,Stallikon,2022-06-13 12:01:29.000,,C
1,1034989004523490211506,12559412202,1041010251264344,20220824,70.79,ÖSTERREICHISCHE POST,PAT,AT,Delivery,Lost in transit,Lost in transit,1150,Wien,2022-08-26 05:55:36.000,,B
2,996013622502148937,13652998222,1041040268913487,20221130,174.28,SWISS POST,PCH,CH,Delivery,Receipt denied,Delivered - Original Customer,4600,Olten,2022-12-05 10:36:37.000,,C
3,996011507320760270,10997480720,1041040223604446,20220328,340.02,SWISS POST,PCH,CH,Delivery,Receipt denied,Delivered - Original Customer,5417,Untersiggenthal,2022-03-30 13:59:33.000,,N
4,1033853008635550740053,12677367760,1041010253118685,20220929,21.66,ÖSTERREICHISCHE POST,PAT,AT,Return,PAT,Missing Return,8713,St Stefan ob Leoben,,2022-10-01 04:42:46.000,C
5,996013537308530605,10258464207,1041040211699895,20220112,277.25,SWISS POST,PCH,CH,Delivery,Receipt denied,Delivered - Original Customer,8712,Stäfa,2022-01-13 14:53:50.000,,D
6,996013537313340094,13254241258,1041030261986006,20221029,422.69,SWISS POST,PCH,CH,Delivery,Receipt denied,Delivered - Original Customer,1213,Petit-Lancy,2022-11-01 10:41:19.000,,B
7,1034986055058130740052,11436218165,1040970195870517,20220601,73.55,ÖSTERREICHISCHE POST,PAT,AT,Return,PAT,Missing Return,3443,Sieghartskirchen,,2022-05-16 08:22:51.000,VIP
8,996011507322463481,12661843095,1040970213589436,20220905,95.98,SWISS POST,PCH,CH,Delivery,Damage delivery,Damage Delivery,8634,Hombrechtikon,2022-09-05 16:45:03.000,,VIP
9,996013537313660799,13496341493,1041020265803270,20221121,17.0,SWISS POST,PCH,CH,Delivery,Receipt denied,Delivered - Mailbox,8048,Zürich,2022-11-22 07:31:35.000,,A


In [5]:
## Loading the report workbook supplied by the Fiege team
# The location is kept in one named constant so it is easy to find and change.
FIEGE_REPORT_PATH = Path('/home/vralaptisrin/nfs/notebooks/Varshitha_notebooks/Feige_project/Fiege_report_nov.xlsx')
feige_data = pd.read_excel(FIEGE_REPORT_PATH)

In [6]:
# Build `declined_claims`: the closed, declined claims for the AT and CH markets,
# taken from the Fiege report (`feige_data`) with cleaned-up tracking IDs.

# Columns of the report that the rest of the notebook works with.
REPORT_COLUMNS = ["Tracking ID", "Carrier", "Last activity", "Country",
                  "Rejection", "Category", "Status", "Date received"]
# Values found in the "Tracking ID" column that are not real tracking numbers.
PLACEHOLDER_TRACKING_IDS = ['keine Sendungsnummer vorhanden',
                            'keine Sendungsnummer',
                            '1.04102E+15']

# One combined row filter instead of three successive re-assignments.
is_declined_closed = (
    (feige_data["Last activity"] == 'Declined Claim')
    & feige_data["Country"].isin(['AT', 'CH'])
    & (feige_data["Status"] == 'closed')
)
# Explicit copy: the column assignments below then modify an independent frame
# instead of a slice of `feige_data` (avoids pandas' SettingWithCopyWarning).
declined_claims = feige_data.loc[is_declined_closed, REPORT_COLUMNS].copy()

# NOTE(review): `.str.strip()` yields NaN for cells that are not strings, so a tracking ID
# stored as a number in the workbook would turn into NaN here -- confirm the column is text.
declined_claims['Tracking ID'] = declined_claims['Tracking ID'].str.strip()

# Placeholders are removed before upper-casing because they are matched in their original case.
# Rows with a missing tracking ID are kept, exactly as with the previous `!=` comparisons.
has_real_tracking_id = ~declined_claims['Tracking ID'].isin(PLACEHOLDER_TRACKING_IDS)
declined_claims = declined_claims.loc[has_real_tracking_id].copy()
declined_claims['Tracking ID'] = declined_claims['Tracking ID'].str.upper()

# Remove exact duplicate rows, then rows without a rejection reason.
declined_claims = declined_claims.drop_duplicates().dropna(subset=['Rejection'])
declined_claims.head(2)

Unnamed: 0,Tracking ID,Carrier,Last activity,Country,Rejection,Category,Status,Date received
6,1033856021958050740050,Post AT,Declined Claim,AT,Other,Return clarification,closed,2022-08-22 07:02:18
9,1034670500992110243207,Post AT,Declined Claim,AT,Zalando Mistake,Return clarification,closed,2022-08-22 10:49:18


In [7]:
## Merging of the data
# Upper-case the city names on the datalake frame. This deliberately stays an in-place
# update of `df`, so every other cell reading `df` sees the same values as before.
df['city'] = df['city'].str.upper()

# Columns kept from the merged frame, in display order.
MERGED_COLUMNS = ["carrier_tracking_number", "Tracking ID", "sk_claim_report_date", "Date received",
                  "Country", "Carrier", "aviailable_claim_amount_euro", "claimtype", "claim_category",
                  "Category", "Rejection", "carrier_name", "ZIP", "city", "TS_LAST_COT_EVENT",
                  "value_segment"]

# Left join: every declined claim is kept; claims whose tracking ID has no match in the
# datalake get missing values in the datalake columns.
# The amount is converted with `.assign`, which works on a fresh copy, instead of assigning
# onto the column-subset slice (that pattern triggers pandas' SettingWithCopyWarning).
df2 = (
    pd.merge(declined_claims, df, left_on="Tracking ID", right_on="carrier_tracking_number", how='left')
    .loc[:, MERGED_COLUMNS]
    .assign(aviailable_claim_amount_euro=lambda merged: merged['aviailable_claim_amount_euro'].astype(float))
)
df2.head(2)

Unnamed: 0,carrier_tracking_number,Tracking ID,sk_claim_report_date,Date received,Country,Carrier,aviailable_claim_amount_euro,claimtype,claim_category,Category,Rejection,carrier_name,ZIP,city,TS_LAST_COT_EVENT,value_segment
0,1033856021958050740050,1033856021958050740050,20220819.0,2022-08-22 07:02:18,AT,Post AT,55.08,Return,Missing Return,Return clarification,Other,ÖSTERREICHISCHE POST,1070.0,WIEN,,B
1,,1034670500992110243207,,2022-08-22 10:49:18,AT,Post AT,,,,Return clarification,Zalando Mistake,,,,,


In [15]:
# Subset used for the graph: CH claims rejected as 'Safe place delivery' in ZIP 4057.
# The three conditions are combined into one boolean mask and applied in a single step.
ch_safe_place_zip_4057 = (
    (df2["Country"] == "CH")
    & (df2["Rejection"] == 'Safe place delivery')
    & (df2["ZIP"] == '4057')
)
CH_market_analysis = df2[ch_safe_place_zip_4057]
CH_market_analysis

Unnamed: 0,carrier_tracking_number,Tracking ID,sk_claim_report_date,Date received,Country,Carrier,aviailable_claim_amount_euro,claimtype,claim_category,Category,Rejection,carrier_name,ZIP,city,TS_LAST_COT_EVENT,value_segment
1077,996013537311926981,996013537311926981,20220716,2022-09-15 15:43:14,CH,Post CH,115.92,Delivery,Delivered - Mailbox,Shipment loss,Safe place delivery,SWISS POST,4057,BASEL,2022-07-19 12:31:51.000,C
1126,996009745959477901,996009745959477901,20220906,2022-09-16 12:49:04,CH,Post CH,63.05,Delivery,Delivered - Mailbox,Shipment loss,Safe place delivery,SWISS POST,4057,BASEL,2022-09-07 18:50:25.000,C
1133,996013537312235737,996013537312235737,20220817,2022-09-16 14:08:37,CH,Post CH,275.49,Delivery,Delivered - Mailbox,Shipment loss,Safe place delivery,SWISS POST,4057,BASEL,2022-08-18 12:04:19.000,A
1205,996007791545926897,996007791545926897,20220908,2022-09-19 09:01:04,CH,Post CH,29.08,Delivery,Delivered - Mailbox,Shipment loss,Safe place delivery,SWISS POST,4057,BASEL,2022-09-09 11:55:10.000,B
1224,996009745957415002,996009745957415002,20220817,2022-09-19 14:29:33,CH,Post CH,67.85,Delivery,Delivered - Mailbox,Shipment loss,Safe place delivery,SWISS POST,4057,BASEL,2022-08-18 18:18:51.000,VIP
1351,996013537312650748,996013537312650748,20220919,2022-09-21 13:11:45,CH,Post CH,163.67,Delivery,Delivered - Mailbox,Shipment loss,Safe place delivery,SWISS POST,4057,BASEL,2022-09-20 08:05:15.000,A
1526,996011507322527457,996011507322527457,20220913,2022-09-26 08:56:34,CH,Post CH,274.52,Delivery,Delivered - Mailbox,Shipment loss,Safe place delivery,SWISS POST,4057,BASEL,2022-09-15 08:59:28.000,A
1583,996013537312674581,996013537312674581,20220920,2022-09-26 14:39:56,CH,Post CH,341.7,Delivery,Delivered - Mailbox,Shipment loss,Safe place delivery,SWISS POST,4057,BASEL,2022-09-21 09:31:39.000,B
1697,996011507322653886,996011507322653886,20220926,2022-09-28 08:39:04,CH,Post CH,172.97,Delivery,Delivered - Mailbox,Shipment loss,Safe place delivery,SWISS POST,4057,BASEL,2022-09-27 10:12:27.000,B
1755,996013537312729840,996013537312729840,20220923,2022-09-28 14:43:53,CH,Post CH,209.09,Delivery,Lost in transit,Shipment loss,Safe place delivery,SWISS POST,4057,BASEL,2022-09-27 13:20:52.000,D


In [17]:
# Per value segment: total claim amount and number of distinct tracking IDs.
segment_aggregations = {
    'aviailable_claim_amount_euro': 'sum',
    'Tracking ID': 'nunique',
}
m = CH_market_analysis.groupby(['value_segment']).agg(segment_aggregations)
m

Unnamed: 0_level_0,aviailable_claim_amount_euro,Tracking ID
value_segment,Unnamed: 1_level_1,Unnamed: 2_level_1
A,936.22,5
B,1222.79,9
C,1145.56,12
D,706.88,5
N,457.73,4
Passive C,112.67,1
VIP,571.11,5


In [None]:
# Bar chart: number of distinct claims (tracking IDs) per value segment.
# `CH_market_analysis` has one row per claim and no "number_of_claims" column, so the
# counts are computed here before plotting.
claims_per_segment = CH_market_analysis.groupby("value_segment")["Tracking ID"].nunique()

fig, ax = plt.subplots()
bars = ax.bar(claims_per_segment.index.astype(str), claims_per_segment.to_numpy())
# Write each bar's count at its outer edge (label_type 'edge' is matplotlib's default).
ax.bar_label(bars, label_type='edge')
ax.set_xlabel("Value Segment")
ax.set_ylabel("# of claims")
ax.set_title("Claims per value segment")
plt.show()