In [3]:
# import python libraries
import pandas as pd
import numpy as np

import seaborn as sns
from matplotlib import pyplot as plt
%matplotlib inline

In [4]:
# load data 
df = pd.read_csv('~/Downloads/WA_Fn-UseC_-Telco-Customer-Churn.csv')

###### Data Preparation

In [5]:
# check lenght of dataset
len(df)

7043

In [6]:
# table overview
df.head(3)

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes


In [7]:
# transpose the table to have the column names as index
# to view all the field names
df.head().T

Unnamed: 0,0,1,2,3,4
customerID,7590-VHVEG,5575-GNVDE,3668-QPYBK,7795-CFOCW,9237-HQITU
gender,Female,Male,Male,Male,Female
SeniorCitizen,0,0,0,0,0
Partner,Yes,No,No,No,No
Dependents,No,No,No,No,No
tenure,1,34,2,45,2
PhoneService,No,Yes,Yes,No,Yes
MultipleLines,No phone service,No,No,No phone service,No
InternetService,DSL,DSL,DSL,DSL,Fiber optic
OnlineSecurity,No,Yes,Yes,Yes,No


In [8]:
# confirming the datatypes for each fileds
df.dtypes

customerID           object
gender               object
SeniorCitizen         int64
Partner              object
Dependents           object
tenure                int64
PhoneService         object
MultipleLines        object
InternetService      object
OnlineSecurity       object
OnlineBackup         object
DeviceProtection     object
TechSupport          object
StreamingTV          object
StreamingMovies      object
Contract             object
PaperlessBilling     object
PaymentMethod        object
MonthlyCharges      float64
TotalCharges         object
Churn                object
dtype: object

In [9]:
# change Total Charges datatype to numeric
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors = 'coerce')
df['TotalCharges'] = df['TotalCharges'].fillna(0)

In [10]:
# change fields name to small letters and replace white spaces with underscore
df.columns = df.columns.str.lower().str.replace(' ','_')

# select all the categorical variables and make a list
string_columns = list(df.dtypes[df.dtypes == 'object'].index)

# loop through the list change the text to small letters and replace white spaces with underscore
for col in string_columns:
    df[col] = df[col].str.lower().str.replace(' ', '_')

In [11]:
# convert the churn values from YES/NO to 0/1 int type
df.churn = (df.churn == 'yes').astype(int)

In [12]:
#df.churn = df.churn.apply(lambda x: 1 if 'Yes' in x else 0)

In [13]:
df.churn.unique()

array([0, 1])

In [14]:
# confirm change
df.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,7033,7034,7035,7036,7037,7038,7039,7040,7041,7042
customerid,7590-vhveg,5575-gnvde,3668-qpybk,7795-cfocw,9237-hqitu,9305-cdskc,1452-kiovk,6713-okomc,7892-pookp,6388-tabgu,...,9767-fflem,0639-tsiqw,8456-qdavc,7750-eyxwz,2569-wgero,6840-resvb,2234-xaduh,4801-jzazl,8361-ltmkd,3186-ajiek
gender,female,male,male,male,female,female,male,female,female,male,...,male,female,male,female,female,male,female,female,male,male
seniorcitizen,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
partner,yes,no,no,no,no,no,no,no,yes,no,...,no,no,no,no,no,yes,yes,yes,yes,no
dependents,no,no,no,no,no,no,yes,no,no,yes,...,no,no,no,no,no,yes,yes,yes,no,no
tenure,1,34,2,45,2,8,22,10,28,62,...,38,67,19,12,72,24,72,11,4,66
phoneservice,no,yes,yes,no,yes,yes,yes,no,yes,yes,...,yes,yes,yes,no,yes,yes,yes,no,yes,yes
multiplelines,no_phone_service,no,no,no_phone_service,no,yes,yes,no_phone_service,yes,no,...,no,yes,no,no_phone_service,no,yes,yes,no_phone_service,yes,no
internetservice,dsl,dsl,dsl,dsl,fiber_optic,fiber_optic,fiber_optic,dsl,fiber_optic,dsl,...,fiber_optic,fiber_optic,fiber_optic,dsl,no,dsl,fiber_optic,dsl,fiber_optic,fiber_optic
onlinesecurity,no,yes,yes,yes,no,no,no,yes,no,yes,...,no,yes,no,no,no_internet_service,yes,no,yes,no,yes


In [15]:
# import sklearn library to split the dataset into train and test set
from sklearn.model_selection import train_test_split

In [16]:
# split data for train dataset
df_train_full, df_test = train_test_split(df, test_size = .2, random_state = 1)

In [17]:
# split data for train dataset
df_train, df_val = train_test_split(df_train_full, test_size = .33, random_state = 11)

In [18]:
# set the target to variables
y_train = df_train.churn.values
y_val = df_val.churn.values

In [19]:
# delete the churn field from the train and val dataset
del df_train['churn']
del df_val['churn']

###### Exploratory Data Analysis

In [20]:
# checking for miss valuses
df_train_full.isnull().sum()

customerid          0
gender              0
seniorcitizen       0
partner             0
dependents          0
tenure              0
phoneservice        0
multiplelines       0
internetservice     0
onlinesecurity      0
onlinebackup        0
deviceprotection    0
techsupport         0
streamingtv         0
streamingmovies     0
contract            0
paperlessbilling    0
paymentmethod       0
monthlycharges      0
totalcharges        0
churn               0
dtype: int64

In [21]:
# number of churn - 0 for does who didn't churn while 1 for those who did
df_train_full.churn.value_counts()

0    4113
1    1521
Name: churn, dtype: int64

In [22]:
# defining the globe mean for the churn
global_mean = df_train_full.churn.mean()
round(global_mean, 3)

0.27

In [23]:
# grouping by categorical and numerical variables
categorical = ['gender', 'seniorcitizen', 'partner', 'dependents',
               'phoneservice', 'multiplelines', 'internetservice',
               'onlinesecurity', 'onlinebackup', 'deviceprotection',
               'techsupport', 'streamingtv', 'streamingmovies',
               'contract', 'paperlessbilling', 'paymentmethod']
numerical = ['tenure', 'monthlycharges', 'totalcharges']


###### Feature Importance

In [24]:
# churn rate for gender
female_mean = df_train_full[df_train_full.gender == 'female'].churn.mean()
print('gender == female:', round(female_mean, 3))

male_mean = df_train_full[df_train_full.gender == 'male'].churn.mean()
print('gender == male:', round(male_mean, 3))

gender == female: 0.277
gender == male: 0.263


###### Churn Rate
When the difference between the global average and the group average is -ve, it means they are not likely to churn and the difference is +ve, it means they are most likely to churn.

In [25]:
# churn risk rate for gender 
female_risk = (female_mean/global_mean) - 1
print('gender == female:', round(female_risk, 3))

male_risk = (male_mean/global_mean) - 1
print('gender == male:', round(male_risk, 3))

gender == female: 0.025
gender == male: -0.025


###### Risk Rate
When the ratio group average to the global average is -ve, it means they are not likely to churn and the ratio is +ve and greater the global average, it means they are most likely to churn.

In [26]:
# churn rate for with/with no partner
partner_yes = df_train_full[df_train_full.partner == 'yes'].churn.mean()
print('partner == yes:', round(partner_yes, 3))

partner_no = df_train_full[df_train_full.partner == 'no'].churn.mean()
print('partner == no :', round(partner_no, 3))

partner == yes: 0.205
partner == no : 0.33


In [27]:
# churn risk rate for with/with no partner
partner_yes_risk = (partner_yes/global_mean) - 1
print('partner == yes:', round(partner_yes_risk, 3))

partner_no_risk = (partner_no/global_mean) - 1
print('partner == no :', round(partner_no_risk, 3))

partner == yes: -0.241
partner == no : 0.222


In [28]:
df_group = df_train_full.groupby(by='gender').churn.agg(['mean'])
df_group['diff'] = df_group['mean'] - global_mean
df_group['risk'] = (df_group['mean'] / global_mean) - 1
df_group

Unnamed: 0_level_0,mean,diff,risk
gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,0.276824,0.006856,0.025396
male,0.263214,-0.006755,-0.02502


In [29]:
# import IPython display library
from IPython.display import display

In [30]:
# loof through the entire categorical dataset to determine the churn and risk rate
for col in categorical:
    df_group = df_train_full.groupby(by=col).churn.agg(['mean'])
    df_group['diff'] = df_group['mean'] - global_mean
    df_group['risk'] = (df_group['mean'] / global_mean) - 1
    display(df_group)

Unnamed: 0_level_0,mean,diff,risk
gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,0.276824,0.006856,0.025396
male,0.263214,-0.006755,-0.02502


Unnamed: 0_level_0,mean,diff,risk
seniorcitizen,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.24227,-0.027698,-0.102597
1,0.413377,0.143409,0.531208


Unnamed: 0_level_0,mean,diff,risk
partner,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
no,0.329809,0.059841,0.221659
yes,0.205033,-0.064935,-0.240528


Unnamed: 0_level_0,mean,diff,risk
dependents,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
no,0.31376,0.043792,0.162212
yes,0.165666,-0.104302,-0.386349


Unnamed: 0_level_0,mean,diff,risk
phoneservice,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
no,0.241316,-0.028652,-0.10613
yes,0.273049,0.003081,0.011412


Unnamed: 0_level_0,mean,diff,risk
multiplelines,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
no,0.257407,-0.012561,-0.046526
no_phone_service,0.241316,-0.028652,-0.10613
yes,0.290742,0.020773,0.076948


Unnamed: 0_level_0,mean,diff,risk
internetservice,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
dsl,0.192347,-0.077621,-0.287518
fiber_optic,0.425171,0.155203,0.574895
no,0.077805,-0.192163,-0.711799


Unnamed: 0_level_0,mean,diff,risk
onlinesecurity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
no,0.420921,0.150953,0.559152
no_internet_service,0.077805,-0.192163,-0.711799
yes,0.153226,-0.116742,-0.43243


Unnamed: 0_level_0,mean,diff,risk
onlinebackup,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
no,0.404323,0.134355,0.497672
no_internet_service,0.077805,-0.192163,-0.711799
yes,0.217232,-0.052736,-0.19534


Unnamed: 0_level_0,mean,diff,risk
deviceprotection,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
no,0.395875,0.125907,0.466379
no_internet_service,0.077805,-0.192163,-0.711799
yes,0.230412,-0.039556,-0.14652


Unnamed: 0_level_0,mean,diff,risk
techsupport,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
no,0.418914,0.148946,0.551717
no_internet_service,0.077805,-0.192163,-0.711799
yes,0.159926,-0.110042,-0.40761


Unnamed: 0_level_0,mean,diff,risk
streamingtv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
no,0.342832,0.072864,0.269897
no_internet_service,0.077805,-0.192163,-0.711799
yes,0.302723,0.032755,0.121328


Unnamed: 0_level_0,mean,diff,risk
streamingmovies,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
no,0.338906,0.068938,0.255358
no_internet_service,0.077805,-0.192163,-0.711799
yes,0.307273,0.037305,0.138182


Unnamed: 0_level_0,mean,diff,risk
contract,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
month-to-month,0.431701,0.161733,0.599082
one_year,0.120573,-0.149395,-0.553379
two_year,0.028274,-0.241694,-0.89527


Unnamed: 0_level_0,mean,diff,risk
paperlessbilling,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
no,0.172071,-0.097897,-0.362625
yes,0.338151,0.068183,0.25256


Unnamed: 0_level_0,mean,diff,risk
paymentmethod,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bank_transfer_(automatic),0.168171,-0.101797,-0.377072
credit_card_(automatic),0.164339,-0.10563,-0.391267
electronic_check,0.45589,0.185922,0.688682
mailed_check,0.19387,-0.076098,-0.281879


###### Feature Importance: Mutual Information

In [31]:
# import mutual info score library
from sklearn.metrics import mutual_info_score

In [32]:
# define function for categorical variables to determine feature importance
def calculate_mi(series):
    return mutual_info_score(series, df_train_full.churn)

df_mi = df_train_full[categorical].apply(calculate_mi)
df_mi = df_mi.sort_values(ascending = False).to_frame(name = 'MI')

display(df_mi.head())
display(df_mi.head())

Unnamed: 0,MI
contract,0.09832
onlinesecurity,0.063085
techsupport,0.061032
internetservice,0.055868
onlinebackup,0.046923


Unnamed: 0,MI
contract,0.09832
onlinesecurity,0.063085
techsupport,0.061032
internetservice,0.055868
onlinebackup,0.046923


In [33]:
# df_train_full[categorical].apply(lambda x: mutual_info_score(x, df_train_full.churn))

###### Feature Importance: Correlation 

In [34]:
# determine the correlation between numerical values and churn
df_train_full[numerical].corrwith(df_train_full.churn).to_frame('correlation')

Unnamed: 0,correlation
tenure,-0.351885
monthlycharges,0.196805
totalcharges,-0.196353


In [35]:
# group churn by the numerical values
df_train_full.groupby(by = 'churn')[numerical].mean()

Unnamed: 0_level_0,tenure,monthlycharges,totalcharges
churn,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,37.531972,61.176477,2548.021627
1,18.070348,74.521203,1545.689415


###### One-Hot Encoding

In [36]:
# DictVectoriser for encoding and transforming categorical variables
from sklearn.feature_extraction import DictVectorizer

In [37]:
# merge cat and num dataset
train_dict = df_train[categorical + numerical].to_dict(orient = 'records')

In [38]:
# view dictionary
train_dict[0]

{'gender': 'male',
 'seniorcitizen': 0,
 'partner': 'yes',
 'dependents': 'no',
 'phoneservice': 'yes',
 'multiplelines': 'no',
 'internetservice': 'dsl',
 'onlinesecurity': 'yes',
 'onlinebackup': 'yes',
 'deviceprotection': 'yes',
 'techsupport': 'yes',
 'streamingtv': 'yes',
 'streamingmovies': 'yes',
 'contract': 'two_year',
 'paperlessbilling': 'yes',
 'paymentmethod': 'bank_transfer_(automatic)',
 'tenure': 71,
 'monthlycharges': 86.1,
 'totalcharges': 6045.9}

In [39]:
# fit the train_dict data in Dictvectoriser
dv = DictVectorizer(sparse = False)
dv.fit(train_dict)

DictVectorizer(sparse=False)

In [40]:
# transform the train_dict data
X_train = dv.transform(train_dict)
X_train.shape

(3774, 45)

In [41]:
# get feautre names of the tranformed data
dv.get_feature_names()



['contract=month-to-month',
 'contract=one_year',
 'contract=two_year',
 'dependents=no',
 'dependents=yes',
 'deviceprotection=no',
 'deviceprotection=no_internet_service',
 'deviceprotection=yes',
 'gender=female',
 'gender=male',
 'internetservice=dsl',
 'internetservice=fiber_optic',
 'internetservice=no',
 'monthlycharges',
 'multiplelines=no',
 'multiplelines=no_phone_service',
 'multiplelines=yes',
 'onlinebackup=no',
 'onlinebackup=no_internet_service',
 'onlinebackup=yes',
 'onlinesecurity=no',
 'onlinesecurity=no_internet_service',
 'onlinesecurity=yes',
 'paperlessbilling=no',
 'paperlessbilling=yes',
 'partner=no',
 'partner=yes',
 'paymentmethod=bank_transfer_(automatic)',
 'paymentmethod=credit_card_(automatic)',
 'paymentmethod=electronic_check',
 'paymentmethod=mailed_check',
 'phoneservice=no',
 'phoneservice=yes',
 'seniorcitizen',
 'streamingmovies=no',
 'streamingmovies=no_internet_service',
 'streamingmovies=yes',
 'streamingtv=no',
 'streamingtv=no_internet_servic

###### Training the Model 

In [42]:
# import Logistic Regression from sklearn library
from sklearn.linear_model import LogisticRegression

In [43]:
# fit the model
model = LogisticRegression(solver='liblinear', random_state=1)
model.fit(X_train, y_train)

LogisticRegression(random_state=1, solver='liblinear')

In [44]:
# combining the validation data for encoding
val_dict = df_val[categorical + numerical].to_dict(orient='records') 
X_val = dv.transform(val_dict)

In [45]:
# predict the probability with the Validation dataset
model.predict_proba(X_val)

array([[0.76509452, 0.23490548],
       [0.73114964, 0.26885036],
       [0.68055068, 0.31944932],
       ...,
       [0.94275132, 0.05724868],
       [0.3847724 , 0.6152276 ],
       [0.93872722, 0.06127278]])

In [46]:
# prediction for the customers that are likely to churn
y_pred = model.predict_proba(X_val)[:,1]

In [47]:
y_pred

array([0.23490548, 0.26885036, 0.31944932, ..., 0.05724868, 0.6152276 ,
       0.06127278])

In [48]:
# where churn is greater than .5
churn = y_pred > 0.5

In [49]:
# mean churn prediction with the validation set
(y_val == churn).mean()

0.8016129032258065

###### Model Interpretation

In [50]:
# the first model interpect value
model.intercept_[0]

-0.12198811467233629

In [51]:
# zip the feature names and coefficeint and convert 
# dictionary into to see the weights of each feature
dict(zip(dv.get_feature_names(), model.coef_[0].round(3)))



{'contract=month-to-month': 0.563,
 'contract=one_year': -0.086,
 'contract=two_year': -0.599,
 'dependents=no': -0.03,
 'dependents=yes': -0.092,
 'deviceprotection=no': 0.1,
 'deviceprotection=no_internet_service': -0.116,
 'deviceprotection=yes': -0.106,
 'gender=female': -0.027,
 'gender=male': -0.095,
 'internetservice=dsl': -0.323,
 'internetservice=fiber_optic': 0.317,
 'internetservice=no': -0.116,
 'monthlycharges': 0.001,
 'multiplelines=no': -0.168,
 'multiplelines=no_phone_service': 0.127,
 'multiplelines=yes': -0.081,
 'onlinebackup=no': 0.136,
 'onlinebackup=no_internet_service': -0.116,
 'onlinebackup=yes': -0.142,
 'onlinesecurity=no': 0.258,
 'onlinesecurity=no_internet_service': -0.116,
 'onlinesecurity=yes': -0.264,
 'paperlessbilling=no': -0.213,
 'paperlessbilling=yes': 0.091,
 'partner=no': -0.048,
 'partner=yes': -0.074,
 'paymentmethod=bank_transfer_(automatic)': -0.027,
 'paymentmethod=credit_card_(automatic)': -0.136,
 'paymentmethod=electronic_check': 0.175,


In [52]:
# select a few feature to train
subset = ['contract', 'tenure', 'totalcharges']
train_dict_small = df_train[subset].to_dict(orient='records')
dv_small = DictVectorizer(sparse=False)
dv_small.fit(train_dict_small)

X_small_train = dv_small.transform(train_dict_small)

dv_small.get_feature_names()



['contract=month-to-month',
 'contract=one_year',
 'contract=two_year',
 'tenure',
 'totalcharges']

In [53]:
# train the model
model_small = LogisticRegression(solver='liblinear', random_state=1)
model_small.fit(X_small_train, y_train)

LogisticRegression(random_state=1, solver='liblinear')

In [54]:
# interpect value for the model
model_small.intercept_[0]

-0.5772299133614702

In [55]:
# zip the feature names and coefficeint and convert 
# dictionary into to see the weights of each feature
dict(zip(dv_small.get_feature_names(), model_small.coef_[0].round(3)))



{'contract=month-to-month': 0.866,
 'contract=one_year': -0.327,
 'contract=two_year': -1.117,
 'tenure': -0.094,
 'totalcharges': 0.001}

In [56]:
# transform the validation model like before
val_dict_small = df_val[subset].to_dict(orient='records')
X_small_val = dv_small.transform(val_dict_small)

In [57]:
# get the prediction model for the first model
y_pred_small = model_small.predict_proba(X_small_val)[:, 1]

###### Using the model

In [58]:
customer = {
    'customerid': '8879-zkjof',
    'gender': 'female',
    'seniorcitizen': 0,
    'partner': 'no',
    'dependents': 'no',
    'tenure': 41,
    'phoneservice': 'yes',
    'multiplelines': 'no',
    'internetservice': 'dsl',
    'onlinesecurity': 'yes',
    'onlinebackup': 'no',
    'deviceprotection': 'yes',
    'techsupport': 'yes',
    'streamingtv': 'yes',
    'streamingmovies': 'yes',
    'contract': 'one_year',
    'paperlessbilling': 'yes',
    'paymentmethod': 'bank_transfer_(automatic)',
    'monthlycharges': 79.85,
    'totalcharges': 3320.75,
}

In [59]:
# transform the categorical features and make prediction
X_test = dv.transform([customer])
model.predict_proba(X_test)[0, 1]

0.07332577315357781

In [60]:
# print a list of the first test column
print(list(X_test[0]))

[0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 79.85, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 41.0, 3320.75]


In [61]:
# repeat as before
customer = {
    'gender': 'female',
    'seniorcitizen': 1,
    'partner': 'no',
    'dependents': 'no',
    'phoneservice': 'yes',
    'multiplelines': 'yes',
    'internetservice': 'fiber_optic',
    'onlinesecurity': 'no',
    'onlinebackup': 'no',
    'deviceprotection': 'no',
    'techsupport': 'no',
    'streamingtv': 'yes',
    'streamingmovies': 'no',
    'contract': 'month-to-month',
    'paperlessbilling': 'yes',
    'paymentmethod': 'electronic_check',
    'tenure': 1,
    'monthlycharges': 85.7,
    'totalcharges': 85.7
}

In [62]:
# get the prediction for the test model
y_pred_small = model_small.predict_proba(X_small_val)[:, 1]
X_test = dv.transform([customer])
model.predict_proba(X_test)[0, 1]

0.8321638622459152