# Credit Card Retention Analysis

## Imports

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import plotly.graph_objs as go
from plotly.offline import iplot
# Apply seaborn's default plot theme to figures drawn later in the notebook.
sns.set()
# Raise pandas' column display limit so wide frames are shown without truncation.
pd.options.display.max_columns = 999

In [2]:
# Read the raw customer table from the CSV file (path is relative to the notebook).
csv_path = '../data/BankChurners_v2.csv'
data = pd.read_csv(csv_path)

In [3]:
# Keep only the columns used in this analysis.
# The explicit .copy() makes `data` an independent frame rather than a
# selection pandas may still track as derived from the raw table, so the
# column assignments in later cells do not trigger SettingWithCopyWarning.
data = data[['CLIENTNUM', 'Attrition_Flag', 'Customer_Age', 'Gender',
       'Dependent_count', 'Education_Level', 'Marital_Status',
       'Income_Category', 'Card_Category', 'Months_on_book',
       'Total_Relationship_Count', 'Months_Inactive_12_mon',
       'Contacts_Count_12_mon', 'Credit_Limit', 'Total_Revolving_Bal',
       'Avg_Open_To_Buy', 'Total_Amt_Chng_Q4_Q1', 'Total_Trans_Amt',
       'Total_Trans_Ct', 'Total_Ct_Chng_Q4_Q1', 'Avg_Utilization_Ratio',]].copy()

In [4]:
# Label missing values in these categorical columns explicitly as 'Unknown'.
for column in ('Education_Level', 'Marital_Status', 'Income_Category'):
    data[column] = data[column].fillna('Unknown')

In [5]:
# Bucket Customer_Age into decade labels.
# Reference: https://towardsdatascience.com/data-preprocessing-with-python-pandas-part-5-binning-c5bd5fd1b950
# With right=False every bin is closed on the left and open on the right,
# e.g. [30, 40) -> '30s', so an age of exactly 40 lands in '40s'.
# The first edge is 20 (previously 25) so the '20s' label covers the whole
# decade; ages 20-24 would otherwise fall outside every bin and become NaN.
# Labels for all ages in [25, 80) are unchanged. Ages of 80 or more still map to NaN.
bins = [20, 30, 40, 50, 60, 70, 80]
labels = ['20s', '30s', '40s', '50s', '60s', '70s']
data['Customer_Age_bins'] = pd.cut(data['Customer_Age'], bins=bins, labels=labels, include_lowest=True, right=False)

***

## Data Visualization: Data Tables

In [10]:
# List the column labels of the prepared frame.
data.columns

Index(['CLIENTNUM', 'Attrition_Flag', 'Customer_Age', 'Gender',
       'Dependent_count', 'Education_Level', 'Marital_Status',
       'Income_Category', 'Card_Category', 'Months_on_book',
       'Total_Relationship_Count', 'Months_Inactive_12_mon',
       'Contacts_Count_12_mon', 'Credit_Limit', 'Total_Revolving_Bal',
       'Avg_Open_To_Buy', 'Total_Amt_Chng_Q4_Q1', 'Total_Trans_Amt',
       'Total_Trans_Ct', 'Total_Ct_Chng_Q4_Q1', 'Avg_Utilization_Ratio',
       'Customer_Age_bins'],
      dtype='object')

In [12]:
# Per-attrition-group summary: number of distinct clients plus the mean of
# three numeric columns. Transposed so metrics are rows and groups are columns.
summary_spec = {
  'CLIENTNUM': 'nunique',
  'Customer_Age': 'mean',
  'Months_on_book': 'mean',
  'Credit_Limit': 'mean',
}
grouped_by_attrition = data.groupby(['Attrition_Flag'])
data_pivot = grouped_by_attrition.agg(summary_spec).transpose()
data_pivot

Attrition_Flag,Attrited Customer,Existing Customer
CLIENTNUM,1627.0,8500.0
Customer_Age,46.659496,46.262118
Months_on_book,36.178242,35.880588
Credit_Limit,8136.039459,8726.877518


For styling a DataFrame, see this [guide](https://towardsdatascience.com/style-pandas-dataframe-like-a-master-6b02bf6468b0). For row-wise formatting, see [this Stack Overflow answer](https://stackoverflow.com/questions/52783419/format-pandas-dataframe-row-wise).

In [15]:
# Relative difference of the attrited group versus the existing group for each
# metric row: 0 means equal, negative means the attrited value is lower.
attrited = data_pivot['Attrited Customer']
existing = data_pivot['Existing Customer']
data_pivot['Diff'] = attrited.div(existing).sub(1)
# Show the metrics ordered from most negative to most positive difference.
data_pivot.sort_values(by="Diff")

Attrition_Flag,Attrited Customer,Existing Customer,Diff
CLIENTNUM,1627.0,8500.0,-0.808588
Credit_Limit,8136.039459,8726.877518,-0.067703
Months_on_book,36.178242,35.880588,0.008296
Customer_Age,46.659496,46.262118,0.00859
