# Customer Churn Analysis and Reduction for Subscription Service

In [1]:
import pandas as pd
import plotly.express as px
#import sys
#!{sys.executable} -m pip install plotly kaleido
#!{sys.executable} -m pip install --upgrade nbformat

### Load dataset and perform initial cleaning

In [5]:
# Relative location of the cancellations CSV.
DATASET_PATH = "../dataset/cancelamentos2.csv"

# Columns discarded immediately after loading.
_columns_to_discard = ["Unnamed: 0", "Codigo"]

# Read the file (latin1-encoded) and drop the discarded columns in one pass.
df = pd.read_csv(DATASET_PATH, encoding="latin1").drop(columns=_columns_to_discard)

# Summarise columns, dtypes and non-null counts of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5986 entries, 0 to 5985
Data columns (total 21 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   IDCliente               5986 non-null   object 
 1   Genero                  5986 non-null   object 
 2   Aposentado              5986 non-null   int64  
 3   Casado                  5986 non-null   object 
 4   Dependentes             5985 non-null   object 
 5   MesesComoCliente        5986 non-null   int64  
 6   ServicoTelefone         5986 non-null   object 
 7   MultiplasLinhas         5986 non-null   object 
 8   ServicoInternet         5986 non-null   object 
 9   ServicoSegurancaOnline  5986 non-null   object 
 10  ServicoBackupOnline     5986 non-null   object 
 11  ProtecaoEquipamento     5986 non-null   object 
 12  ServicoSuporteTecnico   5986 non-null   object 
 13  ServicoStreamingTV      5986 non-null   object 
 14  ServicoFilmes           5986 non-null   

In [6]:
# Discard every row that has a missing value in any column, then re-inspect the frame.
df = df.dropna(axis="index", how="any")
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5984 entries, 0 to 5985
Data columns (total 21 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   IDCliente               5984 non-null   object 
 1   Genero                  5984 non-null   object 
 2   Aposentado              5984 non-null   int64  
 3   Casado                  5984 non-null   object 
 4   Dependentes             5984 non-null   object 
 5   MesesComoCliente        5984 non-null   int64  
 6   ServicoTelefone         5984 non-null   object 
 7   MultiplasLinhas         5984 non-null   object 
 8   ServicoInternet         5984 non-null   object 
 9   ServicoSegurancaOnline  5984 non-null   object 
 10  ServicoBackupOnline     5984 non-null   object 
 11  ProtecaoEquipamento     5984 non-null   object 
 12  ServicoSuporteTecnico   5984 non-null   object 
 13  ServicoStreamingTV      5984 non-null   object 
 14  ServicoFilmes           5984 non-null   objec

In [7]:
# Preview the first five rows of the frame.
df.head(n=5)

Unnamed: 0,IDCliente,Genero,Aposentado,Casado,Dependentes,MesesComoCliente,ServicoTelefone,MultiplasLinhas,ServicoInternet,ServicoSegurancaOnline,...,ProtecaoEquipamento,ServicoSuporteTecnico,ServicoStreamingTV,ServicoFilmes,TipoContrato,FaturaDigital,FormaPagamento,ValorMensal,TotalGasto,Churn
0,7010-BRBUU,Masculino,0,Sim,Sim,72,Sim,Sim,Nao,SemInternet,...,SemInternet,SemInternet,SemInternet,SemInternet,2 anos,Nao,CartaoCredito,24.1,1734.65,Nao
1,9688-YGXVR,Feminino,0,Nao,Nao,44,Sim,Nao,Fibra,Nao,...,Sim,Nao,Sim,Nao,Mensal,Sim,CartaoCredito,88.15,3973.2,Nao
2,9286-DOJGF,Feminino,1,Sim,Nao,38,Sim,Sim,Fibra,Nao,...,Nao,Nao,Nao,Nao,Mensal,Sim,DebitoAutomatico,74.95,2869.85,Sim
3,6994-KERXL,Masculino,0,Nao,Nao,4,Sim,Nao,DSL,Nao,...,Nao,Nao,Nao,Sim,Mensal,Sim,BoletoEletronico,55.9,238.5,Nao
4,2181-UAESM,Masculino,0,Nao,Nao,2,Sim,Nao,DSL,Sim,...,Sim,Nao,Nao,Nao,Mensal,Nao,BoletoEletronico,53.45,119.5,Nao


### Print churn distribution

In [8]:
# Absolute and relative frequency of each churn label.
churn_counts = df["Churn"].value_counts()
churn_shares = df["Churn"].value_counts(normalize=True).map("{:.1%}".format)

print("\nChurn distribution:")
for table in (churn_counts, churn_shares):
    print(table)


Churn distribution:
Churn
Nao    4397
Sim    1587
Name: count, dtype: int64
Churn
Nao    73.5%
Sim    26.5%
Name: proportion, dtype: object


### Analyze contract type and filter out monthly contracts

In [14]:
# Encode the churn label as 0/1 so that its mean is the churn rate.
# The guard keys on "not numeric yet" rather than `dtype == object`: text columns
# may be loaded with a dedicated string dtype, for which the object comparison is
# False and the encoding would be skipped. It also keeps the cell safe to re-run.
if not pd.api.types.is_numeric_dtype(df["Churn"]):
    churn_encoded = df["Churn"].map({"Nao": 0, "Sim": 1})

    # `.map` turns any label missing from the dict into NaN, which `.mean()` would
    # then silently ignore; fail loudly instead.
    unmapped = df.loc[churn_encoded.isna(), "Churn"].unique()
    if len(unmapped) > 0:
        raise ValueError(f"Unexpected Churn labels: {list(unmapped)}")

    # `assign` builds a new frame instead of writing into a possibly derived one.
    df = df.assign(Churn=churn_encoded.astype("int64"))

print("\nChurn rate by contract type:")
print(df.groupby("TipoContrato")["Churn"].mean().map("{:.1%}".format))


Churn rate by contract type:
TipoContrato
2 anos     2.8%
Anual     11.6%
Mensal    42.8%
Name: Churn, dtype: object


42.8% dos clientes com contrato mensal cancelaram a assinatura.
Esse já é um ponto importante da nossa análise: existe um plano dessa empresa em que quase metade dos clientes cancela o serviço, contra 11.6% no plano anual e 2.8% no plano de 2 anos.

In [17]:
# Keep only the non-monthly contracts and report the churn share that remains.
print("\nRemoving monthly contracts for further analysis...")
not_monthly = df["TipoContrato"].ne("Mensal")
df = df.loc[not_monthly]

remaining_churn_share = df["Churn"].value_counts(normalize=True)
print("Churn distribution after removing monthly contracts:")
print(remaining_churn_share.map("{:.1%}".format))
# Legend for the 0/1 labels printed above.
print("'Nao': 0, 'Sim': 1")


Removing monthly contracts for further analysis...
Churn distribution after removing monthly contracts:
Churn
0    93.0%
1     7.0%
Name: proportion, dtype: object
'Nao': 0, 'Sim': 1


Ao desconsiderar os contratos mensais, a porcentagem de cancelamento na amostra cai de 26.5% para 7.0%.
Vamos continuar avaliando as demais variáveis até chegar a um valor aceitável.

### Plota histogramas para cada feature por cancelamento

In [20]:
# Columns that carry no information as a histogram axis: the customer identifier
# (presumably one distinct value per row — confirm) and the target used for colouring.
NON_FEATURE_COLUMNS = {"IDCliente", "Churn"}

# One histogram per remaining column, split by churn status (0 = Nao, 1 = Sim).
for column in df.columns:
    if column in NON_FEATURE_COLUMNS:
        continue
    fig = px.histogram(
        df,
        x=column,
        color="Churn",
        width=600,
        title=f"{column} by churn",  # each figure should stand alone when skimmed
    )
    fig.show()