In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas.api.types import is_numeric_dtype

In [13]:
customer_churn = pd.read_csv('telecom_customer_churn.csv')


In [14]:
#check nulls
def check_nulls(df):
        #create a dectionary to import num of nulls for each column in
    nulls = {}
    for col in df.columns:
        nulls[col] = df[col].isna().sum()
    return nulls

In [15]:
#create a DEF to check outliers for all data
def check_outliers(df):
    #create a dectionary to import num of outlier for each column in
    results = {}
    for col in df.columns:
        if is_numeric_dtype(df[col]): #check that all columns type numeric only
            q1 = df[col].quantile(0.25) # frist quarter of data
            q3 = df[col].quantile(0.75) #third quarter of data
        
            iqr = q3-q1 # IQR
            lower = q1 - 1.5 * iqr #lower outlier
            upper = q3 + 1.5 * iqr # max outlier
    
            outliers = (df[col] < lower) | (df[col] > upper) # check outlier
            results[col] = {
                "outlier_count": outliers.sum(), #return outlier count
                "lower_bound": lower, #return min value for outlier
                "upper_bound": upper # return max value of outlier
            }
    return results

In [16]:
check_outliers(customer_churn)

{'Age': {'outlier_count': np.int64(0),
  'lower_bound': np.float64(-10.0),
  'upper_bound': np.float64(102.0)},
 'Number of Dependents': {'outlier_count': np.int64(1627),
  'lower_bound': np.float64(0.0),
  'upper_bound': np.float64(0.0)},
 'Zip Code': {'outlier_count': np.int64(0),
  'lower_bound': np.float64(87259.0),
  'upper_bound': np.float64(100171.0)},
 'Latitude': {'outlier_count': np.int64(0),
  'lower_bound': np.float64(27.734633499999994),
  'upper_bound': np.float64(44.417333500000005)},
 'Longitude': {'outlier_count': np.int64(0),
  'lower_bound': np.float64(-127.51553249999998),
  'upper_bound': np.float64(-112.24235250000001)},
 'Number of Referrals': {'outlier_count': np.int64(676),
  'lower_bound': np.float64(-4.5),
  'upper_bound': np.float64(7.5)},
 'Tenure in Months': {'outlier_count': np.int64(0),
  'lower_bound': np.float64(-60.0),
  'upper_bound': np.float64(124.0)},
 'Avg Monthly Long Distance Charges': {'outlier_count': np.int64(0),
  'lower_bound': np.float64(

In [17]:
check_nulls(customer_churn)

{'Customer ID': np.int64(0),
 'Gender': np.int64(0),
 'Age': np.int64(0),
 'Married': np.int64(0),
 'Number of Dependents': np.int64(0),
 'City': np.int64(0),
 'Zip Code': np.int64(0),
 'Latitude': np.int64(0),
 'Longitude': np.int64(0),
 'Number of Referrals': np.int64(0),
 'Tenure in Months': np.int64(0),
 'Offer': np.int64(3877),
 'Phone Service': np.int64(0),
 'Avg Monthly Long Distance Charges': np.int64(682),
 'Multiple Lines': np.int64(682),
 'Internet Service': np.int64(0),
 'Internet Type': np.int64(1526),
 'Avg Monthly GB Download': np.int64(1526),
 'Online Security': np.int64(1526),
 'Online Backup': np.int64(1526),
 'Device Protection Plan': np.int64(1526),
 'Premium Tech Support': np.int64(1526),
 'Streaming TV': np.int64(1526),
 'Streaming Movies': np.int64(1526),
 'Streaming Music': np.int64(1526),
 'Unlimited Data': np.int64(1526),
 'Contract': np.int64(0),
 'Paperless Billing': np.int64(0),
 'Payment Method': np.int64(0),
 'Monthly Charge': np.int64(0),
 'Total Charges

In [31]:
customer_churn['Internet Type'].fillna("No internet",inplace=True)
customer_churn['Device Protection Plan'].fillna('No',inplace=True)
customer_churn['Churn Category'].fillna("Stayed" , inplace=True)
customer_churn['Churn Reason'].fillna("Stayed" , inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  customer_churn['Churn Reason'].fillna("Stayed" , inplace=True)


In [34]:
customer_churn['Churn Reason'].value_counts()
customer_churn['Churn Category'].value_counts()

Churn Category
Stayed             5174
Competitor          841
Dissatisfaction     321
Attitude            314
Price               211
Other               182
Name: count, dtype: int64