In [1]:
#pip install flaml[auto]==2.3.3

import flaml
import joblib
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.metrics import f1_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

from transformers import CustomerIdTransformer

In [2]:
import pandas as pd

In [3]:
# Load the Telco customer-churn CSV; the path is relative to the notebook's
# working directory.
# NOTE(review): in the public version of this dataset, TotalCharges is reported
# to contain blank strings and therefore to parse as object — confirm with
# df.dtypes. If so, build_pipeline() will treat it as a categorical column.
df = pd.read_csv('dataset/WA_Fn-UseC_-Telco-Customer-Churn.csv')

In [4]:
# Summarise the raw frame: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   customerID        7043 non-null   object 
 1   gender            7043 non-null   object 
 2   SeniorCitizen     7043 non-null   int64  
 3   Partner           7043 non-null   object 
 4   Dependents        7043 non-null   object 
 5   tenure            7043 non-null   int64  
 6   PhoneService      7043 non-null   object 
 7   MultipleLines     7043 non-null   object 
 8   InternetService   7043 non-null   object 
 9   OnlineSecurity    7043 non-null   object 
 10  OnlineBackup      7043 non-null   object 
 11  DeviceProtection  7043 non-null   object 
 12  TechSupport       7043 non-null   object 
 13  StreamingTV       7043 non-null   object 
 14  StreamingMovies   7043 non-null   object 
 15  Contract          7043 non-null   object 
 16  PaperlessBilling  7043 non-null   object 


In [5]:
# Preview the first rows of the raw frame.
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [6]:
# Separate the target ("Churn") from the feature columns.
# `columns=` already identifies the axis, so no explicit `axis` is needed.
y = df["Churn"]
X = df.drop(columns=["Churn"])

In [7]:
def build_pipeline(features=None):
    """Build the (unfitted) churn pipeline: preprocessing followed by FLAML AutoML.

    Parameters
    ----------
    features : pandas.DataFrame, optional
        Frame used only to discover which columns are object-typed. When
        omitted, the notebook-level ``X`` is used, so existing
        ``build_pipeline()`` calls keep working.

    Returns
    -------
    pipeline : sklearn.pipeline.Pipeline
        Two steps: ``"preprocessor"`` (a ``ColumnTransformer``) and
        ``"automl"`` (a ``flaml.AutoML`` instance).
    pipeline_settings : dict
        The AutoML fit settings with every key prefixed by ``automl__`` so
        that ``pipeline.fit(X, y, **pipeline_settings)`` forwards them to the
        ``"automl"`` step.
    """
    if features is None:
        # Fall back to the notebook-level feature frame.
        features = X

    id_columns = ["customerID"]

    # Take the dtype mask from the frame that is actually transformed rather
    # than from the raw `df`, which also carries the target column.
    object_mask = (features.dtypes == "object").to_numpy()
    categorical_features = [
        column
        for column in features.columns[object_mask]
        if column not in id_columns
    ]

    id_transformer = (
        "customer_id",
        CustomerIdTransformer(id_columns),
        id_columns,
    )
    encode_transformer = (
        "encoder",
        # The fitted pipeline is persisted and reused for inference, where
        # categories unseen during training may appear; encode those as
        # all-zero rows instead of raising.
        OneHotEncoder(sparse_output=False, handle_unknown="ignore"),
        categorical_features,
    )
    preprocessor = ColumnTransformer(
        transformers=[
            id_transformer,
            encode_transformer,
        ],
        remainder="passthrough",  # non-object columns go through unchanged
    )

    estimator_list = ["lgbm", "rf"]
    # FLAML expects `custom_hp` keyed by estimator name first and by
    # hyperparameter second; a top-level "n_estimators" key matches no
    # estimator and is silently ignored. `n_estimators` must also be an
    # integer, so sample it with `randint` (upper bound exclusive).
    custom_hp = {
        estimator_name: {
            "n_estimators": {
                "domain": flaml.tune.randint(20, 501),
                "low_cost_init_value": 20,
            }
        }
        for estimator_name in estimator_list
    }
    automl_settings = {
        "time_budget": 120,
        "metric": "accuracy",
        "task": "classification",
        "estimator_list": estimator_list,
        "custom_hp": custom_hp,
        "verbose": -1,
    }
    # Pipeline.fit routes `<step>__<param>` keyword arguments to that step.
    pipeline_settings = {
        f"automl__{key}": value for key, value in automl_settings.items()
    }

    automl = flaml.AutoML()
    pipeline = Pipeline(
        steps=[
            ("preprocessor", preprocessor),
            ("automl", automl),
        ]
    )
    return pipeline, pipeline_settings

In [8]:
# Create the unfitted pipeline together with the `automl__`-prefixed fit
# settings that are passed to `pipeline.fit`.
pipeline, settings = build_pipeline()


In [9]:
# Fit preprocessing + AutoML on the whole dataset. The `automl__*` keyword
# arguments are forwarded by the Pipeline to the "automl" step's fit().
# NOTE(review): no train/test split is made in the visible cells, so this run
# yields no held-out performance estimate.
pipeline.fit(X, y, **settings)

The format of the columns of the 'remainder' transformer in ColumnTransformer.transformers_ will change in version 1.7 to match the format of the other transformers.
At the moment the remainder columns are stored as indices (of type int). With the same ColumnTransformer configuration, in the future they will be stored as column names (of type str).



In [10]:
# Smoke test on the first row; the double brackets keep it a one-row
# DataFrame, so the ColumnTransformer can still select columns by name.
pipeline.predict(X.iloc[[0]])

array(['Yes'], dtype=object)

In [11]:
# Persist the fitted pipeline (preprocessor + AutoML model) for later reuse.
joblib.dump(value=pipeline, filename="churn.pipeline.pkl")

['churn.pipeline.pkl']

## Gradio

In [None]:
#!pip install gradio
#!pip install --upgrade gradio fsspec
#!pip install markupsafe==2.0.1
#import gradio as gr
