1


In [7]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.impute import SimpleImputer

from sklearn.ensemble import RandomForestClassifier

# Read the churn dataset and work on a copy so the freshly loaded frame
# is never modified by later steps.
data = pd.read_csv('Churn_Modelling.csv')
df = data.copy()

# 'Exited' is the target; every other column is used as a feature.
# NOTE(review): nothing except the target is dropped here, so any
# identifier-like columns present in the CSV reach the model — confirm intended.
y = df['Exited']
X = df.drop(columns='Exited')

# Partition the feature columns by dtype (the target is already excluded).
numerical_cols = X.select_dtypes(include=['int64', 'float64']).columns
categorical_cols = X.select_dtypes(include=['object', 'category']).columns







# Numeric branch: fill gaps with the column median, then standardise.
numerical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categorical branch: fill gaps with the most frequent value, then map each
# category to an integer code. Categories not seen during fit become -1
# instead of raising.
categorical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)),
])

# Route each column group through its branch. The transformer order
# ('cat' first, then 'num') fixes the column order of the output matrix.
preprocessor = ColumnTransformer([
    ('cat', categorical_pipeline, categorical_cols),
    ('num', numerical_pipeline, numerical_cols),
])



# End-to-end estimator: preprocessing followed by a random forest whose
# seed is fixed (random_state=42) for repeatable fits.
model_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42)),
])



3

In [None]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LogisticRegression

# Load the churn dataset; `df` is a working copy of the loaded frame.
data = pd.read_csv('Churn_Modelling.csv')
df = data.copy()

# Target column is 'Exited'; all remaining columns are features.
y = df['Exited']
X = df.drop(columns='Exited')

# Column names per dtype group, materialised as plain Python lists
# before being handed to ColumnTransformer.
numerical_cols = X.select_dtypes(include=['int64', 'float64']).columns.to_list()
categorical_cols = X.select_dtypes(include=['object', 'category']).columns.to_list()

# Numeric branch: median imputation followed by standard scaling.
numerical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categorical branch: most-frequent imputation, then integer codes.
# Categories unseen at fit time are encoded as -1 rather than raising.
categorical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)),
])

# Apply each branch to its column group; 'cat' outputs come first, then 'num'.
preprocessor = ColumnTransformer([
    ('cat', categorical_pipeline, categorical_cols),
    ('num', numerical_pipeline, numerical_cols),
])

# Three-stage pipeline:
#   1. preprocessor      - impute / encode / scale the raw columns
#   2. feature_selection - SelectFromModel wrapped around an L1-penalised
#                          logistic regression (liblinear solver)
#   3. classifier        - L2-penalised logistic regression fitted on the
#                          features that pass stage 2
# Both logistic regressions use random_state=42 and max_iter=1000.
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    (
        'feature_selection',
        SelectFromModel(
            LogisticRegression(
                penalty='l1',
                C=1.0,
                solver='liblinear',
                random_state=42,
                max_iter=1000,
            )
        ),
    ),
    (
        'classifier',
        LogisticRegression(penalty='l2', random_state=42, max_iter=1000),
    ),
])

# Fit every stage of the pipeline (preprocessing, feature selection,
# classifier) on the full feature frame and target.
model.fit(X, y)

# Re-run the two fitted transform stages on X to obtain the matrix that is
# fed to the final classifier. Steps are looked up by name through the
# Pipeline's `named_steps` attribute (the previous `nxamed_steps` spelling
# raised AttributeError, so `transformed_X` was never assigned).
transformed_X = model.named_steps['feature_selection'].transform(
    model.named_steps['preprocessor'].transform(X)
)


In [5]:
# Display the first row of the preprocessed, feature-selected matrix.
transformed_X[0]

array([ 1.11500000e+03,  0.00000000e+00,  0.00000000e+00, -1.73187761e+00,
       -7.83213423e-01, -3.26221422e-01,  2.93517423e-01, -1.04175968e+00,
       -1.22584767e+00, -9.11583494e-01,  6.46091668e-01,  9.70242551e-01,
        2.18864940e-02])