In [8]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.impute import SimpleImputer

from sklearn.ensemble import RandomForestClassifier

# --- Load data ---------------------------------------------------------------
# `data` holds the frame exactly as read from disk; `df` is a working copy so
# the raw frame stays available for inspection.
data = pd.read_csv('Churn_Modelling.csv')
df = data.copy()

# --- Features / target -------------------------------------------------------
# 'Exited' is the label column; every other column starts as a candidate feature.
X = df.drop('Exited', axis=1)
y = df['Exited']

# Exclude ID columns (not predictive features). errors='ignore' keeps this
# step from failing when one of the listed columns is absent.
id_columns = ['RowNumber', 'CustomerId', 'Surname']
X = X.drop(id_columns, axis=1, errors='ignore')

# --- Column groups (taken from X only, so target and IDs are excluded) -------
categorical_cols = X.select_dtypes(include=['object', 'category']).columns.tolist()
# 'number' matches every numeric dtype. Listing only int64/float64 would leave
# e.g. int32/float32 columns in neither group, and ColumnTransformer would then
# drop them silently (its `remainder` defaults to 'drop').
numerical_cols = X.select_dtypes(include='number').columns.tolist()

# --- Preprocessing -----------------------------------------------------------
# Categorical: fill gaps with the most frequent value, then map categories to
# integer codes. Categories not seen during fit are encoded as -1 at transform
# time instead of raising.
categorical_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OrdinalEncoder(
        handle_unknown='use_encoded_value',
        unknown_value=-1
    ))
])

# Numerical: fill gaps with the column median, then standardize.
numerical_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

# Output columns follow the transformer order: categorical first, then numerical.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', categorical_pipeline, categorical_cols),
        ('num', numerical_pipeline, numerical_cols)
    ]
)

# --- Model -------------------------------------------------------------------
# Preprocessing and classifier live in one pipeline so both are fitted together.
# random_state is fixed so repeated runs build the same forest.
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42))
])

# NOTE(review): the model is fitted on the full dataset; there is no hold-out
# split in this cell, so nothing here measures generalization.
model.fit(X, y)

# Feature matrix as the classifier sees it (output of the fitted preprocessor).
X_transformed = model.named_steps['preprocessor'].transform(X)



In [9]:
# Show the preprocessed feature matrix produced by the fitted preprocessor:
# ordinal-encoded categorical columns come first, then the standardized
# numerical columns (same order as the ColumnTransformer's transformer list).
X_transformed

array([[ 0.        ,  0.        , -0.32622142, ...,  0.64609167,
         0.97024255,  0.02188649],
       [ 2.        ,  0.        , -0.44003595, ..., -1.54776799,
         0.97024255,  0.21653375],
       [ 0.        ,  0.        , -1.53679418, ...,  0.64609167,
        -1.03067011,  0.2406869 ],
       ...,
       [ 0.        ,  0.        ,  0.60498839, ..., -1.54776799,
         0.97024255, -1.00864308],
       [ 1.        ,  1.        ,  1.25683526, ...,  0.64609167,
        -1.03067011, -0.12523071],
       [ 0.        ,  0.        ,  1.46377078, ...,  0.64609167,
        -1.03067011, -1.07636976]])

In [None]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.impute import SimpleImputer

from sklearn.ensemble import RandomForestClassifier

# NOTE(review): this cell repeats the load / preprocess / fit steps of the
# In [8] cell above. Consider deleting one copy so a Restart & Run All reads
# the CSV and trains the forest only once.

# --- Load data ---------------------------------------------------------------
# `data` holds the frame exactly as read from disk; `df` is a working copy so
# the raw frame stays available for inspection.
data = pd.read_csv('Churn_Modelling.csv')
df = data.copy()

# --- Features / target -------------------------------------------------------
# 'Exited' is the label column; every other column starts as a candidate feature.
X = df.drop('Exited', axis=1)
y = df['Exited']

# Exclude ID columns (not predictive features). errors='ignore' keeps this
# step from failing when one of the listed columns is absent.
id_columns = ['RowNumber', 'CustomerId', 'Surname']
X = X.drop(id_columns, axis=1, errors='ignore')

# --- Column groups (taken from X only, so target and IDs are excluded) -------
categorical_cols = X.select_dtypes(include=['object', 'category']).columns.tolist()
# 'number' matches every numeric dtype. Listing only int64/float64 would leave
# e.g. int32/float32 columns in neither group, and ColumnTransformer would then
# drop them silently (its `remainder` defaults to 'drop').
numerical_cols = X.select_dtypes(include='number').columns.tolist()

# --- Preprocessing -----------------------------------------------------------
# Categorical: fill gaps with the most frequent value, then map categories to
# integer codes. Categories not seen during fit are encoded as -1 at transform
# time instead of raising.
categorical_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OrdinalEncoder(
        handle_unknown='use_encoded_value',
        unknown_value=-1
    ))
])

# Numerical: fill gaps with the column median, then standardize.
numerical_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

# Output columns follow the transformer order: categorical first, then numerical.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', categorical_pipeline, categorical_cols),
        ('num', numerical_pipeline, numerical_cols)
    ]
)

# --- Model -------------------------------------------------------------------
# Preprocessing and classifier live in one pipeline so both are fitted together.
# random_state is fixed so repeated runs build the same forest.
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42))
])

# NOTE(review): the model is fitted on the full dataset; there is no hold-out
# split in this cell, so nothing here measures generalization.
model.fit(X, y)

# Feature matrix as the classifier sees it (output of the fitted preprocessor).
X_transformed = model.named_steps['preprocessor'].transform(X)

