In [86]:
import pandas as pd 
import numpy as np  
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, make_scorer
from sklearn.preprocessing import StandardScaler



In [87]:
# Load the passenger-satisfaction survey from a CSV file in the working directory
df = pd.read_csv(filepath_or_buffer="Airline Passenger Satisfaction.csv")


Preprocessing Data

In [91]:
# Feature matrix: every column except the target and the columns listed below.
# NOTE(review): 'satisfaction score' is presumably derived from the target and
# dropped to avoid leakage -- confirm against the dataset description.
X = df.drop(columns=['satisfaction', 'id', 'Customer Type', 'index', 'satisfaction score'])
y = df['satisfaction']

# Turn the target labels into integer codes
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)

# One-hot encode the categorical columns; every other column is passed through as-is
categorical_columns = ['Gender', 'Type of Travel', 'Class']

ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), categorical_columns)],
    remainder='passthrough',
)
X_encoded = ct.fit_transform(X)



In [92]:
# Hold out 20% of the rows as a test set; the fixed seed makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y_encoded,
    test_size=0.2,
    random_state=42,
)


In [93]:
#Preprocess the features (optional but can aid convergence)
# The scaler is fitted on the training split only and then applied to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

#Create and train the Logistic Regression model with different solvers and regularization
solvers = ['lbfgs', 'liblinear', 'saga']
Cs = [0.001, 0.01, 0.1, 1, 10, 100]  # Values for the regularization parameter

best_accuracy = 0
best_model = None

# NOTE(review): the winning model is chosen by its accuracy on the test split,
# so best_accuracy is an optimistic estimate. Consider selecting on a validation
# split or with cross-validation on the training data instead.
for solver in solvers:
    for C in Cs:
        model = LogisticRegression(solver=solver, C=C, max_iter=5000)  # Try different combinations of solvers and C
        model.fit(X_train_scaled, y_train)

        # Evaluate the model on the held-out test split
        y_pred = model.predict(X_test_scaled)
        accuracy = accuracy_score(y_test, y_pred)

        print(f"Model's Accuracy (solver={solver}, C={C}, max_iter=5000): {accuracy}")

        # Keep track of the best model based on accuracy
        # (strict '>' means the first of several tied models is kept)
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_model = model

#Get the individual coefficients of the best model
if best_model is not None:
    best_coefficients = best_model.coef_[0]

    # The model was trained on the ColumnTransformer output, not on X itself:
    # the one-hot columns come first (one per category) and the passthrough
    # columns follow. X.columns therefore has a different order and length, so
    # the names must come from the fitted transformer.
    feature_names = ct.get_feature_names_out()
    assert len(feature_names) == len(best_coefficients), (
        "feature names and coefficients are out of sync"
    )

    # Print the individual coefficients
    print("\nBest Model's Individual Coefficients:")
    for feature, coef in zip(feature_names, best_coefficients):
        print(f"{feature}: {coef}")

Model's Accuracy (solver=lbfgs, C=0.001, max_iter=5000): 0.8586387434554974
Model's Accuracy (solver=lbfgs, C=0.01, max_iter=5000): 0.8587542346781645
Model's Accuracy (solver=lbfgs, C=0.1, max_iter=5000): 0.8584462580843856
Model's Accuracy (solver=lbfgs, C=1, max_iter=5000): 0.8584077610101633
Model's Accuracy (solver=lbfgs, C=10, max_iter=5000): 0.8584077610101633
Model's Accuracy (solver=lbfgs, C=100, max_iter=5000): 0.8584077610101633
Model's Accuracy (solver=liblinear, C=0.001, max_iter=5000): 0.8580227902679396
Model's Accuracy (solver=liblinear, C=0.01, max_iter=5000): 0.8586002463812751
Model's Accuracy (solver=liblinear, C=0.1, max_iter=5000): 0.8584077610101633
Model's Accuracy (solver=liblinear, C=1, max_iter=5000): 0.8584077610101633
Model's Accuracy (solver=liblinear, C=10, max_iter=5000): 0.8584077610101633
Model's Accuracy (solver=liblinear, C=100, max_iter=5000): 0.8584077610101633
Model's Accuracy (solver=saga, C=0.001, max_iter=5000): 0.8586387434554974
Model's Accur