# Hyperparameter Tuning

---

In [1]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [2]:
# Load the credit-card dataset from the CSV that sits next to this notebook.
credit_card = pd.read_csv(filepath_or_buffer='credit-card-full.csv')

In [3]:
# Keep an independent (deep) copy of the frame as loaded, before the next cell
# rebinds `credit_card` to its one-hot-encoded version.
# NOTE(review): the "pseudo" suffix suggests later use that is not visible here.
credit_card_pseudo = credit_card.copy(deep=True)

In [4]:
# One-hot encode the three categorical columns; drop_first=True omits the first
# level of each so the resulting dummy columns are not perfectly collinear.
# NOTE: this rebinds `credit_card`, so re-running this cell on its own will fail
# because the original SEX/EDUCATION/MARRIAGE columns are already gone.
credit_card = pd.get_dummies(
    data=credit_card,
    columns=['SEX', 'EDUCATION', 'MARRIAGE'],
    drop_first=True,
)
credit_card.head(n=5)

Unnamed: 0,ID,LIMIT_BAL,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,PAY_6,BILL_AMT1,...,SEX_2,EDUCATION_1,EDUCATION_2,EDUCATION_3,EDUCATION_4,EDUCATION_5,EDUCATION_6,MARRIAGE_1,MARRIAGE_2,MARRIAGE_3
0,1,20000,24,2,2,-1,-1,-2,-2,3913,...,1,0,1,0,0,0,0,1,0,0
1,2,120000,26,-1,2,0,0,0,2,2682,...,1,0,1,0,0,0,0,0,1,0
2,3,90000,34,0,0,0,0,0,0,29239,...,1,0,1,0,0,0,0,0,1,0
3,4,50000,37,0,0,0,0,0,0,46990,...,1,0,1,0,0,0,0,1,0,0
4,5,50000,57,-1,0,-1,0,0,0,8617,...,0,0,1,0,0,0,0,1,0,0


In [6]:
# Features: every column except the row identifier and the target.
X = credit_card.drop(['ID', 'default payment next month'], axis=1)
# Target: the default indicator column.
y = credit_card['default payment next month']

# Hold out 30% of the rows for testing. The split is shuffled, so pin
# random_state: without it every run produces a different train/test partition
# and therefore different fitted coefficients, making the notebook's outputs
# impossible to reproduce.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, shuffle=True, random_state=42
)

In [7]:
# Baseline logistic regression with the iteration cap raised to 1000.
log_reg_clf = LogisticRegression(max_iter=1000)
# fit() returns the estimator, so leaving it as the last expression shows its repr.
log_reg_clf.fit(X=X_train, y=y_train)

LogisticRegression(max_iter=1000)

In [8]:
# Feature names (a pandas Index), in the column order of the training frame
original_variables = X_train.columns

# First row of the fitted coefficient matrix
# (presumably the only row, assuming the target is binary -- TODO confirm)
model_coefficients = log_reg_clf.coef_[0]

# Pair each feature name with its coefficient and print the full table
coefficient_df = pd.DataFrame(
    data={"Variable": original_variables, "Coefficient": model_coefficients}
)
print(coefficient_df)

# Rank by coefficient, largest first, and keep the first three rows.
# The ranking is not filtered by sign: it is simply the three largest values.
top_three_df = coefficient_df.sort_values("Coefficient", ascending=False).head(3)
print(top_three_df)

       Variable   Coefficient
0     LIMIT_BAL -3.085560e-06
1           AGE -1.668505e-02
2         PAY_0  1.196231e-03
3         PAY_2  9.322236e-04
4         PAY_3  8.053535e-04
5         PAY_4  7.884477e-04
6         PAY_5  7.211242e-04
7         PAY_6  6.817958e-04
8     BILL_AMT1 -8.943346e-06
9     BILL_AMT2  5.632673e-06
10    BILL_AMT3  1.645752e-06
11    BILL_AMT4 -6.955815e-07
12    BILL_AMT5  5.811045e-06
13    BILL_AMT6  9.035735e-07
14     PAY_AMT1 -2.458794e-05
15     PAY_AMT2 -1.888001e-05
16     PAY_AMT3 -7.376125e-06
17     PAY_AMT4 -9.181636e-06
18     PAY_AMT5 -8.051940e-06
19     PAY_AMT6 -3.627002e-07
20        SEX_2 -4.041581e-04
21  EDUCATION_1 -1.180449e-04
22  EDUCATION_2 -2.858248e-04
23  EDUCATION_3 -1.161464e-04
24  EDUCATION_4 -5.674601e-06
25  EDUCATION_5 -2.281105e-05
26  EDUCATION_6 -2.231985e-06
27   MARRIAGE_1 -9.612305e-05
28   MARRIAGE_2 -4.426227e-04
29   MARRIAGE_3 -8.133430e-06
  Variable  Coefficient
2    PAY_0     0.001196
3    PAY_2     0.00093