In [None]:
from sklearn.feature_selection import SelectKBest, chi2
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [None]:
# Load the raw data and drop the Name, Customer_ID, ID and SSN columns in one
# step; none of the cells visible below reads them.
df = pd.read_csv('credit_score.csv').drop(columns=['Name', 'Customer_ID', 'ID', 'SSN'])

In [None]:
# Columns stored as int64 / float64.
numerical_cols = df.select_dtypes(include=['int64', 'float64']).columns

# Candidate features for chi-squared scoring: every numeric column except 'Month'.
relevant_cols = [name for name in numerical_cols if name != 'Month']

In [None]:
def apply_k_beast(X, y, score_threshold=1e5):
    """Return the features whose chi-squared score against ``y`` exceeds a threshold.

    Parameters
    ----------
    X : pandas.DataFrame or array-like
        Feature matrix handed to ``SelectKBest.fit``. When ``X`` exposes a
        ``columns`` attribute, those labels name the scored features, so the
        names always match the matrix that was actually fitted. Otherwise the
        module-level ``relevant_cols`` list is used, as the original code did.
    y : array-like
        Target values handed to ``SelectKBest.fit``.
    score_threshold : float, default 1e5
        Only features with a score strictly greater than this value are kept.

    Returns
    -------
    list
        Names of the retained features, ordered by descending score.
    """
    # k='all' keeps every feature: the selector is used only to obtain scores.
    bestfeatures = SelectKBest(score_func=chi2, k='all')
    fit = bestfeatures.fit(X, y)

    # Label the scores with the columns of the fitted matrix rather than a global list.
    feature_names = list(X.columns) if hasattr(X, 'columns') else list(relevant_cols)

    feature_scores = pd.DataFrame(
        {'Feature': feature_names, 'Score': fit.scores_}
    ).sort_values(by='Score', ascending=False)

    above_threshold = feature_scores['Score'] > score_threshold
    return feature_scores.loc[above_threshold, 'Feature'].tolist()

In [None]:
def random_classifier(X_train, X_val, y_train, y_val, X_predict, label_encoder):
    """Fit a random forest, report its validation accuracy, and predict decoded labels.

    Parameters
    ----------
    X_train, y_train
        Training features and label-encoded targets used to fit the model.
    X_val, y_val
        Held-out features and label-encoded targets used for the accuracy report.
    X_predict
        Feature rows whose target should be predicted.
    label_encoder
        Fitted encoder used to map predicted codes back to the original labels.

    Returns
    -------
    array
        Decoded predictions, one per row of ``X_predict`` (empty when
        ``X_predict`` has no rows).
    """
    # Fixed seed and tree count keep the predictions reproducible between runs.
    rf_classifier = RandomForestClassifier(n_estimators=50, random_state=42)
    rf_classifier.fit(X_train, y_train)

    # Report the hold-out accuracy; it used to be computed and silently discarded.
    y_val_pred = rf_classifier.predict(X_val)
    val_accuracy = accuracy_score(y_val, y_val_pred)
    print(f"Validation accuracy: {val_accuracy:.4f}")

    # Nothing to predict: predict() rejects a zero-row input, so return an
    # empty array that has the same dtype as the encoder's labels.
    if len(X_predict) == 0:
        return label_encoder.classes_[:0]

    # Predict the target for the prediction set, then decode the integer
    # codes back to the original categories.
    y_predict_encoded = rf_classifier.predict(X_predict)
    return label_encoder.inverse_transform(y_predict_encoded)

In [None]:
def label_train_data(train_data, predict_data, selected_features, column):
    """Train on ``train_data`` and predict ``column`` for the rows of ``predict_data``.

    The target ``column`` is label-encoded, the labelled rows are split 80/20
    into training and validation parts (seed 42), and the pieces are handed to
    ``random_classifier`` together with the ``selected_features`` columns of
    ``predict_data``. Whatever ``random_classifier`` returns is passed through.
    """
    features = train_data[selected_features]
    target = train_data[column]

    # Turn the categorical target into integer codes.
    label_encoder = LabelEncoder()
    encoded_target = label_encoder.fit_transform(target)

    # Hold out 20% of the labelled rows for validation.
    X_train, X_val, y_train, y_val = train_test_split(
        features,
        encoded_target,
        test_size=0.2,
        random_state=42,
    )

    # Rows to predict, restricted to the same feature columns.
    return random_classifier(
        X_train,
        X_val,
        y_train,
        y_val,
        predict_data[selected_features],
        label_encoder,
    )

In [None]:
# Score features only on rows whose 'Type_of_Loan' is not the 'No Data' placeholder.
df_for_feature_selection = df.loc[df['Type_of_Loan'].ne('No Data')]

# Feature matrix and target variable for the chi-squared scoring.
X = df_for_feature_selection.loc[:, relevant_cols]
y = df_for_feature_selection.loc[:, 'Type_of_Loan']
selected_features = apply_k_beast(X, y)

In [None]:
# Rows with a real 'Type_of_Loan' value train the model; rows holding the
# 'No Data' placeholder form the prediction set.
train_data = df.loc[df['Type_of_Loan'].ne('No Data')]
predict_data = df.loc[df['Type_of_Loan'].eq('No Data')]
y_predict = label_train_data(
    train_data,
    predict_data,
    selected_features,
    column='Type_of_Loan',
)

In [None]:
# Write the predictions back onto exactly the rows they were made for.
# Addressing them through predict_data's index, instead of re-testing for
# 'No Data', keeps this cell re-runnable: after the first run no 'No Data'
# rows remain, so a recomputed mask would select zero rows while y_predict
# still holds one value per predicted row.
# Assumes df's index labels are unique (true for read_csv's default index).
df.loc[predict_data.index, 'Type_of_Loan'] = y_predict

In [None]:
# Score features only on rows whose 'Payment_of_Min_Amount' is not the 'NM' placeholder.
df_for_feature_selection = df.loc[df['Payment_of_Min_Amount'].ne('NM')]

# Feature matrix and target variable for the chi-squared scoring.
X = df_for_feature_selection.loc[:, relevant_cols]
y = df_for_feature_selection.loc[:, 'Payment_of_Min_Amount']
selected_features = apply_k_beast(X, y)

In [None]:
# Rows with a real 'Payment_of_Min_Amount' value train the model; rows holding
# the 'NM' placeholder form the prediction set.
train_data = df.loc[df['Payment_of_Min_Amount'].ne('NM')]
predict_data = df.loc[df['Payment_of_Min_Amount'].eq('NM')]
y_predict = label_train_data(
    train_data,
    predict_data,
    selected_features,
    column='Payment_of_Min_Amount',
)


In [None]:
# Write the predictions back onto exactly the rows they were made for.
# Addressing them through predict_data's index, instead of re-testing for
# 'NM', keeps this cell re-runnable: after the first run no 'NM' rows remain,
# so a recomputed mask would select zero rows while y_predict still holds one
# value per predicted row.
# Assumes df's index labels are unique (true for read_csv's default index).
df.loc[predict_data.index, 'Payment_of_Min_Amount'] = y_predict

In [None]:
# Persist the frame with both placeholder columns filled in; the row index is not written.
df.to_csv(path_or_buf='remove_null.csv', index=False)