In [5]:
import pandas as pd
import numpy as np

In [6]:
# Load the raw credit-score table; every later cell works from `data`.
data = pd.read_csv("CreditScore.csv")

In [None]:
# Peek at the first five rows.
data.head(n=5)

In [None]:
# Summarize column dtypes and non-null counts to spot missing values.
data.info()

In [None]:
# Peek at the last five rows (5 is the pandas default for tail()).
data.tail(n=5)

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import precision_score, recall_score

In [None]:
# Remove rows whose target label (Credit_Score) is missing.
# inplace=True mutates `data` itself, so every later cell sees the filtered frame.
data.dropna(subset=['Credit_Score'], inplace=True)

In [None]:
# (rows, columns) of `data` at this point in the notebook.
data.shape

In [None]:
# Define columns based on data types for preprocessing.
# The target is dropped before selecting numeric columns: if Credit_Score is
# stored as a number it must not end up in the feature list, because the
# feature matrix X does not contain it and ColumnTransformer would fail.
numeric_features = (
    data.drop(columns=['Credit_Score'])
    .select_dtypes(include=['float64', 'int64'])
    .columns.tolist()
)
# Categorical columns that are one-hot encoded; other non-numeric columns are not used.
categorical_features = ['Type_of_Loan', 'Credit_Mix', 'Payment_Behaviour']

In [None]:
# Preprocessing recipes, one per feature type.

# Numeric columns: fill gaps with the column median, then standardize.
numeric_steps = [
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
]
numeric_transformer = Pipeline(numeric_steps)

# Categorical columns: fill gaps with the literal 'missing', then one-hot encode.
# handle_unknown='ignore' keeps transform() from raising on unseen categories.
categorical_steps = [
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
]
categorical_transformer = Pipeline(categorical_steps)


In [None]:

# Route each column group through its own transformer.
# Output column order follows this list: numeric block first, then one-hot block.
column_routes = [
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
]
preprocessor = ColumnTransformer(transformers=column_routes)


In [None]:
# Separate the predictors (X) from the target column (y).
X = data.drop(columns=['Credit_Score'])
y = data['Credit_Score']


In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts


In [None]:
# Creating the pipeline with RandomForestClassifier.
# random_state is fixed so that cross-validation scores, the grid search and
# the reported feature importances are reproducible from run to run.
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('classifier', RandomForestClassifier(random_state=42))])

In [None]:


# 5-fold cross-validation of the untuned pipeline on the training split;
# the mean of the per-fold scores is reported.
cv_score = cross_val_score(
    estimator=pipeline,
    X=X_train,
    y=y_train,
    cv=5,
)
print(f"Cross Validation Score: {cv_score.mean()}")

In [None]:


# Define RandomForestClassifier hyperparameters for GridSearchCV.
# The 'classifier__' prefix routes each parameter to the 'classifier' step of `pipeline`.
param_grid = {
    'classifier__n_estimators': [100, 200, 300],
    'classifier__max_depth': [None, 10, 20, 30],
    'classifier__min_samples_split': [2, 5, 10],
    'classifier__min_samples_leaf': [1, 2, 4]
}

# GridSearchCV: 3 * 4 * 3 * 3 = 108 candidates, each scored with 5-fold CV.
# n_jobs=-1 spreads the fits across all available cores.
grid_search = GridSearchCV(pipeline, param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# GridSearchCV refits the best candidate on the whole training set by default
# (refit=True), so best_estimator_ is already trained; fitting it again would
# only repeat that work.
best_model = grid_search.best_estimator_

# Making predictions
y_pred = best_model.predict(X_test)

# Evaluating precision and recall (weighted by class support)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
print(f"Precision: {precision}")
print(f"Recall: {recall}")

# Displaying the 10 most important features
feature_importances = best_model.named_steps['classifier'].feature_importances_
onehot_encoder = (best_model.named_steps['preprocessor']
                  .named_transformers_['cat']
                  .named_steps['onehot'])
# get_feature_names() was removed from OneHotEncoder in scikit-learn 1.2;
# prefer get_feature_names_out() and fall back only on old releases.
if hasattr(onehot_encoder, 'get_feature_names_out'):
    onehot_names = onehot_encoder.get_feature_names_out(categorical_features)
else:
    onehot_names = onehot_encoder.get_feature_names(categorical_features)
# Name order mirrors the ColumnTransformer output: numeric columns, then one-hot columns.
feature_names = numeric_features + list(onehot_names)

importance_df = pd.DataFrame({'Feature': feature_names, 'Importance': feature_importances})
top_10_features = importance_df.sort_values(by='Importance', ascending=False).head(10)
print("Top 10 Most Important Features:")
print(top_10_features)


In [None]:
pip install torch

In [15]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.metrics import precision_score, recall_score
from scipy import sparse
from sklearn.preprocessing import LabelEncoder



# This section reuses the DataFrame `data` loaded from "CreditScore.csv" at the top of the notebook.
# To run this section on its own, load the data first, for example:
# data = pd.read_csv("CreditScore.csv")




In [16]:
# Dropping rows with NaN values in the target variable (Credit_Score).
# inplace=True mutates `data` itself.
data.dropna(subset=['Credit_Score'], inplace=True)


# Preprocessing pipeline
# The target is dropped before selecting numeric columns: if Credit_Score is
# stored as a number it must not end up in the feature list, because the
# feature matrix X does not contain it and ColumnTransformer would fail.
numeric_features = (
    data.drop(columns=['Credit_Score'])
    .select_dtypes(include=['float64', 'int64'])
    .columns.tolist()
)
categorical_features = ['Type_of_Loan', 'Credit_Mix', 'Payment_Behaviour']

# Numeric columns: median imputation followed by standardization.
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

# Categorical columns: fill gaps with the literal 'missing', then one-hot encode.
# handle_unknown='ignore' keeps transform() from raising on unseen categories.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Apply each transformer to its own column group.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])



In [17]:
# Separate the target from the predictors, then hold out 20% of the rows
# for testing (fixed seed, so the split is repeatable).
y = data['Credit_Score']
X = data.drop(columns=['Credit_Score'])
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [18]:

# Preprocessing data
# The preprocessor is fitted on the training split only and then applied
# unchanged to the test split.
# NOTE: X_train / X_test are rebound to the transformed matrices here, so the
# original DataFrames are gone; re-run the split cell before re-running this one.
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

In [19]:


# torch.tensor() cannot take a SciPy sparse matrix, so densify whichever
# split came back sparse and leave already-dense arrays untouched.
X_train = X_train.toarray() if sparse.issparse(X_train) else X_train
X_test = X_test.toarray() if sparse.issparse(X_test) else X_test


In [20]:
# Encode the Credit_Score labels as integer class ids.
# The encoder learns its classes from the training labels only and the same
# mapping is then applied to both splits.
label_encoder = LabelEncoder().fit(y_train)
y_train = label_encoder.transform(y_train)
y_test = label_encoder.transform(y_test)

In [22]:
# Cast the feature matrices to float32, the default dtype of nn.Linear weights.
X_train_np, X_test_np = (
    features.astype(np.float32) for features in (X_train, X_test)
)

In [23]:
# Wrap features and labels in PyTorch tensors (torch.tensor copies the data).
X_train, X_test = torch.tensor(X_train_np), torch.tensor(X_test_np)
y_train, y_test = torch.tensor(y_train), torch.tensor(y_test)

In [24]:

# Minimal feed-forward classifier: Linear -> ReLU -> Linear.
class NeuralNet(nn.Module):
    """Single-hidden-layer network that maps feature rows to class logits.

    Args:
        input_size: number of input features per sample.
        hidden_size: width of the hidden layer.
        num_classes: number of output logits (one per class).
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return raw (un-normalized) logits for the batch `x`."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

In [28]:
# Sanity check: list the distinct encoded class ids present in the test labels.
y_test.unique()

tensor([0, 1, 2], dtype=torch.int32)

In [29]:
# Hyperparameters
input_size = X_train.shape[1]                # number of preprocessed feature columns
hidden_size = 128
# Derived from the fitted label encoder instead of being hard-coded, so the
# output layer always matches the number of classes seen in the training labels.
num_classes = len(label_encoder.classes_)
learning_rate = 0.001
num_epochs = 10
batch_size = 64

In [30]:
# Initialize the model, loss function, and optimizer.
# Seed torch first so the random weight initialization (and therefore the
# training run) is reproducible.
torch.manual_seed(42)
model = NeuralNet(input_size, hidden_size, num_classes)
criterion = nn.CrossEntropyLoss()  # takes raw logits and integer class targets
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Training the model
model.train()  # make sure the network is in training mode

# CrossEntropyLoss needs int64 targets; cast once instead of on every batch.
train_targets = y_train.long()
num_samples = len(X_train)

for epoch in range(num_epochs):
    running_loss = 0.0  # sum of per-sample losses over the epoch

    for start in range(0, num_samples, batch_size):
        inputs = X_train[start:start + batch_size]
        targets = train_targets[start:start + batch_size]

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight by batch size so the
        # (possibly smaller) final batch does not skew the epoch average.
        running_loss += loss.item() * inputs.size(0)

    # Report the mean loss over the whole epoch rather than the loss of the
    # last mini-batch only.
    epoch_loss = running_loss / max(num_samples, 1)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')


In [None]:
# Evaluating the model
was_training = model.training
model.eval()  # inference mode; matters as soon as dropout/batch-norm layers are added
with torch.no_grad():  # no gradients needed for scoring
    outputs = model(X_test)
    _, predicted = torch.max(outputs, 1)  # index of the largest logit = predicted class
model.train(was_training)  # restore the previous mode

# Hand plain NumPy arrays to scikit-learn instead of relying on implicit
# tensor-to-array conversion.
y_true_np = y_test.numpy()
y_pred_np = predicted.numpy()
precision = precision_score(y_true_np, y_pred_np, average='weighted')
recall = recall_score(y_true_np, y_pred_np, average='weighted')

print(f'Precision: {precision}')
print(f'Recall: {recall}')