# Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

# Fetch the bundled Iris data and unpack the feature matrix and the class labels.
data = load_iris()
X, y = data.data, data.target

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Configure the estimator (L-BFGS solver, at most 1000 iterations) and train it
# in one expression -- fit() returns the fitted estimator itself.
logistic_model = LogisticRegression(max_iter=1000, solver='lbfgs').fit(X_train, y_train)

# Predict a class label for every held-out sample.
y_pred = logistic_model.predict(X_test)

# Fraction of held-out samples whose predicted label equals the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)


Accuracy: 1.0


# Decision Tree

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Load the dataset (example: Iris dataset)
data = load_iris()
X = data.data
y = data.target

# Split the data into training and testing sets (20% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a decision tree classifier.
# random_state is pinned so that repeated runs build the same tree; without it
# the estimator's internal randomness can make the fitted tree (and therefore
# the reported accuracy) differ from one run to the next.
decision_tree = DecisionTreeClassifier(random_state=42)

# Fit the classifier on the training data
decision_tree.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = decision_tree.predict(X_test)

# Calculate the accuracy of the model on the held-out samples
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


Accuracy: 1.0


# Random Forest

In [None]:
# Importing the necessary libraries
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Fetch the Iris data; features and labels are read off the returned object below.
iris = load_iris()

# Hold out 20% of the samples for evaluation, using a fixed seed for the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.2,
    random_state=42,
)

# Build a seeded forest of 100 trees and train it in the same expression
# (fit() returns the fitted estimator).
rf_classifier = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

# Predict a class label for every held-out sample.
y_pred = rf_classifier.predict(X_test)

# Fraction of held-out samples that were labelled correctly.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)


Accuracy: 1.0


# K-Nearest Neighbours (K-NN)

In [None]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load the UCI Heart Disease dataset
# NOTE(review): this is an absolute Colab-style path; the CSV has to exist at
# exactly this location before the cell is run.
dataset = pd.read_csv('/content/UCI Heart Disease.csv')

# Every column except the last is used as an input feature; the last column is the label.
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values


# preparing training and testing set (20% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

# scaling the train and test set: the scaler is fitted on the training data
# only and the same transformation is then applied to the test data
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)


model = KNeighborsClassifier()

# get the optimal value of K using GridSearch
from sklearn.model_selection import GridSearchCV
# set the range of k (1..40 inclusive)
k = list(range(1,41))
# NOTE(review): 'minkowski' with the estimator's default power parameter is
# presumably the same distance as 'euclidean' -- confirm; if so, one entry is redundant.
param_grid  = {'n_neighbors':k,'metric':['euclidean','minkowski']}
grid = GridSearchCV(model, param_grid,scoring='accuracy', return_train_score=False)
grid.fit(X_train,y_train)
print(grid.best_params_)

# finalizing the model with the hyper-parameters the grid search selected,
# instead of hard-coding values copied from an earlier run's printout
KNN_classifier = KNeighborsClassifier(**grid.best_params_)
# training the classifier
KNN_classifier.fit(X_train,y_train)

# test score (mean accuracy on the held-out set, shown as the cell's output)
print('Test score:')
KNN_classifier.score(X_test,y_test)

{'metric': 'euclidean', 'n_neighbors': 16}
Test score:


0.8360655737704918

# Naive Bayes

In [None]:
# Importing the required library
import pandas as pd

# Reading the dataset
data = pd.read_csv('https://raw.githubusercontent.com/vaibhav05cse/MLGB-Datasets/main/Iris.csv')

# Defining input (X) and output (y) features:
# the first column is skipped, the last column is the label, everything in between is input
X = data.iloc[:, 1:-1].values
y = data.iloc[:, -1].values

# Creating training and test patterns (shuffled 80/20 split, fixed seed)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, shuffle=True, random_state = 0)

# input patterns (first 11 training rows, before scaling).
# Printed explicitly: a bare expression in the middle of a cell is never displayed.
print(X_train[:11])

# Min-Max Scaling: fitted on the training data only, then applied to the test data
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Defining the Gaussian Naive Bayes
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()

# Training the Gaussian Naive Bayes classifier
classifier.fit(X_train, y_train)

# Making predictions with the test data
y_pred = classifier.predict(X_test)

# Combining the actual and predicted values (first five rows, printed so they are visible)
print(pd.DataFrame(data={'Actual Labels': y_test, 'Predicted Labels': y_pred}).head())

# Accuracy score (printed so the value is not silently discarded)
from sklearn.metrics import accuracy_score
print("Accuracy:", accuracy_score(y_test, y_pred))

# Classification report
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        11
Iris-versicolor       0.93      1.00      0.96        13
 Iris-virginica       1.00      0.83      0.91         6

       accuracy                           0.97        30
      macro avg       0.98      0.94      0.96        30
   weighted avg       0.97      0.97      0.97        30



# Support Vector Machine

In [None]:
# Importing the required library
import pandas as pd

# scikit-learn pieces used by this cell
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Read the diabetes table from the GitHub-hosted CSV
data = pd.read_csv('https://raw.githubusercontent.com/vaibhav05cse/MLGB-Datasets/main/diabetes.csv')

# The last column is the target (y); every column before it is an input feature (X)
X, y = data.iloc[:, :-1].values, data.iloc[:, -1].values

# Shuffled split with a fixed seed; 15% of the rows are held out for testing
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    shuffle=True,
    random_state=0,
)

# z-standardization: the scaler learns its statistics from the training rows
# only, and the same transformation is then applied to both partitions
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

# Linear-kernel SVM, created and trained in one expression (fit() returns the estimator)
SVM_classifier = SVC(random_state=0, kernel='linear').fit(X_train, y_train)

# Predicted labels for the held-out rows
y_pred = SVM_classifier.predict(X_test)

# Per-class precision / recall / F1 summary
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.85      0.91      0.88        78
           1       0.78      0.66      0.71        38

    accuracy                           0.83       116
   macro avg       0.81      0.78      0.80       116
weighted avg       0.82      0.83      0.82       116



# Classification Evaluation Metrics

In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

# Load the dataset
iris_data = load_iris()
X = iris_data.data
y = iris_data.target

# Split the data into training and testing sets (20% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a classification model.
# max_iter is raised above the estimator default because, on these unscaled
# features, the default budget stops the solver early with
# "TOTAL NO. of ITERATIONS REACHED LIMIT"; the settings match the ones used in
# the Logistic Regression section of this notebook.
classifier = LogisticRegression(solver='lbfgs', max_iter=1000)
classifier.fit(X_train, y_train)

# Make predictions
y_pred = classifier.predict(X_test)

# Calculate evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')  # Macro-average precision
recall = recall_score(y_test, y_pred, average='macro')  # Macro-average recall
f1 = f1_score(y_test, y_pred, average='macro')  # Macro-average F1 score
# AUC-ROC is computed from the predicted class probabilities (not the hard
# labels), in one-vs-rest mode because the target has more than two classes
auc_roc = roc_auc_score(y_test, classifier.predict_proba(X_test), multi_class='ovr')  # AUC-ROC score
conf_matrix = confusion_matrix(y_test, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("AUC-ROC Score:", auc_roc)
print("Confusion Matrix:\n", conf_matrix)

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
AUC-ROC Score: 1.0
Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
