In [None]:
from collections import Counter

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression as lr
from sklearn.tree import DecisionTreeClassifier as dt
from sklearn.ensemble import RandomForestClassifier as rf
from sklearn.ensemble import GradientBoostingClassifier as gb
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score,confusion_matrix,precision_score,recall_score,f1_score,classification_report,ConfusionMatrixDisplay


In [None]:
# Read the wine-quality CSV into a DataFrame.
# NOTE(review): the path is an absolute '/content/...' location; confirm it exists where this notebook runs.
wine = pd.read_csv(filepath_or_buffer='/content/WineQT.csv')

In [None]:
# Show the Python class of the loaded object
print(wine.__class__)

In [None]:
# Column names, non-null counts and dtypes of the dataset.
# DataFrame.info() writes its report directly and returns None, so it is
# called on its own; wrapping it in print() would append a stray "None".
wine.info()

In [None]:
# Number of missing entries in each column
missing = wine.isna().sum()
print(missing)

In [None]:
# Preview the first rows of the dataset.
# head must be *called*; printing the bare attribute only shows the
# bound-method representation, not the rows.
print(wine.head())

In [None]:
# Separate the features from the label.
# 'quality' is the target; the 'Id' column is dropped along with it so it is not used as a feature.
x = wine.drop(columns=['quality', 'Id'])
y = wine['quality']

# A bare expression is only displayed when it is the last line of a cell,
# so the shapes are printed explicitly.
print("x shape:", x.shape)
print("y shape:", y.shape)

# Number of samples per quality value
print(Counter(y))

In [None]:
# First rows of the feature frame and of the target series
print("features:\n", x.head(), sep=" ")
print("target:\n", y.head(), sep=" ")

## Data Visualization

In [None]:
# Bar plot of the feature frame on an explicit 20x6 Axes
fig, ax = plt.subplots(figsize=(20, 6))
sns.barplot(x, ax=ax)
ax.set_title('Features')
plt.show()

In [None]:
# Histogram of the target column
fig, ax = plt.subplots(figsize=(8, 4))
sns.histplot(data=wine, x='quality', ax=ax)
ax.set_title('Target')
plt.show()

In [None]:
# Quality plotted against fixed acidity
fig, ax = plt.subplots(figsize=(10, 4))
sns.lineplot(data=wine, x='fixed acidity', y='quality', color='red', ax=ax)
ax.set_title('Fixed Acidity plot')
plt.show()


In [None]:
# Quality plotted against volatile acidity
fig, ax = plt.subplots(figsize=(10, 4))
sns.lineplot(data=wine, x='volatile acidity', y='quality', color='green', ax=ax)
ax.set_title('Volatile Acidity plot')
plt.show()


In [None]:
# Quality plotted against citric acid
fig, ax = plt.subplots(figsize=(10, 4))
sns.lineplot(data=wine, x='citric acid', y='quality', color='blue', ax=ax)
ax.set_title('Citric Acid plot')
plt.show()


In [None]:
# Quality plotted against residual sugar
fig, ax = plt.subplots(figsize=(10, 4))
sns.lineplot(data=wine, x='residual sugar', y='quality', color='orange', ax=ax)
ax.set_title('Residual sugar plot')
plt.show()


In [None]:
# Quality plotted against chlorides (wider 15x4 canvas than the other line plots)
fig, ax = plt.subplots(figsize=(15, 4))
sns.lineplot(data=wine, x='chlorides', y='quality', color='pink', ax=ax)
ax.set_title('Chlorides plot')
plt.show()


In [None]:
# Quality plotted against free sulfur dioxide
fig, ax = plt.subplots(figsize=(10, 4))
sns.lineplot(data=wine, x='free sulfur dioxide', y='quality', color='grey', ax=ax)
ax.set_title(' Free Sulfur Dioxide Plot')
plt.show()


In [None]:
# Quality plotted against total sulfur dioxide
fig, ax = plt.subplots(figsize=(10, 4))
sns.lineplot(data=wine, x='total sulfur dioxide', y='quality', color='black', ax=ax)
ax.set_title('Total Sulfur Dioxide Plot')
plt.show()


In [None]:
# Quality plotted against density
fig, ax = plt.subplots(figsize=(10, 4))
sns.lineplot(data=wine, x='density', y='quality', color='cyan', ax=ax)
ax.set_title('Density Plot')
plt.show()


In [None]:
# Quality plotted against pH
fig, ax = plt.subplots(figsize=(10, 4))
sns.lineplot(data=wine, x='pH', y='quality', color='purple', ax=ax)
ax.set_title('pH Plot')
plt.show()


In [None]:
# Quality plotted against sulphates
fig, ax = plt.subplots(figsize=(10, 4))
sns.lineplot(data=wine, x='sulphates', y='quality', color='lightpink', ax=ax)
ax.set_title('Sulphate Plot')
plt.show()


In [None]:
# Quality plotted against alcohol
fig, ax = plt.subplots(figsize=(10, 4))
sns.lineplot(data=wine, x='alcohol', y='quality', color='maroon', ax=ax)
ax.set_title('Alcohol Plot')
plt.show()


## Model Training

In [None]:
# Hold out 20% of the rows as test data; the seed keeps the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)
# Shapes of the four pieces, in the same order as they were unpacked
tuple(part.shape for part in (x_train, x_test, y_train, y_test))

In [None]:
# Standardise the features: the scaler is fitted on the training split only
# and then applied unchanged to both splits.
scaler = StandardScaler().fit(x_train)
x_train_sc = scaler.transform(x_train)
x_test_sc = scaler.transform(x_test)

### Classification Algorithms

**LOGISTIC REGRESSION**

In [None]:
# Logistic-regression base estimator with balanced class weights and a
# raised iteration cap.
# `multi_class` is intentionally not passed: scikit-learn deprecated the
# argument, and its default already selects the multinomial formulation for
# a multiclass target with the lbfgs solver.
model1 = lr(max_iter=1000, class_weight='balanced')

In [None]:
# Search space for logistic regression (a single candidate)
param_grid1 = dict(
    C=[0.1],
    solver=['lbfgs'],
    penalty=['l2'],
)

In [None]:
# 5-fold cross-validated grid search for logistic regression.
# The search is built on `model1` so the arguments configured on it are
# actually used; a bare lr() would silently fall back to the defaults.
# error_score='raise' makes a failing candidate stop the search with its error.
grid_search = GridSearchCV(
    estimator=model1,
    param_grid=param_grid1,
    cv=5,
    scoring='accuracy',
    error_score='raise',
)
grid_search.fit(x_train_sc, y_train)

In [None]:
# GridSearchCV refits the winning configuration on the whole training set
# (refit=True is its default), so best_estimator_ is already fitted and a
# second fit() call would only repeat that work.
model1 = grid_search.best_estimator_
model1

In [None]:

# Evaluate logistic regression on the held-out test split
y_pred1 = model1.predict(x_test_sc)

# Accuracy is reported as a percentage, the same scale the decision-tree,
# random-forest and gradient-boosting cells use, so the numbers compare directly.
print('Accuracy achieved', accuracy_score(y_test, y_pred1) * 100)
print('Confusion Matrix:\n ', confusion_matrix(y_test, y_pred1))
# zero_division=0 reports 0 for classes that were never predicted
print("Classification Report:\n", classification_report(y_test, y_pred1, zero_division=0))



In [None]:
# Confusion matrix for logistic regression.
# The label list is every value that occurs in the truth or the predictions,
# in sorted order, so the tick labels show the actual class values instead of
# positional indices 0..k-1.
cm_labels = sorted(set(y_test) | set(y_pred1))
cm = confusion_matrix(y_test, y_pred1, labels=cm_labels)

# Draw on an explicit Axes: disp.plot() opens a figure of its own when no
# Axes is supplied, which would leave the 6x6 figure empty.
fig, ax = plt.subplots(figsize=(6, 6))
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=cm_labels)
disp.plot(cmap=plt.cm.inferno, ax=ax)
ax.set_title('Confusion Matrix')
plt.show()



**DECISION TREE**

In [None]:
# Decision-tree base estimator with a fixed seed
model2 = dt(
    random_state=42,
)

In [None]:
# Search space for the decision tree (a single candidate)
param_grid2 = dict(
    criterion=['entropy'],
    max_depth=[None],
    min_samples_leaf=[1],
    min_samples_split=[2],
)

In [None]:

# 5-fold cross-validated grid search for the decision tree.
# The search is built on `model2` so its random_state is carried into every
# fit; a bare dt() is unseeded and can give different trees on each run.
grid_search2 = GridSearchCV(
    estimator=model2,
    param_grid=param_grid2,
    cv=5,
    scoring='accuracy',
    error_score='raise',
)
grid_search2.fit(x_train_sc, y_train)

In [None]:
# GridSearchCV refits the winning configuration on the whole training set
# (refit=True is its default), so best_estimator_ is already fitted and a
# second fit() call would only repeat that work.
model2 = grid_search2.best_estimator_
model2

In [None]:
# Evaluate the decision tree on the held-out test split
y_pred2 = model2.predict(x_test_sc)

# Spot-check: true test labels next to the predictions for the same rows.
# The predictions come from x_test_sc, so they are compared with y_test
# (y_train rows are unrelated samples).
print(y_test.iloc[1:4].to_numpy())
print(y_pred2[1:4])

print('Accuracy achieved', (accuracy_score(y_test, y_pred2) * 100))
print('Confusion Matrix \n', confusion_matrix(y_test, y_pred2))
# zero_division=0 reports 0 for classes that were never predicted
print("Classification Report:\n", classification_report(y_test, y_pred2, zero_division=0))


In [None]:
# Confusion matrix for the decision tree.
# The label list is every value that occurs in the truth or the predictions,
# in sorted order, so the tick labels show the actual class values instead of
# positional indices 0..k-1.
cm_labels = sorted(set(y_test) | set(y_pred2))
cm2 = confusion_matrix(y_test, y_pred2, labels=cm_labels)

# Draw on an explicit Axes: disp.plot() opens a figure of its own when no
# Axes is supplied, which would leave the 6x6 figure empty.
fig, ax = plt.subplots(figsize=(6, 6))
disp = ConfusionMatrixDisplay(confusion_matrix=cm2, display_labels=cm_labels)
disp.plot(cmap=plt.cm.inferno, ax=ax)
ax.set_title('Confusion Matrix')
plt.show()


**RANDOM FOREST**

In [None]:
# Random-forest base estimator with a fixed seed
model3 = rf(
    random_state=42,
)


In [None]:

# Search space for the random forest.
# n_estimators lists three distinct sizes; a repeated value would make the
# grid search fit the identical candidate twice without adding information.
# NOTE(review): 300 is chosen to mirror the gradient-boosting grid — confirm
# it is the intended third value.
grid_param3 = {
    'n_estimators': [100, 200, 300],
    'max_depth': [10, 20, 30],
    'min_samples_split': [2],
    'min_samples_leaf': [1],
}

In [None]:
# 5-fold cross-validated grid search over the random-forest grid,
# scored by accuracy; a failing candidate raises instead of being scored.
grid_search3 = GridSearchCV(
    model3,
    grid_param3,
    scoring='accuracy',
    cv=5,
    error_score='raise',
)
grid_search3.fit(x_train_sc, y_train)

In [None]:
# GridSearchCV refits the winning configuration on the whole training set
# (refit=True is its default), so best_estimator_ is already fitted and a
# second fit() call would only repeat that work.
model3 = grid_search3.best_estimator_
model3

In [None]:

# Evaluate the random forest on the held-out test split
y_pred3 = model3.predict(x_test_sc)

# Spot-check: true test labels next to the predictions for the same rows.
# The predictions come from x_test_sc, so they are compared with y_test
# (y_train rows are unrelated samples).
print(y_test.iloc[0:7].to_numpy())
print(y_pred3[0:7])

print('Accuracy achieved', accuracy_score(y_test, y_pred3) * 100)
print('Confusion Matrix:\n ', confusion_matrix(y_test, y_pred3))
# zero_division=0 reports 0 for classes that were never predicted
print("Classification Report:\n", classification_report(y_test, y_pred3, zero_division=0))


In [None]:
# Confusion matrix for the random forest.
# The label list is every value that occurs in the truth or the predictions,
# in sorted order, so the tick labels show the actual class values instead of
# positional indices 0..k-1.
cm_labels = sorted(set(y_test) | set(y_pred3))
cm3 = confusion_matrix(y_test, y_pred3, labels=cm_labels)

# Draw on an explicit Axes: disp.plot() opens a figure of its own when no
# Axes is supplied, which would leave the 6x6 figure empty.
fig, ax = plt.subplots(figsize=(6, 6))
disp = ConfusionMatrixDisplay(confusion_matrix=cm3, display_labels=cm_labels)
disp.plot(cmap=plt.cm.inferno, ax=ax)
ax.set_title('Confusion Matrix')
plt.show()


**GRADIENT BOOSTING**

In [None]:
# Gradient-boosting base estimator with a fixed seed
model4 = gb(
    random_state=42,
)

In [None]:
# Search space for gradient boosting: only the number of estimators varies
param_grid4 = dict(
    learning_rate=[0.05],
    n_estimators=[100, 200, 300],
    max_depth=[3],
    min_samples_split=[2],
    min_samples_leaf=[1],
)

In [None]:
# 5-fold cross-validated grid search for gradient boosting.
# The search is built on `model4` so its random_state is carried into every
# fit; a bare gb() is unseeded and can give different results on each run.
grid_search4 = GridSearchCV(
    estimator=model4,
    param_grid=param_grid4,
    cv=5,
    scoring='accuracy',
    error_score='raise',
)
grid_search4.fit(x_train_sc, y_train)

In [None]:
# GridSearchCV refits the winning configuration on the whole training set
# (refit=True is its default), so best_estimator_ is already fitted and a
# second fit() call would only repeat that work.
model4 = grid_search4.best_estimator_
model4

In [None]:

# Evaluate gradient boosting on the held-out test split
y_pred4 = model4.predict(x_test_sc)

# Spot-check: true test labels next to the predictions for the same rows.
# The predictions come from x_test_sc, so they are compared with y_test
# (y_train rows are unrelated samples).
print(y_test.iloc[10:17].to_numpy())
print(y_pred4[10:17])

print('Accuracy achieved', accuracy_score(y_test, y_pred4) * 100)
# Newline after the label so the first matrix row lines up with the rest
print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred4))
# The report lists per-class metrics plus overall averages, so it is not
# labelled as a micro average.
print("Classification Report:")
print(classification_report(y_test, y_pred4, zero_division=0))

In [None]:
# Confusion matrix for gradient boosting.
# The label list is every value that occurs in the truth or the predictions,
# in sorted order, so the tick labels show the actual class values instead of
# positional indices 0..k-1.
cm_labels = sorted(set(y_test) | set(y_pred4))
cm4 = confusion_matrix(y_test, y_pred4, labels=cm_labels)

# Draw on an explicit Axes: disp.plot() opens a figure of its own when no
# Axes is supplied, which would leave the 6x6 figure empty.
fig, ax = plt.subplots(figsize=(6, 6))
disp = ConfusionMatrixDisplay(confusion_matrix=cm4, display_labels=cm_labels)
disp.plot(cmap=plt.cm.inferno, ax=ax)
ax.set_title('Confusion Matrix')
plt.show()
