<a href="https://colab.research.google.com/github/vishal-burman/PyTorch-Architectures/blob/master/research/AAAMLP_Notes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
! wget https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv

In [10]:
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

In [2]:
dataset = pd.read_csv("winequality-red.csv", sep=";")  

In [3]:
dataset.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [4]:
quality_mapping = {
    3: 0,
    4: 1,
    5: 2,
    6: 3,
    7: 4,
    8: 5,
}
dataset["quality"] = dataset.quality.map(quality_mapping)

In [5]:
dataset = dataset.sample(frac=1).reset_index(drop=True) # Randomize the df

In [6]:
dataset_train = dataset.head(1000)
dataset_valid = dataset.tail(599)

In [7]:
columns = list(dataset.columns)
columns.remove("quality")
columns

['fixed acidity',
 'volatile acidity',
 'citric acid',
 'residual sugar',
 'chlorides',
 'free sulfur dioxide',
 'total sulfur dioxide',
 'density',
 'pH',
 'sulphates',
 'alcohol']

In [8]:
classifier = DecisionTreeClassifier(max_depth=3)

In [9]:
classifier.fit(dataset_train[columns], dataset_train.quality)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=3, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

In [11]:
train_predictions = classifier.predict(dataset_train[columns])
train_accuracy = accuracy_score(dataset_train.quality, train_predictions)
train_accuracy

0.596

In [13]:
valid_predictions = classifier.predict(dataset_valid[columns])
valid_accuracy = accuracy_score(dataset_valid.quality, valid_predictions)
valid_accuracy

0.5575959933222037

In [14]:
# Changing the depth of tree classifier to max_depth=7
classifier = DecisionTreeClassifier(max_depth=7)
classifier.fit(dataset_train[columns], dataset_train.quality)

train_predictions = classifier.predict(dataset_train[columns])
train_accuracy = accuracy_score(dataset_train.quality, train_predictions)

valid_predictions = classifier.predict(dataset_valid[columns])
valid_accuracy = accuracy_score(dataset_valid.quality, valid_predictions)

print(f"Train Accuracy: {train_accuracy: .3f} || Valid Accuracy: {valid_accuracy: .3f}")

Train Accuracy:  0.775 || Valid Accuracy:  0.583


In [18]:
# Looking at the train and Test Accuracy for depth=1 to depth=25

for i in range(1, 26):
  classifier = DecisionTreeClassifier(max_depth=i)
  classifier.fit(dataset_train[columns], dataset_train.quality)

  train_predictions = classifier.predict(dataset_train[columns])
  train_accuracy = accuracy_score(dataset_train.quality, train_predictions)

  valid_predictions = classifier.predict(dataset_valid[columns])
  valid_accuracy = accuracy_score(dataset_valid.quality, valid_predictions)

  print(f"Train Accuracy: {train_accuracy: .3f} || Valid Accuracy: {valid_accuracy: .3f}")

Train Accuracy:  0.563 || Valid Accuracy:  0.538
Train Accuracy:  0.563 || Valid Accuracy:  0.538
Train Accuracy:  0.596 || Valid Accuracy:  0.558
Train Accuracy:  0.630 || Valid Accuracy:  0.564
Train Accuracy:  0.651 || Valid Accuracy:  0.566
Train Accuracy:  0.720 || Valid Accuracy:  0.571
Train Accuracy:  0.775 || Valid Accuracy:  0.584
Train Accuracy:  0.834 || Valid Accuracy:  0.579
Train Accuracy:  0.888 || Valid Accuracy:  0.564
Train Accuracy:  0.911 || Valid Accuracy:  0.574
Train Accuracy:  0.934 || Valid Accuracy:  0.596
Train Accuracy:  0.959 || Valid Accuracy:  0.574
Train Accuracy:  0.977 || Valid Accuracy:  0.586
Train Accuracy:  0.984 || Valid Accuracy:  0.593
Train Accuracy:  0.990 || Valid Accuracy:  0.583
Train Accuracy:  0.997 || Valid Accuracy:  0.591
Train Accuracy:  1.000 || Valid Accuracy:  0.579
Train Accuracy:  1.000 || Valid Accuracy:  0.581
Train Accuracy:  1.000 || Valid Accuracy:  0.594
Train Accuracy:  1.000 || Valid Accuracy:  0.596
Train Accuracy:  1.0