In [21]:
import torch
import numpy as np
import csv

In [280]:
wine_path = "./data/tabular/winequality-white.csv"
col_list = ["fixed acidity", "volatile acidity", "citric acid", "residual sugar", "chlorides", "free sulfur dioxide", "total sulfur dioxide", "density", "pH", "sulphates", "alcohol", "quality"]

In [28]:
wineq_numpy = np.loadtxt(wine_path, dtype=np.float32, delimiter=";", skiprows=1)
wineq_numpy

array([[ 7.  ,  0.27,  0.36, ...,  0.45,  8.8 ,  6.  ],
       [ 6.3 ,  0.3 ,  0.34, ...,  0.49,  9.5 ,  6.  ],
       [ 8.1 ,  0.28,  0.4 , ...,  0.44, 10.1 ,  6.  ],
       ...,
       [ 6.5 ,  0.24,  0.19, ...,  0.46,  9.4 ,  6.  ],
       [ 5.5 ,  0.29,  0.3 , ...,  0.38, 12.8 ,  7.  ],
       [ 6.  ,  0.21,  0.38, ...,  0.32, 11.8 ,  6.  ]], dtype=float32)

In [38]:
wineq_t = torch.from_numpy(wineq_numpy)
wineq_t.shape

torch.Size([4898, 12])

In [292]:
data = wineq_t[:, :-1]
target = wineq_t[:, -1].to(torch.uint8)

target_onehot = torch.zeros([target.shape[0], 10], dtype=torch.uint8)
target_onehot.scatter_(1, target.unsqueeze(1).to(torch.int64), 1)

tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 1, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], dtype=torch.uint8)

In [254]:
mean = data.mean(dim=0, keepdim=True)
std = data.std(dim=0, keepdim=True)
data = (data - mean) / std

In [295]:
bad_data = data[target <= 3]
mid_data = data[(target > 3) & (target < 7)]
high_data = data[target >= 7]

bad_mean = bad_data.mean(dim=0)
mid_mean = mid_data.mean(dim=0)
high_mean = high_data.mean(dim=0)

for args in zip(col_list, bad_mean, mid_mean, high_mean):
    print("{:20} {:6.2f} {:6.2f} {:6.2f}".format(*args))

fixed acidity          7.60   6.89   6.73
volatile acidity       0.33   0.28   0.27
citric acid            0.34   0.34   0.33
residual sugar         6.39   6.71   5.26
chlorides              0.05   0.05   0.04
free sulfur dioxide   53.33  35.42  34.55
total sulfur dioxide 170.60 141.83 125.25
density                0.99   0.99   0.99
pH                     3.19   3.18   3.22
sulphates              0.47   0.49   0.50
alcohol               10.34  10.26  11.42


In [342]:
predictd_high_indices = data[:,6] < mid_mean[6]
predicted_high_quality =  data[predictd_high_indices]
predicted_high_quality.shape

torch.Size([2727, 11])

In [354]:
actual_high_indices = target > 5
actual_good_wines = data[actual_high_indices]
actual_good_wines.shape

torch.Size([3258, 11])

In [359]:
n_matches = torch.sum(predictd_high_indices & actual_high_indices)
n_predicted = torch.sum(predictd_high_indices).item()
n_actual = torch.sum(actual_high_indices).item()
n_matches, n_predicted, n_actual, n_matches / n_actual

(tensor(2018), 2727, 3258, tensor(0.6194))

In [369]:
t = torch.tensor([[1,2,3,4], [5,6,7,8]])
