# V.1 Exploring the green reds

## a) Write a function that will plot a scatterplot matrix of your red wine data.

In [1]:
def _padded_limits(values):
    """Return (low, high) axis limits 5% beyond the extremes of ``values``.

    Returns None when ``values`` has no usable data (its min/max is NaN), so
    the caller can leave the axis on automatic limits instead of failing.
    """
    low, high = values.min(), values.max()
    # NaN is the only value that differs from itself; pandas returns NaN for
    # the min/max of an empty (or all-NaN) selection.
    if low != low or high != high:
        return None
    # Always pad *away* from the data: multiplying a negative bound by 0.95
    # (or 1.05 for the upper bound) would move it inwards and clip points.
    low = low * 0.95 if low >= 0 else low * 1.05
    high = high * 1.05 if high >= 0 else high * 0.95
    return low, high


def plot_scatter_matrix(winedata, good_threshold, bad_threshold, save_plot=False):
    """Draw a scatterplot matrix contrasting "good" and "bad" wines.

    Every column of ``winedata`` except the last one gets a row and a column
    in the grid. Off-diagonal cells plot the row's feature (y) against the
    column's feature (x); diagonal cells show the feature name instead.

    Args:
        winedata: pandas DataFrame with a 'quality' column. The last column
            is left out of the grid (presumably it is 'quality' -- confirm
            for other data sets).
        good_threshold: wines with quality strictly above this value are
            drawn in green (#009E49).
        bad_threshold: wines with quality strictly below this value are
            drawn in pink (#EC008C).
        save_plot: when True the figure is written to "./matrix.png";
            otherwise it is displayed with ``plt.show()``.

    Returns:
        None.
    """
    import matplotlib.pyplot as plt
    import matplotlib.patches as patches

    columns = winedata.columns
    size = len(columns) - 1
    features = columns[:size]

    # squeeze=False keeps `axes` two-dimensional even for a 1x1 grid, so the
    # axes[i, j] lookup below is always valid.
    fig, axes = plt.subplots(size, size, squeeze=False)
    fig.subplots_adjust(top=0.99, bottom=0.01, left=0.01, right=0.99, wspace=0, hspace=0)

    # The quality filters do not depend on the grid cell: compute them once.
    is_bad = winedata['quality'] < bad_threshold
    is_good = winedata['quality'] > good_threshold
    bad_wines = winedata[is_bad]
    good_wines = winedata[is_good]
    plotted = winedata[is_bad | is_good]

    # Limits per feature cover both groups; shared by every cell using it.
    limits = {name: _padded_limits(plotted[name]) for name in features}

    for i, row_name in enumerate(features):
        for j, col_name in enumerate(features):
            ax = axes[i, j]
            # Axes are switched off below, so draw the cell frame by hand.
            frame = patches.Rectangle((0, 0), 1, 1, fill=True, transform=ax.transAxes,
                clip_on=False, facecolor='w', zorder=0.1, edgecolor='#000000')
            ax.add_patch(frame)
            if i != j:
                x_limits = limits[col_name]
                y_limits = limits[row_name]
                if x_limits is not None:
                    ax.set_xlim(left=x_limits[0], right=x_limits[1])
                if y_limits is not None:
                    ax.set_ylim(bottom=y_limits[0], top=y_limits[1])
                ax.scatter(bad_wines[col_name], bad_wines[row_name],
                    marker='.', s=20, linewidths=0, c='#EC008C')
                ax.scatter(good_wines[col_name], good_wines[row_name],
                    marker='.', s=20, linewidths=0, c='#009E49')
            else:
                # Diagonal: label the feature, one word per line.
                ax.text(0.5, 0.5, row_name.replace(' ', '\n'), horizontalalignment='center',
                    verticalalignment='center', transform=ax.transAxes, size='small')
            ax.set_axis_off()

    if save_plot:
        fig.savefig("./matrix.png")
    else:
        plt.show()


## b) Analyze different chemical factors in red wines 

In [None]:
import pandas as pd

# The red wine CSV uses ';' as its field separator.
df = pd.read_csv('winequality-red.csv', sep=";")

# Show the matrix on screen: quality above 7 counts as good, below 4 as bad.
plot_scatter_matrix(df, good_threshold=7, bad_threshold=4, save_plot=False)

pH and alcohol are the most useful factors for splitting wines by quality, because in their scatter plots the good and bad wines are visually divided into two distinct zones.

# V.2 Learning to perceptron

## a) Implement a perceptron b) Implement a function to train your perceptron

In [None]:
import random
# Seed the module-level RNG so the random.random() draws used for the
# perceptron's initial weights and bias are the same on every run.
random.seed(24679)

def inf_generator():
    """Generate the integers 0, 1, 2, ... without ever stopping.

    The caller is responsible for breaking out of the iteration.
    """
    current = -1
    while True:
        current += 1
        yield current

class Perceptron(object):
    """Perceptron with Rosenblatt perceptron learning rule and Heaviside step activation function.

    Attributes:
        w: list of weights, one per input.
        bias: bias term added to the weighted sum of the inputs.
    """

    def __init__(self, inputs_count):
        """Create a perceptron with ``inputs_count`` weights.

        Every weight and the bias are drawn independently with
        ``random.random()``.
        """
        # One draw per weight: `[random.random()] * n` would repeat a single
        # value n times.
        self.w = [random.random() for _ in range(inputs_count)]
        self.bias = random.random()

    def train(self, data, target, epochs_count = 0, eta = 0.01):
        """Train the perceptron with the Rosenblatt learning rule.

            @data - samples; an iterable of rows, each indexable by position
                    and holding at least len(self.w) numeric values
            @target - target values (0 or 1), one per sample
            @epochs_count - number of training epochs; 0 means "repeat until an
                    epoch finishes without errors", which never terminates
                    if such an epoch is never reached
            @eta - learning rate (between 0.0 and 1.0)

            @return a list of (current_epoch, num_errors_at_epoch_end, [array_of_weights], bias)
        """
        history = []
        epochs = inf_generator() if epochs_count == 0 else range(epochs_count)
        for epoch in epochs:
            errors = 0
            for row, expected in zip(data, target):
                delta = eta * (expected - self.predict(row))
                if delta == 0.0:
                    continue  # correctly classified: nothing to adjust
                errors += 1
                # Update each weight element-wise. `list += sequence` would
                # append to the weight list instead of adjusting it.
                for k in range(len(self.w)):
                    self.w[k] += delta * row[k]
                self.bias += delta
            # Store a copy of the weights so later epochs cannot alter the
            # values recorded for this one.
            history.append((epoch, errors, list(self.w), self.bias))
            if epochs_count == 0 and errors == 0:
                break
        return history

    def predict(self, row):
        """Return 1 if the weighted sum of ``row`` plus the bias is >= 0, else 0.

        Only the first len(self.w) entries of ``row`` are read.
        """
        activation = sum(self.w[k] * row[k] for k in range(len(self.w)))
        activation += self.bias
        return 1 if activation >= 0.0 else 0
        

In [50]:
import pandas as pd
df = pd.read_csv('winequality-red.csv', sep=";")
good = 7
bad = 4

# Keep only the clearly good (quality > good) and clearly bad (quality < bad)
# wines. The explicit .copy() makes `data` an independent frame, so the
# relabelling below never writes into (or warns about) a slice of `df`.
data = df.loc[(df['quality'] > good) | (df['quality'] < bad), ['pH', 'alcohol', 'quality']].copy()

# Build both masks from the original scores before overwriting any of them,
# so the result does not depend on the order of the two assignments.
is_bad = data['quality'] < bad
is_good = data['quality'] > good
data.loc[is_bad, 'quality'] = 0
data.loc[is_good, 'quality'] = 1
print(data)
# TODO: create the perceptron here. Note that data.shape[1] is 3 and counts
# the 'quality' label column in addition to the two input features.

        pH  alcohol  quality
267   3.35    12.80        1
278   3.23    12.60        1
390   3.56    12.90        1
440   2.88     9.80        1
455   3.22    13.40        1
459   3.25     9.00        0
481   3.15    11.70        1
495   3.15    11.00        1
498   3.15    11.00        1
517   3.16     8.40        0
588   3.72    14.00        1
690   3.63    10.70        0
828   3.46    12.70        1
832   3.38     9.90        0
899   3.48    11.00        0
1061  3.21    12.50        1
1090  2.98    11.80        1
1120  3.20    13.10        1
1202  3.35    11.70        1
1269  3.50    14.00        1
1299  3.50    10.90        0
1374  3.32     9.80        0
1403  3.23    10.00        1
1449  3.23    11.30        1
1469  3.31     9.70        0
1478  3.40    10.20        0
1505  3.55     9.95        0
1549  3.24    11.40        1
