In [1]:
from __future__ import division

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib import patches as patches
import random

%matplotlib inline

In [2]:
# Read the comma-separated table; column 0 is kept as `target`, the remaining
# columns become the feature matrix `data`.
raw_table = np.loadtxt("vinhos.txt", delimiter=',')
target, data = raw_table[:, 0], raw_table[:, 1:]

# Report how many feature columns were read.
print(data.shape[1])

# Scale the whole feature matrix by a single scalar: its overall norm.
data = data / np.linalg.norm(data)

13


In [3]:
# Inspect the first row of the norm-scaled feature matrix.
data[0]

array([1.30573482e-03, 1.56908401e-04, 2.22975096e-04, 1.43144506e-03,
       1.16534310e-02, 2.56926037e-04, 2.80783455e-04, 2.56926037e-05,
       2.10128795e-04, 5.17522446e-04, 9.54296709e-05, 3.59696452e-04,
       9.77236534e-02])

In [4]:
def compete(input, net, n_features):
    """Find the best matching unit (BMU) of ``net`` for one input vector.

    The BMU is the grid node whose weight vector has the smallest Euclidean
    distance to ``input``. Ties are resolved in favour of the first node in
    row-major (x outer, y inner) order.

    Parameters
    ----------
    input : array-like with ``n_features`` elements
        The sample to match. Any layout holding exactly ``n_features`` values
        is accepted (1-D vector, column vector, ...). The name shadows the
        built-in ``input`` but is kept for caller compatibility.
    net : ndarray of shape (rows, cols, n_features)
        Weight vectors of the map.
    n_features : int
        Length of every weight vector.

    Returns
    -------
    bmu : ndarray of shape (n_features, 1)
        Weight vector of the winning node, as a column.
    idx_x, idx_y : int
        Grid coordinates of the winning node.

    Raises
    ------
    ValueError
        If the grid has no nodes.
    """
    if net.shape[0] == 0 or net.shape[1] == 0:
        raise ValueError("net must contain at least one node")

    # Flatten the sample so it is subtracted element-wise from every weight
    # vector; mixing a column vector with a 1-D sample would silently
    # broadcast to an (n_features, n_features) matrix.
    sample = np.asarray(input, dtype=float).reshape(n_features)

    # Euclidean distance from the sample to every node, shape (rows, cols).
    distances = np.linalg.norm(net - sample, axis=2)

    # argmin returns the first minimum in row-major order.
    flat_winner = np.argmin(distances)
    idx_x, idx_y = (int(v) for v in np.unravel_index(flat_winner, distances.shape))

    bmu = net[idx_x, idx_y, :].reshape(n_features, 1)
    return bmu, idx_x, idx_y

In [5]:
def decay_param(param_value, t, constant):
    """Return ``param_value`` scaled by the exponential factor ``exp(-t / constant)``."""
    decay_factor = np.exp(-t / constant)
    return param_value * decay_factor

def decay_radius(initial_radius, i, time_constant):
    """Return the radius at step ``i``: ``initial_radius * exp(-i / time_constant)``."""
    shrink = np.exp(-i / time_constant)
    return initial_radius * shrink

def decay_learning_rate(initial_learning_rate, i, n_iterations):
    """Return the learning rate at step ``i``: ``initial_learning_rate * exp(-i / n_iterations)``."""
    attenuation = np.exp(-i / n_iterations)
    return initial_learning_rate * attenuation

def calculate_influence(distance, radius):
    """Return the neighbourhood weight ``exp(-distance / (2 * radius**2))``.

    The result is 1 for ``distance == 0`` and decreases as ``distance`` grows.
    NOTE(review): this is a Gaussian kernel only when ``distance`` is a
    *squared* distance -- confirm what the caller passes.
    """
    spread = 2 * (radius ** 2)
    return np.exp(-distance / spread)

In [6]:
# --- Training schedule ---
n_iterations = 300   # number of outer training iterations
init_lr = 0.75       # learning rate before any decay

# --- Map and data dimensions ---
rows, cols = 20, 10
n_elements, n_features = data.shape

# --- Neighbourhood schedule ---
# Start at half the longer grid side; with this time constant,
# init_radius * exp(-n_iterations / time_constant) equals 1.
init_radius = max(rows, cols) / 2
time_constant = n_iterations / np.log(init_radius)

In [7]:
# Seed NumPy's global RNG so the random initial weights (and any later
# np.random call in the notebook) are reproducible after a kernel restart.
np.random.seed(0)

# One weight vector per map node, drawn uniformly from [0, 1):
# shape (rows, cols, n_features).
net = np.random.random((rows, cols, n_features))

In [8]:
# Shuffled visiting order over every sample row. The length comes from the
# data (n_elements == data.shape[0]) rather than a hard-coded 178, so the
# cell keeps working if the dataset size changes.
idxs = np.arange(n_elements)
np.random.shuffle(idxs)

In [None]:
# Grid coordinates of every node, computed once:
# grid_x[x, y] == x and grid_y[x, y] == y.
grid_x, grid_y = np.indices(net.shape[:2])

for i in range(n_iterations):
    # Neighbourhood radius and learning rate are fixed within an iteration
    # and shrink exponentially from one iteration to the next.
    r = decay_radius(init_radius, i, time_constant)
    l = decay_learning_rate(init_lr, i, n_iterations)

    # Present every sample once, in the pre-shuffled order.
    for example in idxs:
        t = data[example]

        # Find the grid position of the best matching unit for this sample.
        bmu, idx_x, idx_y = compete(t, net, n_features)

        # Squared grid distance from every node to the BMU. The squared form
        # is used consistently: it is compared against r**2 (i.e. the node is
        # within distance r of the BMU) and it is what the Gaussian kernel
        # exp(-d**2 / (2 * r**2)) in calculate_influence expects.
        sq_grid_dist = (grid_x - idx_x) ** 2 + (grid_y - idx_y) ** 2
        in_radius = sq_grid_dist <= r ** 2

        # Pull every node inside the neighbourhood towards the sample,
        # weighted by the learning rate and by its closeness to the BMU.
        influence = calculate_influence(sq_grid_dist[in_radius], r)
        net[in_radius] += l * influence[:, np.newaxis] * (t - net[in_radius])

    print(r, i)
    

10.0 0
9.923540961321004 1
9.847666521101582 2
9.772372209558107 3
9.697653591082494 4
9.623506263980886 5
9.54992586021436 6
9.476908045141593 7
9.404448517263518 8
9.33254300796991 9
9.261187281287935 10
9.190377133632593 11
9.120108393559098 12
9.050376921517122 13
8.981178609606946 14
8.912509381337456 15
8.844365191385997 16
8.776742025360063 17
8.709635899560805 18
8.643042860748357 19
8.576958985908941 20
8.511380382023765 21
8.446303185839685 22
8.381723563641621 23
8.317637711026709 24
8.254041852680183 25
8.190932242152973 26
8.128305161640993 27
8.066156921766135 28
8.004483861358919 29
7.943282347242815 30
7.882548774020213 31
7.822279563860026 32
7.762471166286917 33
7.703120057972146 34
7.6442227425260025 35
7.5857757502918375 36
7.527775638141662 37
7.470218989273317 38
7.413102413009175 39
7.356422544596413 40
7.30017604500878 41
7.244359600749901 42
7.188969923658072 43
7.134003750712562 44


In [None]:
print(net.shape)

n_rows, n_cols = net.shape[0], net.shape[1]

fig = plt.figure()
ax = fig.add_subplot(111, aspect='equal')
ax.set_xlim((0, n_rows + 1))
ax.set_ylim((0, n_cols + 1))
ax.set_title('Self-Organising Map after %d iterations' % n_iterations)

# Draw one unit square per node. Its colour is built from the node's weight
# vector: red = mean of weights 0-3, green = mean of weights 4-7,
# blue = mean of the remaining weights.
for gx in range(n_rows):
    for gy in range(n_cols):
        weights = net[gx, gy, :]
        rgb = [
            np.average(weights[0:4]),
            np.average(weights[4:8]),
            np.average(weights[8:]),
        ]
        tile = patches.Rectangle((gx + 0.5, gy + 0.5), 1, 1,
                                 facecolor=rgb, edgecolor='none')
        ax.add_patch(tile)
plt.show()

In [None]:
# Mean feature vector of three contiguous row blocks: 0-58, 59-129, 130-177.
# NOTE(review): presumably these blocks are the three classes stored in
# `target`, in file order -- confirm against the data file.
center_1 = np.mean(data[0:59], axis = 0)
center_2 = np.mean(data[59:130], axis = 0)
center_3 = np.mean(data[130:178], axis = 0)

# Print each centre under its own label (previously the first label was
# followed by a dump of the raw rows and the third label read 'center_2').
print('center_1')
print(center_1)

print('center_2')
print(center_2)

print('center_3')
print(center_3)

In [None]:
from sklearn.cluster import KMeans

In [None]:
# X     = np.array([[1, 2], [1, 4], [1, 0],[10, 2], [10, 4], [10, 0]])
# Build a 3-cluster k-means model with a fixed random_state, fit it on the
# feature matrix, and display the fitted centroids as the cell output.
kmeans = KMeans(n_clusters=3, random_state=0)
kmeans.fit(data)
kmeans.cluster_centers_

In [None]:
# Reference centres each node is compared against (the three block means).
clusters = np.array([center_1, center_2, center_3])

# One bucket of (x, y) node coordinates per centre; global_list holds the
# same three list objects in centre order.
list_1, list_2, list_3 = [], [], []
global_list = [list_1, list_2, list_3]

for x in range(net.shape[0]):
    for y in range(net.shape[1]):
        node = net[x, y, :]

        # Linear scan for the closest centre (Euclidean distance). 9 is a
        # sentinel meaning "no centre was strictly closer than infinity".
        idx_min, minimo = 9, np.inf
        for idx, center in enumerate(clusters):
            w_dist = np.sqrt(np.sum((node - center) ** 2))
            if w_dist < minimo:
                idx_min, minimo = idx, w_dist

        # File the node under its closest centre; the sentinel is dropped.
        if 0 <= idx_min <= 2:
            global_list[idx_min].append((x, y))

# Number of nodes assigned to each centre.
for bucket in global_list:
    print(len(bucket))
            

In [None]:
fig = plt.figure()
ax = fig.add_subplot(111, aspect='equal')
ax.set_xlim((0, net.shape[0]))
ax.set_ylim((0, net.shape[1]))
ax.set_title('Self-Organising Map after %d iterations' % n_iterations)

# Fill colour for each node group, in group order: red, green, blue.
group_colors = ([1, 0, 0], [0, 0.8, 0], [0, 0, 1])

# Draw a unit square at every node position, coloured by the group it
# was assigned to.
for color, members in zip(group_colors, global_list):
    for x, y in members:
        ax.add_patch(patches.Rectangle((x, y), 1, 1, facecolor=color, edgecolor='none'))
plt.show()