In [1]:
import numpy as np
import os
import pandas

In [2]:
filename = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname('treinamento.txt'))) + '/treinamento-1.txt'
series = pandas.read_csv(filename,  header=None)

In [3]:
def series_to_supervised(df, n_lags, n_out=1, dropnan=True):
    """
    Frame a time series as a supervised learning dataset.
    Arguments:
        data: Sequence of observations as a pandas dataframe.
        n_lags: Number of lag observations as input (X).
        n_out: Number of observations as output (y).
        dropnan: Boolean whether or not to drop rows with NaN values.
    Returns:
        Pandas DataFrame of series framed for supervised learning.
    """
    n_vars = df.shape[1]
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_lags, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = pandas.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

In [4]:
D = series_to_supervised(series, 3).values

X = D[:,0:-1]
Y = D[:,-1].reshape(X.shape[0],1)

train_size = round(X.shape[0] * 0.7)
test_size = X.shape[0] - train_size
Xtr = X[0:train_size,:]
Xv = X[train_size:train_size+test_size,:]
Ytr = Y[0:train_size,:]
Yv = Y[train_size:train_size+test_size,:]

In [5]:
# Xtr = np.array([[0.5348242,0.4279261,0.6337585],
#                  [0.4279261,0.6337585,0.1967004],
#                  [0.6337585,0.1967004,0.9226179],
#                  [0.1967004,0.9226179,-0.7024477],
#                  [0.9226179,-0.7024477,0.0131345],
#                  [-0.7024477,0.0131345,0.999655],
#                  [0.0131345,0.999655,-0.9986200999999999],
#                  [0.999655,-0.9986200999999999,-0.9944841999999998],
#                  [-0.9986200999999999,-0.9944841999999998,-0.9779978000000001],
#                  [-0.9944841999999998,-0.9779978000000001,-0.9129594000000001],
#                  [-0.9779978000000001,-0.9129594000000001,-0.6669898],
#                  [-0.9129594000000001,-0.6669898,0.1102493],
#                  [-0.6669898,0.1102493,0.9756902],
#                  [0.1102493,0.9756902,-0.7811937],
#                  [0.9756902,-0.7811937,0.7368374],
#                  [-0.7811937,0.7368374,-0.7920459],
#                  [0.7368374,-0.7920459,0.6854684],
#                  [-0.7920459,0.6854684,-0.6613866],
#                  [0.6854684,-0.6613866,0.7413643],
#                  [-0.6613866,0.7413643,-0.704547]])

# Ytr = np.array([[0.1967004],
#                 [0.9226179],
#                 [-0.7024477],
#                 [0.0131345],
#                 [0.999655],
#                 [-0.9986200999999999],
#                 [-0.9944841999999998],
#                 [-0.9779978000000001],
#                 [-0.9129594000000001],
#                 [-0.6669898],
#                 [0.1102493],
#                 [0.9756902],
#                 [0.7811937],
#                 [0.7368374],
#                 [-0.7920459],
#                 [0.6854684],
#                 [-0.6613866],
#                 [0.7413643],
#                 [-0.704547],
#                 [0.3774754]])

m = 3

In [6]:
##Pesos para comparacao com Matlab
# W = {}
# W[0] = np.array([[0.93116, 0.72449, 0.59282, 0.70201]])
# W[1] = np.array([[0.27985 , 0.69036, 0.40103, 0.94530]])

# Wg = np.array([[0.34744, 0.45523, 0.99094 , 0.89728],
#        [0.86866, 0.95300, 0.41893, 0.41763 ]])

In [7]:
def softmax(Z):
    Z_exp = np.exp(Z)
    Z_sum = np.sum(Z_exp, axis = 1, keepdims = True)
    return Z_exp/Z_sum   

In [8]:
def maximiza_gating(Wg,Xtr,m,h):
    N = Xtr.shape[0]
    ne = Xtr.shape[1]
    Yg = softmax(np.dot(Xtr,Wg.T))
    
    grad = np.dot((h-Yg).T , (Xtr/N))
    dir = grad
    nit = 0
    nitmax = 50000
    alfa = 0.15
    
    while np.linalg.norm(grad) > 1e-5 and nit < nitmax:
        nit = nit + 1
        Wg = Wg + (alfa * dir) 
        Yg = softmax(np.dot(Xtr,Wg.T))
        grad = np.dot((h-Yg).T , (Xtr/N))
        dir = grad   
        
    return Wg

In [9]:
def maximiza_expert(W,var,Xtr,Ytr,h):
    Ye = np.dot(Xtr,W.T)
    N = Ye.shape[0]
    ns = Ye.shape[1]
    
    grad = np.dot(((h/var) * (Ytr-Ye)).T, Xtr/N)
    
    dir = grad
    nit = 0
    nitmax = 50000
    alfa = 0.15
    
    while np.linalg.norm(grad) > 1e-5 and nit < nitmax:
        nit = nit + 1
        W = W + (alfa * dir) 
        Ye = np.dot(Xtr,W.T)
        grad = np.dot(((h/var) * (Ytr-Ye)).T, Xtr/N)
        dir = grad 
        
    diff = Ytr - Ye
    soma = 0

    for i in range(m):
        soma = soma + (h[i] * np.dot(diff[i,:],diff[i,:].T))
    var = max(0.05, (1/ns) * soma/np.sum(h))
    
    return W,var

In [10]:
def mistura(Xtr, Ytr, m):
    Ntr = Xtr.shape[0]
    ne = Xtr.shape[1]
    ns = Ytr.shape[1]

    ##add bias
    Xtr = np.concatenate((Xtr, np.ones((Ntr,1))), axis=1)
    ne = ne + 1
    
    ## Inicializa rede gating
    Wg = np.random.rand(m,ne)
    
    # Inicializa especialistas
    W = {}
    var = list(range(m))
    for i in range(m):
        W[i] = np.random.rand(ns,ne)
        var[i] = 1
        
    ##calcula saida
    Yg = softmax(np.dot(Xtr,Wg.T))
    
    Ye = {}
    for i in range(m):
        Ye[i] = np.dot(Xtr,W[i].T)
    Ym = np.zeros((Ntr,ns))
    for i in range(m):
        Yge = Yg[:,i].reshape(Ntr,1)
        Ym = Ym + Ye[i]*Yge
        
    ##calculo da verossimilhanca    
    Py = np.zeros((Ntr,m))
    for i in range(m):
        Yaux = Ye[i]
        for j in range(Ntr):
            diff = Ytr[j,:]- Yaux[j,:]
            Py[j,i] = np.exp(np.dot(-diff,diff.T)/(2*var[i]))
            
    likelihood = np.sum(np.log(np.sum(Yg*Py, axis = 1, keepdims = True)))
    likelihood_ant = 0
    nit = 0
    nitmax = 100
    
    while np.abs(likelihood-likelihood_ant)>1e-3 and nit < nitmax:
        nit = nit + 1
        #Passo E
        haux = Yg*Py
        h = haux / np.dot(np.sum(haux, axis=1, keepdims=True), np.ones((1,m)))
        ##Passo M
        Wg = maximiza_gating(Wg,Xtr,m,h)
        for i in range(m):
            W[i],var[i] = maximiza_expert(W[i],var[i],Xtr,Ytr,h[:,i].reshape(Ntr,1))
        likehood_ant = likelihood


        ##calcula a saida
        Yg = softmax(np.dot(Xtr,Wg.T))
        Ye = {}
        for i in range(m):
            Ye[i] = np.dot(Xtr,W[i].T)
        Ym = np.zeros((Ntr,ns))
        for i in range(m):
            Yge = Yg[:,i].reshape(Ntr,1)
            Ym = Ym + Ye[i]*Yge


        ##calculo da funcao de verossimilhanca    
        Py = np.zeros((Ntr,m))
        for i in range(m):
            Yaux = Ye[i]
            for j in range(Ntr):
                diff = Ytr[j,:]- Yaux[j,:]
                Py[j,i] = np.exp(np.dot(-diff,diff.T)/(2*var[i]))

        likelihood = np.sum(np.log(np.sum(Yg*Py, axis = 1, keepdims = True)))
        
    me = {}
    me['gating'] = Wg
    me['expert_W'] = W
    me['expert_var'] = var
    
    return me

In [11]:
me = mistura(Xtr, Ytr, m)

In [13]:
me

{'expert_W': {0: array([[ 0.06845296,  0.58025505, -1.59429785,  0.81113139]]),
  1: array([[-0.17932284,  0.35784666,  1.11147883,  0.87992884]]),
  2: array([[-0.05487955, -0.79054338,  0.03802857,  0.09699909]])},
 'expert_var': [0.05, 0.05, 0.05],
 'gating': array([[ 2.54364104,  3.8498252 ,  9.57623119, -1.87235004],
        [-0.22880707, -1.09578072, -5.03725358,  0.84993285],
        [ 0.56631992, -0.87882916, -3.1045485 ,  1.32102428]])}

In [15]:
##Obtendo saida

Ntr = Xtr.shape[0]
ne = Xtr.shape[1]
ns = Ytr.shape[1]

##add bias
Xtr = np.concatenate((Xtr, np.ones((Ntr,1))), axis=1)
ne = ne + 1


Wg = me['gating']
W = me['expert_W']


##calcula a saida
Yg = softmax(np.dot(Xtr,Wg.T))
Ye = {}
for i in range(m):
    Ye[i] = np.dot(Xtr,W[i].T)
Ym = np.zeros((Ntr,ns))
for i in range(m):
    Yge = Yg[:,i].reshape(Ntr,1)
    Ym = Ym + Ye[i]*Yge

In [16]:
Ym - Ytr 

array([[-1.11110911e-01],
       [-7.79651958e-02],
       [ 2.00183798e-01],
       [-1.27014169e-02],
       [-4.30083921e-01],
       [ 1.75956505e-01],
       [ 9.26180271e-01],
       [ 6.74432238e-01],
       [ 6.33122941e-01],
       [ 4.69934677e-01],
       [-3.66605718e-04],
       [-1.61411847e-01],
       [ 5.53875445e-02],
       [-7.35247697e-01],
       [ 6.16330942e-02],
       [-4.39214610e-01],
       [ 3.26817428e-02],
       [-4.16435100e-01],
       [ 1.46808613e-02],
       [-1.09643603e-01],
       [ 8.56662819e-01],
       [-4.27309296e-01],
       [ 2.25996885e-01],
       [-7.28782412e-01],
       [-2.89604801e-02],
       [-6.42427337e-01],
       [-3.55946308e-01],
       [-4.14823047e-01],
       [-3.23151426e-01],
       [ 4.99379363e-02],
       [ 4.29747247e-01],
       [ 4.69194816e-01],
       [ 7.25745462e-01],
       [-2.37245034e-01],
       [ 2.50031083e-01],
       [-3.86123202e-01],
       [ 2.04106527e-02],
       [-2.08800746e-01],
       [ 3.7

In [17]:
##Obtendo saida

Nv = Xv.shape[0]
ne = Xv.shape[1]
ns = Yv.shape[1]

##add bias
Xv = np.concatenate((Xv, np.ones((Nv,1))), axis=1)
ne = ne + 1


Wg = me['gating']
W = me['expert_W']


##calcula a saida
Yg = softmax(np.dot(Xv,Wg.T))
Ye = {}
for i in range(m):
    Ye[i] = np.dot(Xv,W[i].T)
Ym = np.zeros((Nv,ns))
for i in range(m):
    Yge = Yg[:,i].reshape(Nv,1)
    Ym = Ym + Ye[i]*Yge

In [18]:
Ym - Yv

array([[-0.40511225],
       [ 0.33307277],
       [-0.42058386],
       [ 0.0054691 ],
       [-0.49710018],
       [ 0.07694513],
       [-0.77698214],
       [-0.30046797],
       [ 0.65863344],
       [-0.31237043],
       [-0.42922417],
       [-0.48419633],
       [ 0.27615924],
       [ 0.9105054 ],
       [ 0.63795647],
       [ 0.48871417],
       [ 0.04287086],
       [-0.22134836],
       [ 0.18321939],
       [ 0.89788587],
       [ 0.60707396],
       [ 0.37594606],
       [-0.17410517],
       [ 0.14814518],
       [-0.20857255],
       [-0.13431702],
       [ 0.0336834 ],
       [-0.75313196],
       [-0.18004292],
       [-0.33033253],
       [-0.64637495],
       [-0.23171481],
       [-0.09311544],
       [-0.06679032],
       [ 0.2601936 ],
       [ 0.48601249],
       [-0.1794928 ],
       [ 0.14942867],
       [-0.20436926],
       [-0.12416508],
       [ 0.06803684],
       [-0.69845729],
       [-0.41684691],
       [-0.62694135],
       [ 0.1512278 ],
       [ 0