# Experiments With GNG Networks
## iris Dataset

## Imports and Dependencies

In [1]:
from sklearn import datasets
import itertools
import numpy as np
import matplotlib.pyplot as plt
from neupy import algorithms

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


## Functions

In [2]:
# Model building

## Create the model reference using the chosen parameters
def create_model(
    input_dim,
    n_start_nodes=2,
    shuffle_data=True,
    verbose=True,
    step=0.2,
    neighbour_step=0.05,
    max_edge_age=100,
    max_nodes=1000,
    _lambda=1000,
    alpha=0.5,
    beta=0.995,
    min_distance_for_update=0,
):
    """Build a ``GrowingNeuralGas`` object from the chosen hyper-parameters.

    The short names used in this notebook are forwarded to the constructor
    under these keywords:

    * ``input_dim`` -> ``n_inputs``
    * ``_lambda``   -> ``n_iter_before_neuron_added``
    * ``alpha``     -> ``after_split_error_decay_rate``
    * ``beta``      -> ``error_decay_rate``

    Every other argument is passed through under its own name.

    Returns the object created by ``algorithms.GrowingNeuralGas``.
    """
    gng_options = {
        # Network size at creation
        "n_inputs": input_dim,
        "n_start_nodes": n_start_nodes,
        # Training behaviour
        "shuffle_data": shuffle_data,
        "verbose": verbose,
        # Step sizes
        "step": step,
        "neighbour_step": neighbour_step,
        # Growth limits
        "max_edge_age": max_edge_age,
        "max_nodes": max_nodes,
        # Node insertion and error decay
        "n_iter_before_neuron_added": _lambda,
        "after_split_error_decay_rate": alpha,
        "error_decay_rate": beta,
        "min_distance_for_update": min_distance_for_update,
    }
    return algorithms.GrowingNeuralGas(**gng_options)

## Create multiple models based on a list of parameters (dicts)
## Returns a list of dicts containing the parameters and the model reference
def create_bundle_of_models(model_parameters):
    """Create one model per parameter dict.

    Each returned dict holds the created model under the "model" key,
    followed by a copy of the parameters that were used to build it.
    """
    bundle = []
    for model_parameter in model_parameters:
        entry = {"model": create_model(**model_parameter)}
        # Parameters are merged after the model, so they win on a key clash
        entry.update(model_parameter)
        bundle.append(entry)
    return bundle

## Creates all of the possible combinations of parameters between three lists
def create_permutations(list_of_data_1, list_of_data_2, list_of_data_3):
    """Return every combination of one item from each of the three inputs.

    The result is a list of 3-tuples (the Cartesian product), ordered so that
    the last input varies fastest.
    """
    pools = (list_of_data_1, list_of_data_2, list_of_data_3)
    return [combination for combination in itertools.product(*pools)]

## Creates a list of dicts containing the parameters desired, using the permutation function
def create_test_parameters(list1, list2, list3):
    """Build one parameter dict per combination of the three value lists.

    ``list1`` supplies "max_edge_age", ``list2`` supplies "_lambda" and
    ``list3`` supplies "max_nodes". Every dict also sets "verbose" to False.
    """
    parameter_sets = []
    for edge_age, lambda_value, node_limit in create_permutations(list1, list2, list3):
        parameter_sets.append(
            {"max_edge_age": edge_age, "_lambda": lambda_value, "max_nodes": node_limit, "verbose": False}
        )
    return parameter_sets

# Data manipulation

## Rescale the data between 0 and 1: Xr = (X - X_min) / (X_max - X_min)
def normalize_data(data, axis=None):
    """Min-max rescale ``data`` into the range [0, 1].

    Computes ``(X - X_min) / (X_max - X_min)``.

    Parameters
    ----------
    data : array-like
        Values to rescale.
    axis : int or None, optional
        Axis along which the minimum and maximum are taken. The default
        (``None``) uses a single minimum and maximum for the whole input,
        which is the original behaviour. ``axis=0`` on a
        (samples, features) matrix rescales each feature independently.

    Returns
    -------
    numpy.ndarray
        Rescaled values with the same shape as ``data``. Where the range is
        zero (all values equal), the result is 0 instead of NaN.
    """
    values = np.asarray(data)
    # keepdims keeps the reduced axis so the statistics broadcast against `values`
    min_value = np.min(values, axis=axis, keepdims=True)
    value_range = np.max(values, axis=axis, keepdims=True) - min_value
    # A zero range would divide by zero; dividing by 1 maps those entries to 0
    safe_range = np.where(value_range == 0, 1, value_range)
    return (values - min_value) / safe_range

# Plots

## Plot the u-matrix with the target classes on each winner neuron
def plot_u_matrix(model, data=None, labels=None):
    """Plot the model's distance map with one class marker per sample.

    Parameters
    ----------
    model : dict
        Entry holding the network under the 'model' key.
    data : iterable of samples, optional
        Samples to place on the map. Defaults to the notebook-level
        ``input_data``.
    labels : sequence of int, optional
        Class index of each sample, aligned with ``data``. Defaults to the
        notebook-level ``target``. Only indices 0, 1 and 2 have a marker and
        a colour assigned.

    NOTE(review): ``distance_map()`` and ``winner()`` resemble the MiniSom
    API; confirm that the object stored under 'model' provides them before
    relying on this plot.
    """
    # Fall back to the notebook-level variables the original always used
    if data is None:
        data = input_data
    if labels is None:
        labels = target

    network = model['model']

    plt.figure(figsize=(10, 10))
    plt.pcolor(network.distance_map().T, cmap='coolwarm')
    plt.colorbar()

    # One marker shape and edge colour per class index
    markers = ['o', 'v', 's']
    colors = ['C2', 'C1', 'C3']
    for idx, sample in enumerate(data):
        winner = network.winner(sample)
        label = labels[idx]
        # The +.5 offset centres the marker inside the winner's cell
        plt.plot(winner[0]+.5, winner[1]+.5, markers[label], markerfacecolor='None',
                 markeredgecolor=colors[label], markersize=12, markeredgewidth=2)
    # NOTE(review): axis limits are fixed at 30x30 regardless of the map size
    plt.axis([0, 30, 0, 30])
    plt.show()
    
def plot_hit_map(model, data=None):
    """Plot a heat map of the model's activation response for a set of samples.

    Parameters
    ----------
    model : dict
        Entry holding the network under the 'model' key.
    data : iterable of samples, optional
        Samples passed to ``activation_response``. Defaults to the
        notebook-level ``input_data``.

    NOTE(review): ``activation_response()`` resembles the MiniSom API;
    confirm that the object stored under 'model' provides it.
    """
    if data is None:
        # Fall back to the notebook-level dataset the original always used
        data = input_data

    plt.figure(figsize=(10, 10))
    plt.pcolor(model['model'].activation_response(data), cmap='Reds')
    plt.colorbar()
    # Render explicitly, as plot_u_matrix does
    plt.show()

## Iris Dataset

In [3]:
# Load the Iris dataset bundled with scikit-learn
iris = datasets.load_iris()

# The features already come as a numpy matrix, so they only need to be normalized before training
# Contains the features on the "data" key and the target classifications on the "target" key
print("Number of samples: %s" % (len(iris["data"])))
# NOTE(review): this prints the whole dataset object, including every sample row
print(iris)

Number of samples: 150
{'data': array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5

In [4]:
# Splitting the data into features and target classes

# Normalized input: features rescaled with normalize_data (defined above)
input_data = normalize_data(iris["data"])

# Target classes (read by plot_u_matrix; the training loop only receives input_data)
target = iris["target"]

## Model parameters and building

In [5]:
# Candidate values for each varied hyper-parameter; one model is built per combination
max_edge_age_list = [1, 50, 100, 200]
lambda_list = [100, 1000, 2000]
max_nodes_list = [100, 500, 1000, 2000]

# Fixed iteration number for all models
# NOTE(review): this value is not referenced by the training loop in this notebook,
# which passes epochs=500 directly; confirm which of the two is intended
iterations = 5000

# Number of features, in this case, 4
input_dim = input_data.shape[1]

# Every parameter dict also receives the input dimension before the models are created
models = create_bundle_of_models(
    [
        {**parameters, "input_dim": input_dim}
        for parameters in create_test_parameters(max_edge_age_list, lambda_list, max_nodes_list)
    ]
)
models

[{'model': GrowingNeuralGas(verbose=False, step=0.2, show_epoch=1, shuffle_data=True, signals=None, n_inputs=4, n_start_nodes=2, neighbour_step=0.05, max_edge_age=1, max_nodes=100, n_iter_before_neuron_added=100, after_split_error_decay_rate=0.5, error_decay_rate=0.995, min_distance_for_update=0),
  'max_edge_age': 1,
  '_lambda': 100,
  'max_nodes': 100,
  'verbose': False,
  'input_dim': 4},
 {'model': GrowingNeuralGas(verbose=False, step=0.2, show_epoch=1, shuffle_data=True, signals=None, n_inputs=4, n_start_nodes=2, neighbour_step=0.05, max_edge_age=1, max_nodes=500, n_iter_before_neuron_added=100, after_split_error_decay_rate=0.5, error_decay_rate=0.995, min_distance_for_update=0),
  'max_edge_age': 1,
  '_lambda': 100,
  'max_nodes': 500,
  'verbose': False,
  'input_dim': 4},
 {'model': GrowingNeuralGas(verbose=False, step=0.2, show_epoch=1, shuffle_data=True, signals=None, n_inputs=4, n_start_nodes=2, neighbour_step=0.05, max_edge_age=1, max_nodes=1000, n_iter_before_neuron_add

In [6]:
# Train every candidate model on the normalized data and keep its training errors
for idx, model in enumerate(models):
    print("Model #%s Starting" % (idx))
    # NOTE(review): epochs is hard-coded to 500 here, while the `iterations` variable
    # defined earlier (5000) is never used; confirm which of the two is intended
    model["model"].train(input_data, epochs=500)
    # Store the errors recorded during training; the last entry is used later for ranking
    model["quantization_error"] = model["model"].errors.train

Model #0 Starting


                                                                               

Model #1 Starting


                                                                               

Model #2 Starting


                                                                               

Model #3 Starting


                                                                               

Model #4 Starting


                                                                               

Model #5 Starting


                                                                               

Model #6 Starting


                                                                               

Model #7 Starting


                                                                               

Model #8 Starting


                                                                               

Model #9 Starting


                                                                               

Model #10 Starting


                                                                               

Model #11 Starting


                                                                               

Model #12 Starting


                                                                               

Model #13 Starting


                                                                               

Model #14 Starting


                                                                               

Model #15 Starting


                                                                               

Model #16 Starting


                                                                               

Model #17 Starting


                                                                               

Model #18 Starting


                                                                               

Model #19 Starting


                                                                               

Model #20 Starting


                                                                               

Model #21 Starting


                                                                               

Model #22 Starting


                                                                               

Model #23 Starting


                                                                               

Model #24 Starting


                                                                               

Model #25 Starting


                                                                               

Model #26 Starting


                                                                               

Model #27 Starting


                                                                               

Model #28 Starting


                                                                               

Model #29 Starting


                                                                               

Model #30 Starting


                                                                               

Model #31 Starting


                                                                               

Model #32 Starting


                                                                               

Model #33 Starting


                                                                               

Model #34 Starting


                                                                               

Model #35 Starting


                                                                               

Model #36 Starting


                                                                               

Model #37 Starting


                                                                               

Model #38 Starting


                                                                               

Model #39 Starting


                                                                               

Model #40 Starting


                                                                               

Model #41 Starting


                                                                               

Model #42 Starting


                                                                               

Model #43 Starting


                                                                               

Model #44 Starting


                                                                               

Model #45 Starting


                                                                               

Model #46 Starting


                                                                               

Model #47 Starting


                                                                               

In [7]:
# Rank the models from the lowest to the highest final quantization error
ranked_models = list(models)
ranked_models.sort(key=lambda entry: entry["quantization_error"][-1])

# The first entry of the ascending ranking is the best model
best_model = ranked_models[0]
ranked_models

[{'model': GrowingNeuralGas(verbose=False, step=0.2, show_epoch=1, shuffle_data=True, signals=None, n_inputs=4, n_start_nodes=2, neighbour_step=0.05, max_edge_age=50, max_nodes=2000, n_iter_before_neuron_added=100, after_split_error_decay_rate=0.5, error_decay_rate=0.995, min_distance_for_update=0),
  'max_edge_age': 50,
  '_lambda': 100,
  'max_nodes': 2000,
  'verbose': False,
  'input_dim': 4,
  'quantization_error': [0.14594583018372456,
   0.09410831566900014,
   0.07581041308119893,
   0.06840388670563698,
   0.06289695614948869,
   0.05826994748786092,
   0.054441955046107375,
   0.052748978876819216,
   0.05116913470129172,
   0.04982434257244071,
   0.04864488407348593,
   0.04688010503227512,
   0.046775517035275695,
   0.04501083520551523,
   0.04444039912894368,
   0.04363429384306073,
   0.04401147901390989,
   0.04285203646868467,
   0.0420082532428205,
   0.04013650060941776,
   0.0387706560951968,
   0.03818319697553913,
   0.03801367040723562,
   0.03704553215454022,
 

In [8]:
def _print_model_summary(rank, model):
    """Print the rank, final quantization error and varied parameters of one model entry."""
    print("\tModel #%s:" % (rank))
    print("\t\tFinal quantization error: %s" % (model["quantization_error"][-1]))
    print("\t\tModel: \n\t\t\tMax edge age: %s \n\t\t\tLambda: %s \n\t\t\tMax nodes: %s\n" % (model["max_edge_age"], model["_lambda"], model["max_nodes"]))


# Three best models: the head of the ascending ranking
print("Best models:")

for idx, model in enumerate(ranked_models[:3]):
    _print_model_summary(idx, model)

# Three worst models: the head of the reversed ranking
reverse_ranked_models = ranked_models[::-1]
print("Worst models:")

for idx, model in enumerate(reverse_ranked_models[:3]):
    # Report the position in the ascending ranking, counting back from the last entry
    _print_model_summary(len(ranked_models) - (idx+1), model)


# Full ranking
print("All models:")
for idx, model in enumerate(ranked_models):
    _print_model_summary(idx, model)

Best models:
	Model #0:
		Final quantization error: 1.6322512697305076e-07
		Model: 
			Max edge age: 50 
			Lambda: 100 
			Max nodes: 2000

	Model #1:
		Final quantization error: 1.6358911267388976e-07
		Model: 
			Max edge age: 200 
			Lambda: 100 
			Max nodes: 1000

	Model #2:
		Final quantization error: 1.6374954659189219e-07
		Model: 
			Max edge age: 200 
			Lambda: 100 
			Max nodes: 500

Worst models:
	Model #47:
		Final quantization error: 0.1647202972819408
		Model: 
			Max edge age: 1 
			Lambda: 2000 
			Max nodes: 500

	Model #46:
		Final quantization error: 0.1618606193115314
		Model: 
			Max edge age: 1 
			Lambda: 100 
			Max nodes: 100

	Model #45:
		Final quantization error: 0.16085833559433618
		Model: 
			Max edge age: 1 
			Lambda: 100 
			Max nodes: 1000

All models:
	Model #0:
		Final quantization error: 1.6322512697305076e-07
		Model: 
			Max edge age: 50 
			Lambda: 100 
			Max nodes: 2000

	Model #1:
		Final quantization error: 1.6358911267388976e-07
		Model

## Conclusions

Using the 3 variable parameters (edge age, lambda and max nodes), it's possible to rank the best and worst models using a list sorted by the final quantization error. The quantization error and the chosen parameter values are printed above: first the 3 best models, then the 3 worst, and finally all the models.

All 3 of the best models have a lambda of 100, paired with a max edge age of 50 to 200 and at least 500 max nodes. All 3 of the worst models have a max edge age of 1, while the other parameters do not seem to have influenced the result much, since they vary widely among these models (lambda from 100 to 2000, max nodes from 100 to 1000).

Looking at all the results, it is best not to use a max edge age of 1, and to keep it between 50 and 200 instead. Lambda is good between 100 and 500, which is expected, as a small lambda results in an unstable network and a high lambda demands more epochs and computing time. Finally, max nodes values are good between 1000 and 2000: a higher count for this dataset results in a good model with low quantization error when paired with the other best parameters.

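In conclusion, choosing a max edge age that is too low is a bad choice: the three worst models all use an edge age of 1 and end with a high error, whatever their other parameters. A lambda that is too low can also be harmful in general, since creating nodes too fast tends to cause an unstable network with poor results and high error, although in these experiments the lowest tested lambda (100) produced the best models. Choosing an average edge age (50 to 200) paired with a good lambda value leads to a stable network with good results for this dataset.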