## Citations

Rollings, A. (2020). mlrose: Machine Learning, Randomized Optimization and SEarch package for Python, hiive extended remix. https://github.com/hiive/mlrose. Accessed: Sept 28, 2020

Hayes, G. (2019). mlrose: Machine Learning, Randomized Optimization and SEarch package for Python. https://github.com/gkhayes/mlrose. Accessed: Sept 28, 2020

Timer (elapsed wall-time measurement with `timeit.default_timer`): Stack Overflow, https://stackoverflow.com/questions/7370801/how-to-measure-elapsed-time-in-python

In [1]:
import mlrose_hiive as mlrose
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import timeit

from sklearn import tree, svm, datasets, metrics, preprocessing
from sklearn.model_selection import train_test_split,StratifiedKFold, ShuffleSplit
from sklearn.model_selection import StratifiedShuffleSplit, cross_validate, GridSearchCV, learning_curve, cross_val_score, validation_curve
from sklearn.metrics import roc_curve, auc, accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder

## Knapsack

In [15]:
# Knapsack instance: 20 candidate items, each described by a weight and a value.
weights = [
    6, 6, 6, 2, 2, 8, 10, 9, 1, 7,
    2, 3, 3, 1, 5, 5, 5, 3, 2, 1,
]
values = [
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
    1, 2, 3, 4, 5, 4, 3, 2, 1, 1,
]

# Capacity setting handed to Knapsack (per the mlrose documentation this is a
# fraction of the total item weight — confirm against the installed version).
max_weight_pct = 0.6

# Fitness function that the optimisation problem below is built around.
fitness = mlrose.Knapsack(
    weights=weights,
    values=values,
    max_weight_pct=max_weight_pct,
)

In [17]:
# Shared budget for the optimiser runs on this problem. The attempts limit is
# deliberately set to the same value as the iteration cap.
# NOTE(review): presumably this keeps the attempts limit from ending a run
# before the iteration cap is reached — confirm against mlrose's stopping rule.
max_iters = max_attempts = 60000

# Maximisation problem over a state vector with one entry per knapsack item.
problem = mlrose.DiscreteOpt(
    length=len(weights),
    fitness_fn=fitness,
    maximize=True,
)

### Randomized Hill Climbing

In [20]:
# Randomized hill climbing: 20 restarts, fixed seed, fitness curve recorded.
# The call is bracketed by a wall-clock timer so the run cost can be reported.
start = timeit.default_timer()
best_state, best_fitness, rhc_curve = mlrose.random_hill_climb(
    problem,
    max_attempts=max_attempts,
    max_iters=max_iters,
    restarts=20,
    curve=True,
    init_state=None,
    random_state=1,
)
end = timeit.default_timer()

print(f'Wall time: {end-start}s')

Wall time: 47.26125979400001s


In [21]:
# Number of entries recorded in the randomized-hill-climbing fitness curve.
len(rhc_curve)

60000

### Simulated Annealing

In [22]:
# Simulated annealing on the knapsack problem, with the fitness curve recorded.
# The geometric-decay schedule (init_temp=100, decay=0.95, min_temp=1) is built
# before the timer starts so that only the optimisation run itself is measured,
# as in the other algorithm cells.
schedule = mlrose.GeomDecay(init_temp=100, decay=0.95, min_temp=1)

start = timeit.default_timer()
# random_state is fixed (same seed as the RHC and MIMIC runs) so the curve and
# the reported fitness are reproducible after a kernel restart.
best_state, best_fitness, sa_curve = mlrose.simulated_annealing(
    problem=problem,
    max_attempts=max_attempts,
    max_iters=max_iters,
    schedule=schedule,
    curve=True,
    random_state=1,
)
end = timeit.default_timer()

print(f'Wall time: {end-start}s')

Wall time: 2.9406944450000196s


### Genetic Algorithm

In [23]:
# Genetic algorithm on the knapsack problem, with the fitness curve recorded.
# The population size equals the number of knapsack items; mutation
# probability is 0.3.
start = timeit.default_timer()
# random_state is fixed (same seed as the RHC and MIMIC runs) so the curve and
# the reported fitness are reproducible after a kernel restart.
best_state, best_fitness, ga_curve = mlrose.genetic_alg(
    problem=problem,
    pop_size=len(weights),
    mutation_prob=0.3,
    # NOTE(review): this run uses a hard-coded attempts limit of 100 instead of
    # the shared `max_attempts` — confirm that the difference is intentional.
    max_attempts=100,
    max_iters=max_iters,
    curve=True,
    random_state=1,
)
end = timeit.default_timer()

print(f'Wall time: {end-start}s')

Wall time: 0.2083114039999998s


### MIMIC

In [None]:
# MIMIC: population of 600, keep_pct of 0.2, fixed seed, fitness curve
# recorded. Timed with the same wall-clock pattern as the other algorithms.
start = timeit.default_timer()
best_state, best_fitness, mimic_curve = mlrose.mimic(
    problem,
    pop_size=600,
    keep_pct=0.2,
    max_attempts=max_attempts,
    max_iters=max_iters,
    curve=True,
    random_state=1,
)
end = timeit.default_timer()

print(f'Wall time: {end-start}s')

In [None]:
# Collect the iteration axis (1..max_iters) and each algorithm's fitness curve
# under the names used as column labels in the comparison table.
plot_dict = {
    'x': np.arange(1, max_iters + 1),
    'Random_Hill': rhc_curve,
    'Sim_Anneal': sa_curve,
    'Genetic_Alg': ga_curve,
    'MIMIC': mimic_curve,
}

In [None]:
# Build the frame with one row per dict entry (orient='index'), then transpose
# so that each entry of plot_dict becomes a column.
df = pd.DataFrame.from_dict(plot_dict, orient='index').transpose()
df.head()

In [None]:
# Overlay the fitness curves of the four algorithms on a single axis.
fig, ax = plt.subplots(figsize=(6, 4))

ax.plot(rhc_curve, label='RHC', color='r')
ax.plot(sa_curve, label='SA', color='b')
ax.plot(ga_curve, label='GA', color='orange')
ax.plot(mimic_curve, label='MIMIC', color='g')

# Title and axis labels are set in one call so the figure stands on its own.
ax.set(
    title="Knapsack problem fitness vs iterations",
    xlabel='Iterations',
    ylabel='Fitness',
)
ax.grid(True)
ax.legend()

# Render explicitly so the cell's output is the figure only, rather than the
# repr of the Legend object returned by the previous line.
plt.show()