## Imports 

In [63]:
import math
import numpy as np
from sklearn import model_selection
from sklearn import datasets
from sklearn.model_selection import train_test_split 
from sklearn import metrics 
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from random import *

## Simulated Annealing

In [None]:
# Simulated Annealing function steps
# @parameters 
# sol - random solution (ML model)
# X_train, y_train - training data
# X_test, y_test - testing data
# @return solution and cost

# 1. Generate a random solution
# 2. Calculate its cost using a cost function (accuracy of the ML Model)
# 3. Generate a random neigboring solution
# 4. Calculate new solutions cost (accuracy of the ML model)
# 5. Compare solutions
#     - If c_new > c_old move to the new solution
#     - If c_new < c_old maybe move to the new solution
# 6. Repeat steps until an acceptable solution is found or max number of iterations is reached
def sa(sol, X_train, y_train, X_test, y_test):
    t = 1.0
    t_min = 0.00001
    alpha = 0.9
    old_cost = cost(sol, X_train, y_train, X_test, y_test)
    while t > t_min:
        i = 1
        while i <= 100:
            new_sol = neighbor(sol)
            new_cost = cost(new_sol, X_train, y_train, X_test, y_test)
            ap = acceptance_probability(old_cost, new_cost, t)
            if ap > random():
                sol = new_sol
                old_cost = new_cost
            i += 1
        t = t * alpha
    return sol, cost          

In [154]:
# Function to generate neighboring solution
# Solution is defined as a machine learning model along with a set of parameters
# i.e.,
# solution = {
#     model: 'LinearRegression',
#     parameters: {
#         fit_intercept: true,
#         normalize: true,
#         copy_X: false,
#         n_jobs: 4,
#     }
# }
def neighbor(sol):
    # Use the search_space to find a new neighbor of the current solution and return that
    
    
    # Chose a random parameter and modify it 
    parameter = choice(list(search_space[sol['name']].keys())) # random.choice()
    print('Randomly got ', parameter)
    
    parameter_space = search_space[sol['name']][parameter]
    print('Parameter space', parameter_space)
    
    # Grab the random parameter from our current solution and change it
    current_parameter_val = sol['parameters'][parameter]
    print('Current parameter value', current_parameter_val)
    
    # Grab the current index of the selected parameter of our model
    current_index = parameter_space.index(current_parameter_val)
    
    # In one step modify the value of the selected parameter
    if current_index == 0:
        # index = 0 -> index++
        modified_parameter = parameter_space[1];
    elif current_index == len(parameter_space):
        # index = length -> index--
        modified_parameter = parameter_space[current_index - 1]
    else: 
        # index = index + random(-1,1)
        modified_parameter = parameter_space[current_index + choice([-1, 1])]
    
    # Create a new solution copy the current one and replace the randomly chosen parameter
    new_sol = sol
    new_sol['parameters'][parameter] = modified_parameter
    print('New parameter value', modified_par)
    # Return new neighboring solution
    return new_sol

In [60]:
# Function to return the cost of a current solution
# In our case the cost is the accuracy (or we can use other metrics) of the current ML model / parameter configuration
# TODO: RMSE or some other metric?
def cost(sol, X_train, y_train, X_test, y_test): 
    # Get model
    model = get_model(sol['name'], sol['parameters'])
    
    # Train model on data
    model.fit(X_train, y_train)
    
    # Predictions
    predictions = model.predict(X_test)
    
    # Get accuracy or some other metric and return
    score = model.score(X_test, y_test)
    
    return score

In [58]:
# Function to get a model with a parameter configuration
def get_model(name, parameters):
    # LogisticRegression
    if name == 'LogisticRegression':
        lr = LogisticRegression(parameters)
        return lr
    # Model2
    elif name == '':
        return
    # Model3
    elif name == '':
        return
    # Model4
    elif name == '':
        return
    # Model5
    elif name == '':
        return
    # Default
    else:
        return

In [61]:
# Function which recommends if we should jump to a new solutions or not
# 1.0 - definitely switch
# 0.0 - definitely stay put
# 0.5 - 50/50 odds of switching
# Usually calculated by e^((c_new - c_old)/t)

def acceptance_probability(old_cost, new_cost, t): 
    if new_cost < old_cost:
        return 1.0
    else:
        return math.exp((new_cost - old_cost)/t)

## ML Models

In [104]:
# TODO: Define 5 ML models

# Search Space that will be used to define our neighborhood of ML models and hyperparameters
# Basically our dictionary defining the model, its most important parameters, and their value ranges
search_space = {
    'LogisticRegression': {
        'penalty': ['l1', 'l2'],
        'C': np.logspace(-4, 4, 20).tolist(),
        'solver': ['liblinear', 'saga'],
    },
    'Model2': {
        'property1': 'value_range',
        'property2': 'value_range',
        'property3': 'value_range',
        'property4': 'value_range'
    },
    'Model3': {
        'property1': 'value_range',
        'property2': 'value_range',
        'property3': 'value_range',
        'property4': 'value_range'
    },
    'Model4': {
        'property1': 'value_range',
        'property2': 'value_range',
        'property3': 'value_range',
        'property4': 'value_range'
    },
    'Model5': {
        'property1': 'value_range',
        'property2': 'value_range',
        'property3': 'value_range',
        'property4': 'value_range'
    }
}

In [105]:
search_space['LogisticRegression']['C']

[0.0001,
 0.00026366508987303583,
 0.0006951927961775605,
 0.0018329807108324356,
 0.004832930238571752,
 0.012742749857031334,
 0.03359818286283781,
 0.08858667904100823,
 0.23357214690901212,
 0.615848211066026,
 1.623776739188721,
 4.281332398719396,
 11.288378916846883,
 29.763514416313132,
 78.47599703514607,
 206.913808111479,
 545.5594781168514,
 1438.44988828766,
 3792.690190732246,
 10000.0]

In [102]:
2.63665090e-04

0.00026366509

1

## Datasets

In [51]:
# TODO: Define datasets

# Iris
iris = datasets.load_iris()
X = iris.data
y = iris.target
X_train, y_train, X_test, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)

## Evaluation

In [158]:
# Some random solution that we will pass to the sa() to start with

# LogisticRegression
random_lr_solution = {
    'name': 'LogisticRegression',
    'parameters': {
        'penalty': 'l2',
        'C': 0.23357214690901212,
        'solver': 'liblinear',
        'n_jobs': -1
    }
}

# # search_space[random_lr_solution['name']]['C'][0]
# # random_lr_solution['parameters']['C']
# index = search_space[random_lr_solution['name']]['C'].index(random_lr_solution['parameters']['C'])
# index

neighbor(random_lr_solution)

Randomly got  C
Parameter space [0.0001, 0.00026366508987303583, 0.0006951927961775605, 0.0018329807108324356, 0.004832930238571752, 0.012742749857031334, 0.03359818286283781, 0.08858667904100823, 0.23357214690901212, 0.615848211066026, 1.623776739188721, 4.281332398719396, 11.288378916846883, 29.763514416313132, 78.47599703514607, 206.913808111479, 545.5594781168514, 1438.44988828766, 3792.690190732246, 10000.0]
Current parameter value 0.23357214690901212


{'name': 'LogisticRegression',
 'parameters': {'penalty': 'l2',
  'C': 0.08858667904100823,
  'solver': 'liblinear',
  'n_jobs': -1}}

In [157]:
sa(random_solution, X_train, y_train, X_test, y_test)

NameError: name 'sa' is not defined

In [142]:
search_space[random_lr_solution['name']]['C']

[0.0001,
 0.00026366508987303583,
 0.0006951927961775605,
 0.0018329807108324356,
 0.004832930238571752,
 0.012742749857031334,
 0.03359818286283781,
 0.08858667904100823,
 0.23357214690901212,
 0.615848211066026,
 1.623776739188721,
 4.281332398719396,
 11.288378916846883,
 29.763514416313132,
 78.47599703514607,
 206.913808111479,
 545.5594781168514,
 1438.44988828766,
 3792.690190732246,
 10000.0]