## Imports 

In [27]:
import math
from sklearn import model_selection
from sklearn.model_selection import train_test_split 
from sklearn import metrics 
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from random import *

## Simulated Annealing

In [None]:
# Simulated Annealing function steps
# @parameters 
# sol - random solution
# @return solution and cost

# 1. Generate a random solution
# 2. Calculate its cost using a cost function (accuracy of the ML Model)
# 3. Generate a random neighboring solution
# 4. Calculate the new solution's cost (accuracy of the ML model)
# 5. Compare solutions
#     - If c_new > c_old move to the new solution
#     - If c_new < c_old maybe move to the new solution
# 6. Repeat steps until an acceptable solution is found or max number of iterations is reached
def sa(sol, X_train, y_train, X_test, y_test):
    """Run simulated annealing starting from ``sol``.

    The temperature starts at 1.0 and is multiplied by 0.9 after every
    batch of 100 candidate moves, until it drops to 0.00001 or below.
    Each candidate comes from ``neighbor`` and is scored with ``cost``;
    ``acceptance_probability`` decides, against a uniform random draw,
    whether the candidate replaces the current solution.

    Args:
        sol: Starting solution, passed as-is to ``neighbor`` and ``cost``.
        X_train, y_train, X_test, y_test: Data forwarded unchanged to
            ``cost`` on every evaluation.

    Returns:
        tuple: ``(solution, cost)`` - the last accepted solution and the
        cost computed for it. This is the last accepted state, not
        necessarily the best one seen during the run.
    """
    t = 1.0
    t_min = 0.00001
    alpha = 0.9
    moves_per_temperature = 100

    old_cost = cost(sol, X_train, y_train, X_test, y_test)
    while t > t_min:
        for _ in range(moves_per_temperature):
            new_sol = neighbor(sol)
            new_cost = cost(new_sol, X_train, y_train, X_test, y_test)
            ap = acceptance_probability(old_cost, new_cost, t)
            if ap > random():
                sol = new_sol
                old_cost = new_cost
        # Cool down so that worse moves become progressively less likely.
        t = t * alpha
    # Return the cost *value* of the current solution; the original returned
    # the ``cost`` function object itself.
    return sol, old_cost

In [35]:
# Function to generate neighboring solution
# Solution is defined as a machine learning model along with a set of parameters
# i.e.,
# solution = {
#     model: 'LinearRegression',
#     parameters: {
#         fit_intercept: true,
#         normalize: true,
#         copy_X: false,
#         n_jobs: 4,
#     }
# }
# TODO: Implement for each ML model (could use cases inside)
def neighbor(sol):
    """Return a neighboring solution of ``sol`` without modifying ``sol``.

    A parameter name is picked at random from the search space registered
    for ``sol['name']`` and its entry in the copy's ``'parameters'`` mapping
    is (re)assigned.

    Args:
        sol: Mapping with a ``'name'`` key (used to index ``search_space``)
            and a ``'parameters'`` mapping keyed by parameter name.

    Returns:
        A deep copy of ``sol`` with the chosen parameter assigned.

    Raises:
        KeyError: If ``sol['name']`` is not in ``search_space`` or the chosen
            parameter is missing from ``sol['parameters']``.
    """
    import copy  # local import: keeps this block self-contained

    # Use the search_space to find the parameters that may be varied
    parameter_space = search_space[sol['name']]

    # Choose a random parameter to modify
    parameter = choice(list(parameter_space))  # random.choice()

    # Grab the chosen parameter's current value from the solution
    modified_parameter = sol['parameters'][parameter]
    # TODO: How do we know its value range and what we should change it to?
    # Until that is decided the value is carried over unchanged.

    # Work on an independent copy: a plain assignment would alias ``sol`` and
    # silently mutate the caller's current solution.
    new_sol = copy.deepcopy(sol)
    # The parameter lives under 'parameters', not at the top level.
    new_sol['parameters'][parameter] = modified_parameter

    # Return new neighboring solution
    return new_sol

In [None]:
# Function to return the cost of a current solution
# In our case the cost is the accuracy (or we can use other metrics) of the current ML model / parameter configuration
# TODO: RMSE or some other metric?
def cost(sol, X_train, y_train, X_test, y_test):
    """Return the score reported by ``sol``.

    Currently a stub: it calls ``sol.score()`` with no arguments and returns
    the result. The train/test arguments are accepted but not used yet.

    NOTE(review): the callers in this file pass a plain dict as ``sol``,
    which has no ``score`` method - presumably a model must be built from
    the solution first; confirm.
    """
    # TODO Here we need to train the model on the data
    current_score = sol.score()
    return current_score

In [None]:
# Function which recommends whether we should jump to a new solution or not
# 1.0 - definitely switch
# 0.0 - definitely stay put
# 0.5 - 50/50 odds of switching
# Usually calculated by e^((c_new - c_old)/t)

def acceptance_probability(old_cost, new_cost, t):
    """Return the probability of moving from the old to the new solution.

    The cost is treated as a score to maximize, matching the annealing
    steps described in this file ("if c_new > c_old move to the new
    solution"). An improvement is always accepted; otherwise the
    probability is ``e^((new_cost - old_cost) / t)``, which lies in
    ``(0, 1]`` and shrinks as the candidate gets worse or ``t`` gets lower.

    Args:
        old_cost: Cost of the current solution.
        new_cost: Cost of the candidate solution.
        t: Current temperature; expected to be positive.

    Returns:
        float: Acceptance probability between 0.0 and 1.0.
    """
    if new_cost > old_cost:
        return 1.0
    # Here the exponent is <= 0, so the result never exceeds 1 and
    # math.exp cannot overflow, even at very small temperatures.
    return math.exp((new_cost - old_cost) / t)

## ML Models

In [19]:
# TODO: Define 5 ML models

# Search space describing the neighborhood of ML models and hyperparameters.
# Every model name maps to its own dict of parameter name -> value range;
# all entries are still 'value_range' placeholders.
search_space = {
    model_name: dict.fromkeys(
        ('property1', 'property2', 'property3', 'property4'),
        'value_range',
    )
    for model_name in ('Model1', 'Model2', 'Model3', 'Model4', 'Model5')
}

In [43]:
# Look up one entry of the search space: parameter 'property1' of 'Model1'.
search_space['Model1']['property1']

'value_range'

## Datasets

In [None]:
# TODO: Define datasets
# Tiny placeholder dataset until the real datasets are defined.
X = [1, 2, 4]
y = [1, 1, 2]
# train_test_split returns the splits grouped per input array:
# (X_train, X_test, y_train, y_test). Unpacking in any other order silently
# swaps features and targets.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

## Evaluation

In [46]:
# Starting solution handed to sa(): a model name plus one placeholder value
# for each of its parameters.
random_solution = dict(
    name='Model1',
    parameters=dict.fromkeys(
        ('property1', 'property2', 'property3', 'property4'),
        'value_range',
    ),
)

# Generate one neighbor of the starting solution to try out neighbor().
new_sol = neighbor(random_solution)


'value_range'

In [None]:
# Run simulated annealing from the starting solution on the train/test split.
sa(random_solution, X_train, y_train, X_test, y_test)