In [1]:
import pycosat
import secrets
import math
import numpy as np
import time
import pandas as pd
import scipy.stats as st
from sklearn.neural_network import MLPRegressor
# Number of boolean features per solution; also the length of the per-feature
# cost/defect/usage tables and the number of name lines read from the DIMACS file.
NUM_FEATURES = 124
# Upper bound on how many SAT solutions are collected as the initial population.
NUM_SOLUTIONS = 100
# Maximum number of individuals picked (and then mutated) per call to mutate().
NUM_MUTATED = 100



In [2]:
def gen_scores(value):
    """Draw NUM_FEATURES random integer weights and normalise them by their sum.

    Each raw weight comes from ``np.random.randint(0, value + 1)``, i.e. an
    integer in the closed range ``[0, value]``.

    Args:
        value: Largest raw integer weight that may be drawn.

    Returns:
        list: NUM_FEATURES entries, each raw weight divided by the raw total.
    """
    # One RNG call per feature, issued in feature order.
    raw_weights = [np.random.randint(0, value + 1) for _ in range(NUM_FEATURES)]
    weight_sum = np.sum(raw_weights)
    return [weight / weight_sum for weight in raw_weights]

In [3]:
def boolean_to_CNF(solution):
    """Convert a 0/1 feature vector into a list of signed, 1-based literals.

    The entry at 0-based position ``k`` becomes ``k + 1`` when it equals 1 and
    ``-(k + 1)`` for any other value.

    Args:
        solution: Iterable of feature flags.

    Returns:
        list[int]: One signed literal per input entry, in input order.
    """
    return [
        number if flag == 1 else -number
        for number, flag in enumerate(solution, start=1)
    ]

In [4]:
def validate_CNF(cnf, individual):
    """Check whether a signed-literal assignment satisfies every clause.

    A literal ``lit`` counts as satisfied when ``individual[abs(lit) - 1]``
    equals ``lit``. A clause is satisfied when at least one of its literals is.

    Args:
        cnf: Iterable of clauses, each an iterable of non-zero signed integers.
        individual: Indexable assignment, looked up at ``abs(lit) - 1``.

    Returns:
        bool: False as soon as a clause has no satisfied literal (this includes
        an empty clause); True when every clause passes or there are no clauses.
    """
    for clause in cnf:
        # Every literal of the clause is inspected; there is no early exit
        # inside a clause.
        matching = [lit for lit in clause if individual[abs(lit) - 1] == lit]
        if not matching:
            return False
    return True

In [5]:
def train(X_train, y_train):
    """Fit a new MLPRegressor on the given samples and return the fitted model.

    The regressor is built with ``random_state=1`` and ``max_iter=500``.

    Args:
        X_train: Training inputs handed to ``fit``.
        y_train: Training targets handed to ``fit``.

    Returns:
        The value returned by ``MLPRegressor.fit``.
    """
    regressor = MLPRegressor(random_state=1, max_iter=500)
    return regressor.fit(X_train, y_train)

In [6]:
def predict(item, X_train, y_train):
    """Train a fresh MLPRegressor and predict the target for a single item.

    A new regressor (``random_state=1``, ``max_iter=500``) is fitted on every
    call, so nothing is cached between calls.

    Args:
        item: One feature vector; it is wrapped in a list before prediction.
        X_train: Training inputs handed to ``fit``.
        y_train: Training targets handed to ``fit``.

    Returns:
        The result of ``predict`` on the one-element batch ``[item]``.
    """
    regressor = MLPRegressor(random_state=1, max_iter=500)
    fitted = regressor.fit(X_train, y_train)
    single_batch = [item]
    return fitted.predict(single_batch)

In [7]:
def mutate(population, scores, p=.01):
    """Build mutated copies of the lowest-scoring members of a population.

    At most NUM_MUTATED members are picked, in ascending ``scores`` order as
    reported by ``np.argsort``. Each bit ``x`` of a picked member's ``item`` is
    replaced by ``1 - x`` when a fresh ``np.random.rand()`` draw is ``<= p``.

    Args:
        population: Sequence of objects exposing an ``item`` bit sequence.
        scores: Scores aligned with ``population``.
        p: Per-bit flip threshold compared against a uniform draw.

    Returns:
        list: New ``Item`` objects wrapping the mutated bit lists.
    """
    genomes = [member.item for member in population]
    picked = np.argsort(scores)[:NUM_MUTATED]
    parents = [genomes[index] for index in picked]

    children = []
    for parent in parents:
        child = []
        for bit in parent:
            # Exactly one uniform draw per bit, taken in bit order.
            if np.random.rand() <= p:
                child.append(1 - bit)
            else:
                child.append(bit)
        children.append(child)

    return [Item(child) for child in children]

In [8]:
def sort(population, scores):
    """Return ``population`` and ``scores`` reordered by ascending score.

    The previous body returned its arguments untouched on the first line,
    which left the sorting code (and a debug print) unreachable. The sort is
    stable, so input that is already in ascending order comes back in the
    same order.

    Args:
        population: Sequence of individuals aligned with ``scores``.
        scores: Sequence of mutually comparable scores.

    Returns:
        tuple[list, list]: ``(sorted_population, sorted_scores)`` as new lists;
        the inputs are not modified. Empty inputs yield ``([], [])``.
    """
    # Sort positions rather than values so both sequences stay aligned.
    order = sorted(range(len(scores)), key=lambda pos: scores[pos])
    sorted_population = [population[pos] for pos in order]
    sorted_scores = [scores[pos] for pos in order]
    return sorted_population, sorted_scores

In [9]:
def oracle(item, human):
    """Score an item as the truncated sum of its element-wise product with ``human``.

    NOTE(review): ``int()`` truncates toward zero. If ``human`` holds weights
    that sum to 1 (as gen_scores produces) and ``item`` holds 0/1 flags, the
    result is 0 for almost every input; confirm the truncation is intended.

    Args:
        item: Sequence of per-feature values.
        human: Sequence of per-feature weights, same length as ``item``.

    Returns:
        int: ``int(sum(item[k] * human[k]))``.
    """
    weighted = np.multiply(item, human)
    total = np.sum(weighted)
    return int(total)

In [10]:
class Item:
    """One candidate solution (a bit vector) together with its objective values.

    Ordering (``<``) is delegated to :meth:`better`, so ``list.sort()`` places
    the preferred items first.
    """

    # Class-level bounds, initialised to +/-infinity. Nothing inside this class
    # updates them.
    max_features = -math.inf
    min_features = math.inf
    max_totalcost = -math.inf
    min_totalcost = math.inf
    max_known = -math.inf
    min_known = math.inf
    max_featuresused = -math.inf
    min_featuresused = math.inf

    # Per-feature tables shared by every Item. They are drawn once, when the
    # class body executes, from the `secrets` generator.
    costs = [secrets.randbelow(10) for _ in range(NUM_FEATURES)]
    defective = [bool(secrets.randbelow(2)) for _ in range(NUM_FEATURES)]
    used = [bool(secrets.randbelow(2)) for _ in range(NUM_FEATURES)]

    def __init__(self, item):
        """Store the bit vector and derive its objective totals.

        Args:
            item: Sequence of 0/1 flags, multiplied element-wise with the
                class-level ``costs``, ``defective`` and ``used`` tables.
        """
        self.item = item
        # Placeholders; this class never assigns them anything other than -1.
        self.r = -1
        self.d = -1
        self.theta = -1
        # score and selectedpoints start at 0 and are assigned by callers.
        self.score = 0
        self.selectedpoints = 0
        # Objective totals over the selected features.
        self.features = sum(item)
        self.totalcost = sum(np.multiply(item, self.costs))
        self.knowndefects = sum(np.multiply(item, self.defective))
        self.featuresused = sum(np.multiply(item, self.used))

    def better(self, other):
        """Return whether ``self`` is preferred over ``other``.

        Four columns are compared: totalcost, knowndefects, featuresused and
        selectedpoints / 100. For each column an exponential loss is subtracted
        from a running total for each side. The first two columns use a
        negated exponent (a lower value favours its owner), the last two use a
        positive exponent (a higher value favours its owner).

        Args:
            other: The Item to compare against.

        Returns:
            The result of ``s1 / n < s2 / n`` for the two accumulated totals.
        """
        mine = (self.totalcost, self.knowndefects, self.featuresused,
                self.selectedpoints / 100)
        theirs = (other.totalcost, other.knowndefects, other.featuresused,
                  other.selectedpoints / 100)
        n = len(mine)
        s1 = 0
        s2 = 0
        for position, (a, b) in enumerate(zip(mine, theirs)):
            # The last two columns get exponent sign +1, all earlier ones -1.
            sign = 1 if position >= n - 2 else -1
            s1 -= math.e ** (sign * (a - b) / n)
            s2 -= math.e ** (sign * (b - a) / n)
        # To simulate an evaluation that takes a second or more, a
        # time.sleep(1) call can be inserted here.
        return s1 / n < s2 / n

    def __lt__(self, other):
        """Order items by preference: ``a < b`` is ``a.better(b)``."""
        return self.better(other)

In [11]:
def ga_method(initial_population, interaction_number, generations):
    """Run the scoring / mutation loop and rank every item that was scored.

    The population is walked with a cursor. The first ``interaction_number``
    items are scored with ``oracle`` against the module-level ``human``
    weights (a global, not a parameter). Once that budget is used up, a
    regressor is trained once on the oracle-scored items and every later item
    is scored with ``model.predict``. Whenever the cursor reaches the last
    index of the current population, a new generation is appended via
    ``mutate`` and the generation counter advances.

    The population is copied on entry, so the caller's list is no longer grown
    in place by the appended offspring. The Item objects themselves are still
    shared and have ``selectedpoints`` and ``score`` assigned.

    NOTE(review): at a generation boundary the cursor is advanced without
    scoring the item it was pointing at, so the last item of each generation
    never enters ``produced_items``. Confirm whether that skip is intended.

    Args:
        initial_population: Sequence of Item objects to start from.
        interaction_number: Number of items scored by ``oracle`` before the
            learned model takes over.
        generations: The loop runs while the generation counter is below
            ``generations - 1``.

    Returns:
        tuple: ``(best_item, best_score, produced_items, fits)``, where
        ``produced_items`` is sorted with ``Item.__lt__``, each item's ``score``
        is its rank divided by the number of produced items, ``best_item`` is
        the first item after sorting, and ``fits`` lists the scores in order.
    """
    cur_interaction_number = interaction_number
    # Shallow copy: `+=` below extends this list in place, which previously
    # extended the caller's list as well because both names were aliases.
    cur_population = list(initial_population)
    human_scores = []
    produced_items = []
    cur_i = 0
    model = None
    cur_generation = 0
    print('----------generation', cur_generation, 'with population', len(cur_population), '---------------')

    while cur_generation < generations -1:
        if cur_i == len(cur_population) -1:
            cur_generation += 1
            # Item.score is only assigned after this loop, so the scores
            # handed to mutate() here are the values the items currently hold.
            cur_population += mutate(produced_items, [item.score for item in produced_items])

            print('----------generation', cur_generation, 'with population', len(cur_population), '---------------')
            cur_i+=1

        # The three checks below are independent `if`s, not an if/elif chain:
        # when the counter moves from 1 to 0 and then to -1, more than one of
        # them runs within the same loop iteration, each on the next item.
        if cur_interaction_number > 0:
            # Oracle phase: score against the global `human` weights.
            cur_interaction_number -= 1
            score = oracle(cur_population[cur_i].item, human)
            cur_population[cur_i].selectedpoints = score
            produced_items.append(cur_population[cur_i])
            human_scores.append(score)
            cur_i+=1
        if cur_interaction_number == 0:
            # Budget just ran out: train once on everything scored so far,
            # then score the next item with the model.
            cur_interaction_number -=1
            model = train([item.item for item in produced_items], human_scores)
            score = model.predict([cur_population[cur_i].item])
            cur_population[cur_i].selectedpoints = score
            produced_items.append(cur_population[cur_i])
            human_scores.append(score)
            cur_i+=1
        if cur_interaction_number < 0:
            # Model phase: reuse the trained model without retraining.
            cur_interaction_number -=1
            score = model.predict([cur_population[cur_i].item])
            cur_population[cur_i].selectedpoints = score
            produced_items.append(cur_population[cur_i])
            human_scores.append(score)
            cur_i+=1

    # Rank with Item.__lt__ (preferred items first) and turn each rank into a
    # score in [0, 1).
    produced_items.sort()
    best_item = produced_items[0]
    produced_count = float(len(produced_items))
    for index, item in enumerate(produced_items):
        item.score = index/produced_count
    best_score = best_item.score
    fits = [item.score for item in produced_items]
    return best_item, best_score, produced_items, fits
    

In [12]:
# Result columns, one entry appended per run.
it, a, c, d, u, s, cv, dv, uv, sv, v, t = [], [], [], [], [], [], [], [], [], [], [], []

# The feature model does not change between runs, so it is read and parsed
# once. The context manager closes the file handle.
with open("Scrum.dimacs") as dimacs_file:
    lines = dimacs_file.readlines()

# The first NUM_FEATURES lines are read for names: third space-separated
# field, minus its final character.
names = []
for line in lines[:NUM_FEATURES]:
    names.append(line.split(' ')[2][:-1])

# One line after the name section is skipped; the remaining lines are clauses.
# Each clause keeps its non-zero integers. Whitespace-only lines are skipped
# instead of raising on int('').
dimacs = lines[NUM_FEATURES+1:]
cnf = []
for line in dimacs:
    literals = [int(token) for token in line.split()]
    if literals:
        cnf.append([literal for literal in literals if literal != 0])

for run in range(20):
    # `human` must remain a module-level name: ga_method reads it as a global.
    human = gen_scores(100)

    # Collect at most NUM_SOLUTIONS satisfying assignments.
    cnfsol = []
    for sol in pycosat.itersolve(cnf):
        if len(cnfsol) == NUM_SOLUTIONS:
            break
        cnfsol.append(sol)
    print(len(cnfsol))

    solutions = [[1 if val > 0 else 0 for val in sol] for sol in cnfsol]
    items = [Item(sol) for sol in solutions]

    start_time = time.time()
    best_item, score, produced_items, scores = ga_method(items, 80, 100)
    total_time = time.time() - start_time

    # NOTE(review): the 'I' column records the constant 50 while ga_method is
    # called with 80 interactions above; confirm which value is meant.
    a.append(50)
    it.append(NUM_FEATURES)
    t.append(total_time)
    print("it took", total_time ,"seconds")

    # Keep only the produced items whose assignment satisfies every clause.
    valid = 0
    valid_items, valid_scores = [], []
    for item, sc in zip(produced_items, scores):
        sol = boolean_to_CNF(item.item)
        if validate_CNF(cnf, sol):
            valid+=1
            valid_items.append(item)
            valid_scores.append(sc)

    fit = st.percentileofscore(scores, score)
    c.append(best_item.totalcost)
    d.append(best_item.knowndefects)
    u.append(best_item.featuresused)
    s.append(fit)
    print("Percentile of all solutions =", fit)
    print("Valid:", valid)
    print("Not Valid:", len(produced_items)-valid)
    valid_ratio = valid/len(produced_items)
    print("%Valid:", valid_ratio)
    v.append(valid_ratio)

    if valid_items:
        sorted_i, sorted_scores = sort(valid_items, valid_scores)
        best_valid = sorted_i[0]
        fitv = st.percentileofscore(sorted_scores, sorted_scores[0])
        valid_cost = best_valid.totalcost
        valid_defects = best_valid.knowndefects
        valid_used = best_valid.featuresused
    else:
        # No valid item in this run: record NaN so all columns stay the same
        # length, instead of raising IndexError and losing earlier runs.
        fitv = valid_cost = valid_defects = valid_used = math.nan
    cv.append(valid_cost)
    dv.append(valid_defects)
    uv.append(valid_used)
    sv.append(fitv)
    print("Percentile of best valid solution =", fitv)

df = pd.DataFrame(
    {
        'I': a,
        'S': it,
        'Cost': c,
        'Known Defects': d,
        'Features Used': u,
        'Score': s,
        'Valid %':v,
        'Valid Cost': cv,
        'Valid Known Defects': dv,
        'Valid Features Used': uv,
        'Valid Score': sv,
        'Time': t
     }).T
df.to_csv('src/sneak/Scores/Baseline.csv')



100
----------generation 0 with population 100 ---------------
----------generation 1 with population 199 ---------------
----------generation 2 with population 299 ---------------
----------generation 3 with population 399 ---------------
----------generation 4 with population 499 ---------------
----------generation 5 with population 599 ---------------
----------generation 6 with population 699 ---------------
----------generation 7 with population 799 ---------------
----------generation 8 with population 899 ---------------
----------generation 9 with population 999 ---------------
----------generation 10 with population 1099 ---------------
----------generation 11 with population 1199 ---------------
----------generation 12 with population 1299 ---------------
----------generation 13 with population 1399 ---------------
----------generation 14 with population 1499 ---------------
----------generation 15 with population 1599 ---------------
----------generation 16 with population 

----------generation 42 with population 4299 ---------------
----------generation 43 with population 4399 ---------------
----------generation 44 with population 4499 ---------------
----------generation 45 with population 4599 ---------------
----------generation 46 with population 4699 ---------------
----------generation 47 with population 4799 ---------------
----------generation 48 with population 4899 ---------------
----------generation 49 with population 4999 ---------------
----------generation 50 with population 5099 ---------------
----------generation 51 with population 5199 ---------------
----------generation 52 with population 5299 ---------------
----------generation 53 with population 5399 ---------------
----------generation 54 with population 5499 ---------------
----------generation 55 with population 5599 ---------------
----------generation 56 with population 5699 ---------------
----------generation 57 with population 5799 ---------------
----------generation 58 

----------generation 81 with population 8199 ---------------
----------generation 82 with population 8299 ---------------
----------generation 83 with population 8399 ---------------
----------generation 84 with population 8499 ---------------
----------generation 85 with population 8599 ---------------
----------generation 86 with population 8699 ---------------
----------generation 87 with population 8799 ---------------
----------generation 88 with population 8899 ---------------
----------generation 89 with population 8999 ---------------
----------generation 90 with population 9099 ---------------
----------generation 91 with population 9199 ---------------
----------generation 92 with population 9299 ---------------
----------generation 93 with population 9399 ---------------
----------generation 94 with population 9499 ---------------
----------generation 95 with population 9599 ---------------
----------generation 96 with population 9699 ---------------
----------generation 97 

----------generation 14 with population 1499 ---------------
----------generation 15 with population 1599 ---------------
----------generation 16 with population 1699 ---------------
----------generation 17 with population 1799 ---------------
----------generation 18 with population 1899 ---------------
----------generation 19 with population 1999 ---------------
----------generation 20 with population 2099 ---------------
----------generation 21 with population 2199 ---------------
----------generation 22 with population 2299 ---------------
----------generation 23 with population 2399 ---------------
----------generation 24 with population 2499 ---------------
----------generation 25 with population 2599 ---------------
----------generation 26 with population 2699 ---------------
----------generation 27 with population 2799 ---------------
----------generation 28 with population 2899 ---------------
----------generation 29 with population 2999 ---------------
----------generation 30 

----------generation 54 with population 5499 ---------------
----------generation 55 with population 5599 ---------------
----------generation 56 with population 5699 ---------------
----------generation 57 with population 5799 ---------------
----------generation 58 with population 5899 ---------------
----------generation 59 with population 5999 ---------------
----------generation 60 with population 6099 ---------------
----------generation 61 with population 6199 ---------------
----------generation 62 with population 6299 ---------------
----------generation 63 with population 6399 ---------------
----------generation 64 with population 6499 ---------------
----------generation 65 with population 6599 ---------------
----------generation 66 with population 6699 ---------------
----------generation 67 with population 6799 ---------------
----------generation 68 with population 6899 ---------------
----------generation 69 with population 6999 ---------------
----------generation 70 

----------generation 91 with population 9199 ---------------
----------generation 92 with population 9299 ---------------
----------generation 93 with population 9399 ---------------
----------generation 94 with population 9499 ---------------
----------generation 95 with population 9599 ---------------
----------generation 96 with population 9699 ---------------
----------generation 97 with population 9799 ---------------
----------generation 98 with population 9899 ---------------
----------generation 99 with population 9999 ---------------
it took 3.653038263320923 seconds
Percentile of all solutions = 0.010203040506070809
Valid: 248
Not Valid: 9553
%Valid: 0.025303540455055607
Percentile of best valid solution = 0.4032258064516129
100
----------generation 0 with population 100 ---------------
----------generation 1 with population 199 ---------------
----------generation 2 with population 299 ---------------
----------generation 3 with population 399 ---------------
----------gener

----------generation 27 with population 2799 ---------------
----------generation 28 with population 2899 ---------------
----------generation 29 with population 2999 ---------------
----------generation 30 with population 3099 ---------------
----------generation 31 with population 3199 ---------------
----------generation 32 with population 3299 ---------------
----------generation 33 with population 3399 ---------------
----------generation 34 with population 3499 ---------------
----------generation 35 with population 3599 ---------------
----------generation 36 with population 3699 ---------------
----------generation 37 with population 3799 ---------------
----------generation 38 with population 3899 ---------------
----------generation 39 with population 3999 ---------------
----------generation 40 with population 4099 ---------------
----------generation 41 with population 4199 ---------------
----------generation 42 with population 4299 ---------------
----------generation 43 

----------generation 67 with population 6799 ---------------
----------generation 68 with population 6899 ---------------
----------generation 69 with population 6999 ---------------
----------generation 70 with population 7099 ---------------
----------generation 71 with population 7199 ---------------
----------generation 72 with population 7299 ---------------
----------generation 73 with population 7399 ---------------
----------generation 74 with population 7499 ---------------
----------generation 75 with population 7599 ---------------
----------generation 76 with population 7699 ---------------
----------generation 77 with population 7799 ---------------
----------generation 78 with population 7899 ---------------
----------generation 79 with population 7999 ---------------
----------generation 80 with population 8099 ---------------
----------generation 81 with population 8199 ---------------
----------generation 82 with population 8299 ---------------
----------generation 83 

it took 3.6606478691101074 seconds
Percentile of all solutions = 0.010203040506070809
Valid: 242
Not Valid: 9559
%Valid: 0.024691358024691357
Percentile of best valid solution = 0.4132231404958678
100
----------generation 0 with population 100 ---------------
----------generation 1 with population 199 ---------------
----------generation 2 with population 299 ---------------
----------generation 3 with population 399 ---------------
----------generation 4 with population 499 ---------------
----------generation 5 with population 599 ---------------
----------generation 6 with population 699 ---------------
----------generation 7 with population 799 ---------------
----------generation 8 with population 899 ---------------
----------generation 9 with population 999 ---------------
----------generation 10 with population 1099 ---------------
----------generation 11 with population 1199 ---------------
----------generation 12 with population 1299 ---------------
----------generation 13 wi

----------generation 29 with population 2999 ---------------
----------generation 30 with population 3099 ---------------
----------generation 31 with population 3199 ---------------
----------generation 32 with population 3299 ---------------
----------generation 33 with population 3399 ---------------
----------generation 34 with population 3499 ---------------
----------generation 35 with population 3599 ---------------
----------generation 36 with population 3699 ---------------
----------generation 37 with population 3799 ---------------
----------generation 38 with population 3899 ---------------
----------generation 39 with population 3999 ---------------
----------generation 40 with population 4099 ---------------
----------generation 41 with population 4199 ---------------
----------generation 42 with population 4299 ---------------
----------generation 43 with population 4399 ---------------
----------generation 44 with population 4499 ---------------
----------generation 45 

----------generation 68 with population 6899 ---------------
----------generation 69 with population 6999 ---------------
----------generation 70 with population 7099 ---------------
----------generation 71 with population 7199 ---------------
----------generation 72 with population 7299 ---------------
----------generation 73 with population 7399 ---------------
----------generation 74 with population 7499 ---------------
----------generation 75 with population 7599 ---------------
----------generation 76 with population 7699 ---------------
----------generation 77 with population 7799 ---------------
----------generation 78 with population 7899 ---------------
----------generation 79 with population 7999 ---------------
----------generation 80 with population 8099 ---------------
----------generation 81 with population 8199 ---------------
----------generation 82 with population 8299 ---------------
----------generation 83 with population 8399 ---------------
----------generation 84 

it took 3.6808128356933594 seconds
Percentile of all solutions = 0.010203040506070809
Valid: 266
Not Valid: 9535
%Valid: 0.02714008774614835
Percentile of best valid solution = 0.37593984962406013
100
----------generation 0 with population 100 ---------------
----------generation 1 with population 199 ---------------
----------generation 2 with population 299 ---------------
----------generation 3 with population 399 ---------------
----------generation 4 with population 499 ---------------
----------generation 5 with population 599 ---------------
----------generation 6 with population 699 ---------------
----------generation 7 with population 799 ---------------
----------generation 8 with population 899 ---------------
----------generation 9 with population 999 ---------------
----------generation 10 with population 1099 ---------------
----------generation 11 with population 1199 ---------------
----------generation 12 with population 1299 ---------------
----------generation 13 wi

----------generation 29 with population 2999 ---------------
----------generation 30 with population 3099 ---------------
----------generation 31 with population 3199 ---------------
----------generation 32 with population 3299 ---------------
----------generation 33 with population 3399 ---------------
----------generation 34 with population 3499 ---------------
----------generation 35 with population 3599 ---------------
----------generation 36 with population 3699 ---------------
----------generation 37 with population 3799 ---------------
----------generation 38 with population 3899 ---------------
----------generation 39 with population 3999 ---------------
----------generation 40 with population 4099 ---------------
----------generation 41 with population 4199 ---------------
----------generation 42 with population 4299 ---------------
----------generation 43 with population 4399 ---------------
----------generation 44 with population 4499 ---------------
----------generation 45 

----------generation 67 with population 6799 ---------------
----------generation 68 with population 6899 ---------------
----------generation 69 with population 6999 ---------------
----------generation 70 with population 7099 ---------------
----------generation 71 with population 7199 ---------------
----------generation 72 with population 7299 ---------------
----------generation 73 with population 7399 ---------------
----------generation 74 with population 7499 ---------------
----------generation 75 with population 7599 ---------------
----------generation 76 with population 7699 ---------------
----------generation 77 with population 7799 ---------------
----------generation 78 with population 7899 ---------------
----------generation 79 with population 7999 ---------------
----------generation 80 with population 8099 ---------------
----------generation 81 with population 8199 ---------------
----------generation 82 with population 8299 ---------------
----------generation 83 

it took 3.652864933013916 seconds
Percentile of all solutions = 0.010203040506070809
Valid: 270
Not Valid: 9531
%Valid: 0.027548209366391185
Percentile of best valid solution = 0.37037037037037035
100
----------generation 0 with population 100 ---------------
----------generation 1 with population 199 ---------------
----------generation 2 with population 299 ---------------
----------generation 3 with population 399 ---------------
----------generation 4 with population 499 ---------------
----------generation 5 with population 599 ---------------
----------generation 6 with population 699 ---------------
----------generation 7 with population 799 ---------------
----------generation 8 with population 899 ---------------
----------generation 9 with population 999 ---------------
----------generation 10 with population 1099 ---------------
----------generation 11 with population 1199 ---------------
----------generation 12 with population 1299 ---------------
----------generation 13 wi