# A*-search using `aima-python`

Algorithm implementations taken from [here](https://github.com/aimacode/aima-python/blob/master/search-4e.ipynb)

* *State* is defined by gifts in bags

* *Goal states* are states in which every bag holds at least 3 gifts and the mean simulated score exceeds the goal score

* *Actions*: put one more gift into the bag that currently has the minimal weight

In [1]:
# https://ipython.org/ipython-doc/3/config/extensions/autoreload.html
%load_ext autoreload
%autoreload 2

In [593]:
import numpy as np
np.random.seed(2016)

In [594]:
from search import Problem, astar_search, uniform_cost_search, state_sequence, action_sequence
import sys
sys.path.append('../common')
from utils import weight3 as weight_fn

In [791]:
# Number of bags in one search state and the per-bag weight limit; a bag whose
# sampled weight is not below max_weight contributes nothing to the score.
n_bags = 2
max_weight = 50

# Stock of each gift type.
n_horses = 1000
n_balls = 1100
n_bikes = 500
n_trains = 1000
n_coals = 166
n_books = 1200
n_dolls = 1000
n_blocks = 1000
n_gloves = 200

# Earlier representation (one id string per gift), kept for reference:
# available_gifts = {
#     "horse": ['horse_%i' % i for i in range(n_horses)],
#     "ball": ['ball_%i' % i for i in range(n_balls)],
#     "bike": ['bike_%i' % i for i in range(n_bikes)],
#     "train": ['train_%i' % i for i in range(n_trains)],
#     "coal": ['coal_%i' % i for i in range(n_coals)],
#     "book": ['book_%i' % i for i in range(n_books)],
#     "doll": ['doll_%i' % i for i in range(n_dolls)],
#     "blocks": ['blocks_%i' % i for i in range(n_blocks)],
#     "gloves": ['gloves_%i' % i for i in range(n_gloves)],
# }

# Current representation: gift type name -> number of gifts still available.
available_gifts = {
    "horse": n_horses,
    "ball": n_balls,
    "bike": n_bikes,
    "train": n_trains,
    "coal": n_coals,
    "book": n_books,
    "doll": n_dolls,
    "blocks": n_blocks,
    "gloves": n_gloves
}


# Optional per-type step costs used by SantasBagsProblem.step_cost; a type
# missing from this dict costs 1.0, so the empty dict means uniform cost.
type_cost = {
#     "horse": 1.2,
#     "ball": 1.2,
#     "bike": 2,
#     "train": 1.15,
#     "coal": 0.5,
#     "book": 2.0,
#     "doll": 1.3,
#     "blocks": 0.5,
#     "gloves": 3    
}

# Alphabetically sorted type names; the position of a name in this list is the
# index of that type's count inside every bag tuple.
gift_types = sorted(list(available_gifts.keys()))
n_types = len(gift_types)

In [792]:
len(gift_types), gift_types

(9,
 ['ball',
  'bike',
  'blocks',
  'book',
  'coal',
  'doll',
  'gloves',
  'horse',
  'train'])

A state is a tuple of bags; each bag is a tuple of gift counts, one per gift type, in the order of `gift_types`:

```
( 
#  ball, bike, block, book, coal, doll, gloves, horse, train  
    (0,1,0,3,0,0,0,0,2), # bag 1
    (0,0,0,0,0,2,5,6,0), # bag 2
    ...
)
```


In [793]:
def bag_weight(bag, n1=100):
    """Return the total weight of one bag.

    `bag` holds one gift count per gift type. `weight_fn(type_index, n1)` is
    called once for every single gift, in type order, and the results are
    added up. `n1` is passed through to `weight_fn` unchanged (presumably a
    sample count -- confirm against `utils.weight3`).
    """
    # Lazily enumerate one weight draw per gift so the call order is unchanged.
    draws = (
        weight_fn(type_index, n1)
        for type_index, n_gifts in enumerate(bag)
        for _ in range(n_gifts)
    )
    total = 0
    for drawn_weight in draws:
        total += drawn_weight
    return total

In [794]:
def score(state, count=100):
    """Return the mean score of `state` over `count` simulated trials.

    In each trial every bag's weight is sampled with `bag_weight(bag, n1=1)`;
    only bags whose sampled weight is strictly below `max_weight` add their
    weight to that trial's total.
    """
    trial_totals = np.zeros(count)
    for trial in range(count):
        accepted_weight = 0
        for bag in state:
            sampled = bag_weight(bag, n1=1)
            if sampled < max_weight:
                accepted_weight += sampled
        trial_totals[trial] = accepted_weight
    return trial_totals.mean()

In [795]:
((3, 1, 1, 2, 0, 1, 2, 1, 1), (4, 1, 2, 1, 2, 4, 2, 2, 1))
score(s)

37.22498483513322

In [796]:
#[len(available_gifts[k]) for k in available_gifts]
available_gifts

{'ball': 1100,
 'bike': 500,
 'blocks': 1000,
 'book': 1200,
 'coal': 166,
 'doll': 1000,
 'gloves': 200,
 'horse': 1000,
 'train': 1000}

In [917]:
class SantasBagsProblem(Problem):
    """Search problem that fills bags with gifts one gift at a time.

    A state is a tuple of bags and each bag is a tuple of gift counts indexed
    like `self.gift_types`. The methods read `self.gift_types`,
    `self.available_gifts`, `self.max_weight`, `self.type_cost`,
    `self.weight_fn`, `self.bag_weight_fn` and `self.goal_score`; these are
    presumably stored by the base class from the constructor's keyword
    arguments -- confirm against `search.Problem`.
    """

    def _get_gift_type_indices(self, state):
        """Return the indices of gift types that still have stock left.

        A type is kept when the number of its gifts summed over all bags of
        `state` is below `self.available_gifts` for that type.
        """
        used_per_type = np.sum(np.array(state), axis=0)
        return [
            type_index
            for type_index, used in enumerate(used_per_type)
            if used < self.available_gifts[self.gift_types[type_index]]
        ]

    def actions(self, state):
        """Return a list of actions executable in this state.

        Each action is a `(bag_index, gift_type_index)` pair and all of them
        target the bag found to be the lightest.
        """
        open_types = self._get_gift_type_indices(state)
        if not open_types:
            print("No gifts available to create actions")
            return []

        # Pick the bag with the smallest weight below max_weight; bag 0 is
        # the fallback when no bag qualifies.
        target_bag = 0
        lightest = self.max_weight
        for bag_index, bag in enumerate(state):
            candidate_weight = self.bag_weight_fn(bag)
            if candidate_weight < lightest:
                target_bag, lightest = bag_index, candidate_weight

        # The chosen bag is weighed again here, as a separate call.
        current_weight = self.bag_weight_fn(state[target_bag])
        return [
            (target_bag, type_index)
            for type_index in open_types
            if current_weight + self.weight_fn(type_index) < self.max_weight
        ]

    def result(self, state, action):
        """The state that results from executing this action in this state."""
        bag_id, gift_type_index = action
        bags = list(state)
        grown_bag = list(bags[bag_id])
        grown_bag[gift_type_index] += 1
        bags[bag_id] = tuple(grown_bag)
        return tuple(bags)

    def is_goal(self, state):
        """True if the state is a goal.

        Every bag must hold at least 3 gifts and the mean validation score
        must exceed `self.goal_score`. The score is printed for every state
        that passes the gift-count check.
        """
        if any(sum(bag) < 3 for bag in state):
            return False

        mean_score = self._validation(state)
        print("- Mean score : ", mean_score, " / ", self.goal_score, state)
        return mean_score > self.goal_score

    def step_cost(self, state, action, result=None):
        """The cost of taking this action from this state.

        Uses the entry of `self.type_cost` for the gift type being added when
        there is one, otherwise 1.0.
        """
        if self.type_cost is None:
            return 1.0
        bag_id, gift_type_index = action
        gift_type = self.gift_types[gift_type_index]
        if gift_type in self.type_cost:
            return self.type_cost[gift_type]
        return 1.0

    def _validation(self, state, count=100):
        """Return the mean over `count` trials of the accepted bag weights.

        In each trial every bag is weighed with `self.bag_weight_fn(bag, n1=1)`
        and only weights strictly below `self.max_weight` are added up.
        """
        trial_totals = np.zeros(count)
        for trial in range(count):
            accepted_weight = 0
            for bag in state:
                sampled = self.bag_weight_fn(bag, n1=1)
                if sampled < self.max_weight:
                    accepted_weight += sampled
            trial_totals[trial] = accepted_weight
        return np.mean(trial_totals)

In [918]:
alpha = 0.75
goal_score = n_bags*max_weight*alpha
print("Goal score: ", goal_score)

Goal score:  37.5


In [919]:
def compute_normal_identical(mu, sigma, a=50, max_n=100):
    """
    Solve n*mu + 3*sigma*sqrt(n) <= a for the largest n below `max_n`.

    Candidates n = 0, 1, ... are scanned in increasing order and the scan
    stops at the first n that violates the bound, so for non-negative `mu`
    and `sigma` the returned n is the largest admissible one.

    mu, sigma: mean and standard deviation of a single item's weight.
    a: upper bound for the "mean + 3 sigma" weight of n items.
    max_n: exclusive upper limit of the scan (default 100).

    return: n, Mu, Sigma with Mu = n*mu and Sigma = sigma*sqrt(n).
    If even n = 0 violates the bound (a < 0), n = 0 is returned.
    """
    n = 0
    for candidate in range(max_n):
        upper = mu * candidate + 3.0 * sigma * np.sqrt(candidate)
        if upper > a:
            break
        # Remember the last admissible candidate; this also covers the case
        # where the scan finishes without ever exceeding the bound.
        n = candidate
    m = n * mu
    s = sigma * np.sqrt(n)
    return n, m, s

In [920]:
from copy import deepcopy

In [921]:
# # Horse : 
# ag = deepcopy(available_gifts)
# n, m, s = compute_normal_identical(5, 2)
# initial_state=tuple([tuple([ag['horse'].pop() for i in range(n)]) for j in range(n_bags)])
# initial_state

In [922]:
# initial_state=tuple([tuple([0]*n_types)]*n_bags)

In [923]:
initial_state = ((total_state[0], ))
initial_state

((2, 1, 0, 1, 0, 1, 1, 1, 0),)

In [924]:
p = SantasBagsProblem(initial=initial_state,
                      gift_types=gift_types, 
                      available_gifts=available_gifts,
                      max_weight=max_weight,    
                      type_cost=type_cost,
                      weight_fn=weight_fn,
                      bag_weight_fn=bag_weight,
                      goal_score=goal_score)

Define the heuristic functions:


In [925]:
# gift_mean_weight = {}
# for gift_type in gift_types:
#     gift_mean_weight[gift_type] = weight_fn(gift_type, 100)    

In [930]:
def h12(state):
    """Return the absolute gap between `goal_score` and `score(state)`."""
    # Earlier variant, kept for reference:
#     h1 = 0
#     h2 = 0
#     for bag in state:
#         h1 += bag_weight(bag, 200)
#         w = bag_weight(bag, 200)
#         h2 += abs(max_weight*alpha - w)
#     h1 = abs(goal_score - h1) / n_bags
#     return max(h1, h2)
    gap = goal_score - score(state)
    return abs(gap)

# def h3(state):
#     h = 0
#     for bag in state:
#         w = 0
#         for gift in bag:
#             gift_type = gift.split('_')[0]
#             w += gift_mean_weight[gift_type]
#         h += max(max_weight/n_bags - w, 0.0)
#     return h

# def h4(state):     
#     h = 0
#     for bag in state:
#         w = 0
#         for gift in bag:
#             gift_type = gift.split('_')[0]
#             w += gift_mean_weight[gift_type]
#         h += w
#     h = max(max_weight*n_bags - h, 0.0) / n_bags
#     return h


def h5(state):
    """Penalise bags that hold few gifts.

    Each bag adds `max_weight*alpha` scaled by 1, 1/2, 1/4 or 1/8 when it
    holds fewer than 3, 5, 7 or 8 gifts respectively; bags with 8 or more
    gifts add nothing.
    """
    penalty = 0
    for bag in state:
        n_gifts = sum(bag)
        if n_gifts < 3:
            penalty += max_weight * alpha
        elif n_gifts < 5:
            penalty += max_weight * alpha / 2
        elif n_gifts < 7:
            penalty += max_weight * alpha / 4
        elif n_gifts < 8:
            penalty += max_weight * alpha / 8
    return penalty


def h6(state):
    """Return twice the number of zero entries summed over all bags."""
    return sum(2 * (len(bag) - np.count_nonzero(bag)) for bag in state)
                

def h7(state):
    """Count overweight bag samples over 100 trials.

    In every trial each bag is weighed with `bag_weight(bag, n1=1)`; the
    result is the number of samples strictly above `max_weight`.
    """
    return sum(
        1
        for _ in range(100)
        for bag in state
        if bag_weight(bag, n1=1) > max_weight
    )
        


def final_heuristic_fn(state):
    """Return the largest of the heuristics h12, h5, h6 and h7 for `state`."""
    components = np.array([h12(state), h5(state), h6(state), h7(state)])
    # Alternatives tried earlier:
    #     (h12(state) + h5(state) + h6(state) + h7(state)) / max_weight
    #     np.max(np.array([h12(state),]))
    #     np.max(np.array([h12(state), h3(state), h4(state), h5(state)]))
    return components.max()

In [931]:
h12(initial_state), h5(initial_state), h6(initial_state), h7(initial_state)

(4.5375389824295311, 4.6875, 6, 14)

In [932]:
from time import time

In [933]:
tic = time()
result = astar_search(p, final_heuristic_fn)
print(result)
print("Elapsed: ", time() - tic)

- Mean score :  28.0016168726  /  37.5 ((2, 1, 0, 1, 0, 1, 1, 1, 0),)
- Mean score :  31.3224178374  /  37.5 ((2, 1, 0, 1, 0, 1, 2, 1, 0),)
- Mean score :  30.167205059  /  37.5 ((3, 1, 0, 1, 0, 1, 1, 1, 0),)
- Mean score :  29.3882766598  /  37.5 ((2, 1, 0, 2, 0, 1, 1, 1, 0),)
- Mean score :  30.452917291  /  37.5 ((2, 1, 0, 2, 0, 1, 2, 1, 0),)
- Mean score :  31.3650793999  /  37.5 ((3, 1, 0, 1, 0, 1, 2, 1, 0),)
- Mean score :  27.1068567884  /  37.5 ((4, 1, 0, 1, 0, 1, 1, 1, 0),)
- Mean score :  30.3416961269  /  37.5 ((3, 1, 0, 1, 0, 1, 2, 1, 0),)
- Mean score :  30.8179924339  /  37.5 ((2, 1, 0, 3, 0, 1, 1, 1, 0),)
- Mean score :  25.0977017607  /  37.5 ((3, 1, 0, 2, 0, 1, 1, 1, 0),)
- Mean score :  31.0442510247  /  37.5 ((2, 1, 0, 1, 0, 1, 3, 1, 0),)
- Mean score :  27.1706661757  /  37.5 ((2, 1, 0, 2, 0, 1, 2, 1, 0),)
- Mean score :  28.6900691145  /  37.5 ((2, 1, 0, 1, 0, 1, 4, 1, 0),)
- Mean score :  29.9409021428  /  37.5 ((3, 1, 0, 2, 0, 1, 2, 1, 0),)
- Mean score :  29.544

KeyboardInterrupt: 

In [None]:
result.state

In [839]:
h12(result.state), h5(result.state), h6(result.state), h7(result.state)

(0.66936740127657401, 15.75, 18, 10)

In [841]:
p._validation(result.state)

62.395484520900929

In [842]:
def update_available_gifts(ag, state):
    """Subtract the gifts used by `state` from the stock dict `ag` in place.

    `state` is summed per gift type over all bags and the counts are matched
    to the names in `gift_types` by position. Afterwards the total remaining
    stock is asserted to be positive.
    """
    used_per_type = np.sum(np.array(state), axis=0)
    for used, gift_type in zip(used_per_type, gift_types):
        ag[gift_type] -= used
    remaining_total = np.sum([ag[name] for name in ag])
    assert remaining_total > 0, "Available gifts problem : {}".format(ag)

In [843]:
# type_cost = {
#     "horse": 1.9,
#     "ball": 1.7,
#     "bike": 1.4,
#     "train": 1.2,
#     "coal": 0.8,
#     "book": 1.9,
#     "doll": 1.9,
#     "blocks": 0.5,
#     "gloves": 1.9    
# }

In [845]:
type_cost = {
#     "horse": 1.3,
#     "ball": 1.2,
#     "bike": 1.1,
#     "train": 1.2,
#     "coal": 0.9,
#     "book": 1.0,
#     "doll": 1.0,
#     "blocks": 0.9,
#     "gloves": 1.2    
}

In [846]:
# type_cost = {"horse": 0.9, "train": 0.9, "bike": 1.9, "book": 1.9, "gloves": 1.9, "ball": 1.9}

In [853]:
# Target number of bags overall, and the number of bags solved per search.
total_n_bags = 1000
n_bags = 1

# The goal score is the fraction alpha of the maximum total weight n_bags*max_weight.
alpha = 0.65
goal_score = n_bags*max_weight*alpha
print("Goal score: ", goal_score)

# Accumulators for the fill loop: the bags found so far, a working copy of the
# stock (so available_gifts itself stays untouched), and the number of
# successful searches.
total_state=[]
ag=deepcopy(available_gifts)
counter = 0

Goal score:  32.5


In [854]:
# n_threads = 4 
# from multiprocessing import Process, Lock, Pool

# def astar_parallel(l, thread_id):
    
#     l.acquire()
#     _counter = counter
#     l.release()
    
#     while n_bags * _counter < total_n_bags:
#         state=[()]*n_bags
#         print(thread_id, "| Filled bags : ", n_bags * _counter, "/", total_n_bags)
#         p = SantasBagsProblem(initial=tuple(state),
#                               gift_types=gift_types, 
#                               available_gifts=deepcopy(_ag),
#                               max_weight=max_weight,    
#                               type_cost=type_cost,
#                               weight_fn=weight_fn,
#                               bag_weight_fn=bag_weight,
#                               goal_score=goal_score)
#         tic = time()
#         result = astar_search(p, final_heuristic_fn)
#         l.acquire()
#         if result is not None:
#             print(thread_id, " : Got a result")
#             total_state += result.state
#             counter += 1
#             _counter = counter
#             update_available_gifts(ag, result.state)
#         else:
#             type_cost = {"horse": 0.9, "train": 0.9, "gloves": 1.9, "ball": 1.9 }
#         l.release()
#         print("- Elapsed: ", time() - tic)
    


# lock = Lock()
# for thread_id in range(n_threads):
#     Process(target=astar_parallel, args=(lock, thread_id)).start()

In [875]:
# Run one A* search per iteration, each producing n_bags bags, until
# total_n_bags bags have been collected in total_state.
# NOTE(review): when astar_search returns None nothing is updated, so the same
# search is repeated with the same inputs -- this may never terminate; confirm.
while n_bags * counter < total_n_bags:
    
    # Start from n_bags empty bags (one zero count per gift type).
    state=tuple([tuple([0]*n_types)]*n_bags)
    print("Filled bags : ", n_bags * counter, "/", total_n_bags)
    # The problem receives its own copy of the remaining stock.
    p = SantasBagsProblem(initial=tuple(state),
                          gift_types=gift_types, 
                          available_gifts=deepcopy(ag),
                          max_weight=max_weight,    
                          type_cost=type_cost,
                          weight_fn=weight_fn,
                          bag_weight_fn=bag_weight,
                          goal_score=goal_score)
    tic = time()
    result = astar_search(p, final_heuristic_fn)
    if result is not None:
        print("- Got a result")
        # result.state is a tuple of bags; += appends each bag to the list.
        total_state += result.state
        counter += 1
        # Remove the gifts just used from the working stock.
        update_available_gifts(ag, result.state)
#     else:
#         type_cost = None
        
    # Every time the number of filled bags is a multiple of 10, print the
    # current score and its linear extrapolation to total_n_bags bags.
    if counter > 0 and (n_bags * counter % 10) == 0:
        s = score(total_state)
        print(">>> Current score: ", s, s * (total_n_bags) / (n_bags * counter) )
        
    # The elapsed time covers the search and, when it ran, the scoring above.
    print("- Elapsed: ", time() - tic)

Filled bags :  954 / 1000
- Got a result
- Elapsed:  1.2587978839874268
Filled bags :  955 / 1000
- Got a result
- Elapsed:  2.967970848083496
Filled bags :  956 / 1000
- Got a result
- Elapsed:  2.2226428985595703
Filled bags :  957 / 1000
- Got a result
- Elapsed:  3.0824851989746094
Filled bags :  958 / 1000
- Got a result
- Elapsed:  2.150761842727661
Filled bags :  959 / 1000
- Got a result
>>> Current score:  32380.8971347 33730.101182
- Elapsed:  17.224838972091675
Filled bags :  960 / 1000
- Got a result
- Elapsed:  1.2959909439086914
Filled bags :  961 / 1000
- Got a result
- Elapsed:  1.8934109210968018
Filled bags :  962 / 1000
- Got a result
- Elapsed:  6.781077861785889
Filled bags :  963 / 1000
- Got a result
- Elapsed:  1.3178260326385498
Filled bags :  964 / 1000
- Got a result
- Elapsed:  2.482193946838379
Filled bags :  965 / 1000
- Got a result
- Elapsed:  3.7229361534118652
Filled bags :  966 / 1000
- Got a result
- Elapsed:  31.187165021896362
Filled bags :  967 / 

In [876]:
[(k,ag[k]) for k in gift_types]

[('ball', 0),
 ('bike', 222),
 ('blocks', 0),
 ('book', 146),
 ('coal', 166),
 ('doll', 113),
 ('gloves', 0),
 ('horse', 48),
 ('train', 410)]

In [877]:
len(total_state), total_state

(1000,
 [(2, 1, 0, 1, 0, 1, 1, 1, 0),
  (2, 1, 0, 1, 0, 1, 1, 1, 0),
  (1, 1, 0, 1, 0, 0, 1, 2, 0),
  (2, 1, 0, 1, 0, 1, 1, 1, 0),
  (2, 1, 0, 1, 0, 0, 2, 1, 0),
  (1, 1, 0, 2, 0, 0, 1, 2, 0),
  (1, 1, 0, 1, 0, 0, 1, 2, 0),
  (1, 1, 0, 1, 0, 1, 3, 1, 0),
  (1, 1, 0, 0, 0, 2, 2, 1, 0),
  (1, 1, 0, 0, 0, 2, 2, 1, 0),
  (1, 1, 0, 1, 0, 1, 1, 2, 0),
  (1, 1, 0, 0, 0, 1, 2, 2, 0),
  (1, 1, 0, 2, 0, 0, 2, 1, 0),
  (1, 1, 1, 0, 0, 0, 1, 1, 0),
  (1, 1, 0, 0, 0, 1, 1, 1, 0),
  (2, 1, 0, 1, 0, 0, 3, 0, 1),
  (1, 1, 0, 1, 0, 1, 3, 1, 0),
  (1, 1, 0, 0, 0, 1, 1, 1, 0),
  (2, 1, 0, 0, 0, 0, 1, 2, 0),
  (1, 1, 0, 2, 0, 1, 1, 1, 0),
  (1, 1, 0, 0, 0, 0, 1, 1, 1),
  (1, 1, 0, 1, 0, 1, 2, 1, 0),
  (2, 1, 0, 0, 0, 0, 1, 2, 0),
  (1, 1, 0, 0, 0, 0, 1, 1, 1),
  (2, 1, 0, 1, 0, 1, 1, 1, 0),
  (2, 1, 0, 1, 0, 1, 1, 1, 0),
  (1, 1, 0, 0, 0, 0, 1, 1, 1),
  (1, 1, 0, 0, 0, 1, 1, 1, 0),
  (1, 1, 0, 1, 0, 0, 1, 2, 0),
  (1, 1, 0, 1, 0, 1, 1, 1, 0),
  (1, 1, 0, 0, 0, 0, 2, 1, 1),
  (2, 1, 0, 1, 0, 1, 2, 1, 0),
 

In [878]:
score(total_state), score(total_state) * (total_n_bags) / (n_bags * counter)

(33621.171855699089, 33632.674478612091)

In [888]:
def to_submission(state, available_gifts, gift_types):
    """Turn per-type gift counts into lists of concrete gift ids.

    For every bag in `state` a list of ids of the form `<type>_<number>` is
    built. Numbers for a type are handed out downwards, starting at
    `available_gifts[type] - 1`, and are never reused across bags. An
    AssertionError is raised when a type runs out of numbers.

    Returns a list with one list of id strings per bag.
    """
    # Number of ids still unassigned for each type, indexed like gift_types.
    remaining = [available_gifts[name] for name in gift_types]
    bags_out = []
    for bag in state:
        gift_ids = []
        for type_index, n_gifts in enumerate(bag):
            gift_type = gift_types[type_index]
            for _ in range(n_gifts):
                remaining[type_index] -= 1
                gift_number = remaining[type_index]
                assert gift_number >= 0, "Gift index is negative"
                gift_ids.append(gift_type + '_%i' % gift_number)
        bags_out.append(gift_ids)
    return bags_out
        
submission = to_submission(total_state, available_gifts, gift_types)
# print(submission)

In [886]:
from datetime import datetime
submission_file = '../results/submission_' + \
                  str(datetime.now().strftime("%Y-%m-%d-%H-%M")) + \
                  '.csv'

In [887]:
def write_submission(state, filename):
    """Write the submission file: a `Gifts` header, then one line per bag.

    Each bag in `state` is an iterable of gift id strings, written on its own
    line separated by single spaces.
    """
    rows = (' '.join(bag) + '\n' for bag in state)
    with open(filename, 'w') as out:
        out.write("Gifts\n")
        out.writelines(rows)
    
write_submission(submission, submission_file)