# Greedy search and A*-search using `aima-python`

Algorithm implementations taken from [here](https://github.com/aimacode/aima-python/blob/master/search-4e.ipynb)

* *State* is defined by gifts in bags

* *Goal states* are defined by filled bags satisfying problem conditions

* *Actions* : put a gift in a bag with a minimal weight

In [1]:
# https://ipython.org/ipython-doc/3/config/extensions/autoreload.html
%load_ext autoreload
%autoreload 2

In [2]:
from time import time
from copy import deepcopy

import numpy as np
np.random.seed(2016)

In [225]:
from search import Problem, SantasBagsProblem, astar_search, uniform_cost_search, state_sequence, action_sequence
from search import fill_all_bags
import sys
sys.path.append('../common')
from utils import weight3 as weight_fn, weight_by_index
from utils import bag_weight, score
from utils import MAX_WEIGHT, AVAILABLE_GIFTS, GIFT_TYPES, N_TYPES, N_BAGS

Total number of gifts : 7166
Total number of available bags : 1000

If they have all a deterministic weight, say `m`, then with a bag weight limit $50$ there is a condition on the `m` :
$$ m * \frac{7166}{1000} < 50 \text{  or  } m < \frac{50 * 1000}{7166} = 6.977393245883338 $$
and we have around 7 gifts per bag.

In [23]:
fixed_weights = {}
# for i, g in enumerate(GIFT_TYPES):
#     fixed_weights[g] = weight_fn(i, 5000) - 0.15


# for i, t in enumerate(GIFT_TYPES):
#     only_one_type_state = []
#     state = list([0]*9)
#     state[i] = 1
#     for i in range(AVAILABLE_GIFTS[t]):
#         only_one_type_state.append(state)
#     s = score(only_one_type_state)
#     print("Type : ",t, " | Score : ", s, ", Score/nb : ", s / AVAILABLE_GIFTS[t])

fixed_weights['ball'] = 1.99876912083
fixed_weights['bike'] = 20.0021364556
fixed_weights['blocks'] = 11.6630321858
fixed_weights['book'] = 2.00086596571
fixed_weights['coal'] = 23.7866257713
fixed_weights['doll'] = 4.9993625282
fixed_weights['gloves'] = 1.40310067709
fixed_weights['horse'] = 4.99527064522
fixed_weights['train'] = 10.0234458084
fixed_weights

{'ball': 1.99876912083,
 'bike': 20.0021364556,
 'blocks': 11.6630321858,
 'book': 2.00086596571,
 'coal': 23.7866257713,
 'doll': 4.9993625282,
 'gloves': 1.40310067709,
 'horse': 4.99527064522,
 'train': 10.0234458084}

In [24]:
total_weight = 0
for g in GIFT_TYPES:
    total_weight += fixed_weights[g] * AVAILABLE_GIFTS[g]
total_weight

50511.0646006388

If they have different but deterministic weights, for example :
```
'ball': 1.9866563201517022,
'bike': 19.800818807615666,
'blocks': 11.585418654415042,
'book': 1.9428298042269647,
'coal': 23.791446017978064,
'doll': 5.0028447720814606,
'gloves': 1.3834638744694512,
'horse': 5.0109415127971237,
'train': 10.098774137002827
```
Total weight is then `50898.378575638228`. 

## Mean value and variance

In [272]:
def mean_std(state, count=100):
    w = []
    for c in range(100):        
        m = np.sum([weight_by_index(i) for i, v in enumerate(state) if v > 0])
        w.append(m)
    return np.mean(w), np.std(w)


def mean_2sigma(state):
    m, s = mean_std(state)
    return m + 2*s


mean_2sigma((1,1,1,0,0,0,0,0,0))


54.820392477002869

## Naive bag filling

In [129]:
GIFT_TYPES

['ball', 'bike', 'blocks', 'book', 'coal', 'doll', 'gloves', 'horse', 'train']

In [201]:
available_gifts = deepcopy(AVAILABLE_GIFTS)
total_state = []

goal_score = 0.85 * MAX_WEIGHT
print("Goal score : ", goal_score)


skip_types = ['coal']

for i, t in enumerate(GIFT_TYPES):
    
    if t in skip_types:
        continue
        
    if len(total_state) >= N_BAGS:
        break
    
    print(i, t, fixed_weights[t])

    nb_in_bag = int(np.floor(goal_score / fixed_weights[t]))
    nb_bags = int(np.floor(available_gifts[t] / nb_in_bag))    
    nb_bags = min(nb_bags, N_BAGS - len(total_state))
    
    print("nb_bags, nb_in_bag : ", nb_bags, nb_in_bag)
    for j in range(nb_bags):
        state = list([0]*9)
        state[i] = nb_in_bag   
        total_state.append(state)  
        available_gifts[t] -= nb_in_bag

    assert available_gifts[t] >= 0, "available_gifts[t] = %i" % available_gifts[t]
        

Goal score :  42.5
0 ball 1.99876912083
nb_bags, nb_in_bag :  52 21
1 bike 20.0021364556
nb_bags, nb_in_bag :  250 2
2 blocks 11.6630321858
nb_bags, nb_in_bag :  333 3
3 book 2.00086596571
nb_bags, nb_in_bag :  57 21
5 doll 4.9993625282
nb_bags, nb_in_bag :  125 8
6 gloves 1.40310067709
nb_bags, nb_in_bag :  6 30
7 horse 4.99527064522
nb_bags, nb_in_bag :  125 8
8 train 10.0234458084
nb_bags, nb_in_bag :  52 4


In [202]:
available_gifts

{'ball': 8,
 'bike': 0,
 'blocks': 1,
 'book': 3,
 'coal': 166,
 'doll': 0,
 'gloves': 20,
 'horse': 0,
 'train': 792}

In [203]:
len(total_state), total_state[0]

(1000, [21, 0, 0, 0, 0, 0, 0, 0, 0])

In [204]:
score(total_state, return_rejected=True), score(total_state) / len(total_state)

((33358.167939741041, 92.719999999999999), 33.312681343585638)

In [205]:
for i, t in enumerate(GIFT_TYPES):
    
    if t in skip_types:
        continue
        
    if available_gifts[t] == 0:
        continue
        
    print(t, available_gifts[t])
    
    for state in total_state:        
        if score((state, )) + fixed_weights[t] < MAX_WEIGHT and available_gifts[t] > 0:
            old_state = list(state)
            state[i] += 1
            print(old_state, " -> ", state)
            available_gifts[t] -= 1
        elif available_gifts[t] == 0:
            break
    break
        


ball 8
[21, 0, 0, 0, 0, 0, 0, 0, 0]  ->  [22, 0, 0, 0, 0, 0, 0, 0, 0]
[21, 0, 0, 0, 0, 0, 0, 0, 0]  ->  [22, 0, 0, 0, 0, 0, 0, 0, 0]
[21, 0, 0, 0, 0, 0, 0, 0, 0]  ->  [22, 0, 0, 0, 0, 0, 0, 0, 0]
[21, 0, 0, 0, 0, 0, 0, 0, 0]  ->  [22, 0, 0, 0, 0, 0, 0, 0, 0]
[21, 0, 0, 0, 0, 0, 0, 0, 0]  ->  [22, 0, 0, 0, 0, 0, 0, 0, 0]
[21, 0, 0, 0, 0, 0, 0, 0, 0]  ->  [22, 0, 0, 0, 0, 0, 0, 0, 0]
[21, 0, 0, 0, 0, 0, 0, 0, 0]  ->  [22, 0, 0, 0, 0, 0, 0, 0, 0]
[21, 0, 0, 0, 0, 0, 0, 0, 0]  ->  [22, 0, 0, 0, 0, 0, 0, 0, 0]


In [206]:
available_gifts

{'ball': 0,
 'bike': 0,
 'blocks': 1,
 'book': 3,
 'coal': 166,
 'doll': 0,
 'gloves': 20,
 'horse': 0,
 'train': 792}

In [207]:
len(total_state), total_state[:15]

(1000,
 [[22, 0, 0, 0, 0, 0, 0, 0, 0],
  [22, 0, 0, 0, 0, 0, 0, 0, 0],
  [22, 0, 0, 0, 0, 0, 0, 0, 0],
  [22, 0, 0, 0, 0, 0, 0, 0, 0],
  [22, 0, 0, 0, 0, 0, 0, 0, 0],
  [22, 0, 0, 0, 0, 0, 0, 0, 0],
  [22, 0, 0, 0, 0, 0, 0, 0, 0],
  [22, 0, 0, 0, 0, 0, 0, 0, 0],
  [21, 0, 0, 0, 0, 0, 0, 0, 0],
  [21, 0, 0, 0, 0, 0, 0, 0, 0],
  [21, 0, 0, 0, 0, 0, 0, 0, 0],
  [21, 0, 0, 0, 0, 0, 0, 0, 0],
  [21, 0, 0, 0, 0, 0, 0, 0, 0],
  [21, 0, 0, 0, 0, 0, 0, 0, 0],
  [21, 0, 0, 0, 0, 0, 0, 0, 0]])

In [209]:
score(total_state, return_rejected=True), score(total_state) / len(total_state)

((33365.903075387716, 92.090000000000003), 33.286912403178441)

## Uniform cost search 

In [8]:
n_bags_per_state = 1
empty_state = tuple([tuple([0]*N_TYPES)]*n_bags_per_state)

In [9]:
def fix_weight(gift_index):
    return fixed_weights[GIFT_TYPES[gift_index]]

def bag_fix_weight(bag):
    out = 0
    for i, c in enumerate(bag):
        out += fix_weight(i) * c
    return out

# def state_fix_score(state):
#     score = 0 
#     for bag in state:
#         score += bag_fix_weight(bag)
#     return score

type_cost = {}
for g in fixed_weights:
    type_cost[g] = 1.0/fixed_weights[g]
#     type_cost[g] = (MAX_WEIGHT - fixed_weights[g]) / MAX_WEIGHT 
type_cost

{'ball': 0.54269856736478583,
 'bike': 0.04987008479383815,
 'blocks': 0.086322293906909278,
 'book': 0.54715346988516089,
 'coal': 0.042793273642200952,
 'doll': 0.2038624694225695,
 'gloves': 0.81532436700695665,
 'horse': 0.20768985052312611,
 'train': 0.10062382892753075}

In [520]:
alpha = 0.5
goal_score = n_bags_per_state*MAX_WEIGHT*alpha
# initial_state = empty_state
initial_state = ((0,0,0,0,0,0,0,0,0,),)

type_cost['coal'] = 0
type_cost['gloves'] = 0
type_cost['book'] = 0


p = SantasBagsProblem(initial=initial_state,
                      gift_types=GIFT_TYPES, 
                      available_gifts=AVAILABLE_GIFTS,
                      max_weight=MAX_WEIGHT,    
                      type_cost=type_cost,
                      gift_weight_fn=fix_weight,
                      bag_weight_fn=bag_fix_weight,
                      goal_score=goal_score, 
                      score_fn=score,
                      verbose_level=0)

tic = time()
result = uniform_cost_search(p)
print(result.state, state_fix_score(result.state), score(result.state), goal_score)
print("Elapsed: ", time() - tic)

((0, 0, 0, 2, 1, 0, 0, 0, 0),) 45.7237170334 24.8974317844 25.0
Elapsed:  0.01367497444152832


In [523]:
total_state=[]
found_goal_states=[]
available_gifts=deepcopy(AVAILABLE_GIFTS)
counter = [0]

alpha = 0.7
goal_score = n_bags_per_state*MAX_WEIGHT*alpha
# type_cost['bike'] = 0
type_cost['coal'] = 0
type_cost['gloves'] = 0
type_cost['book'] = 0

In [524]:
def create_fix_weight_problem(state, available_gifts, **kwargs):
    return SantasBagsProblem(initial=state,
                      gift_types=GIFT_TYPES, 
                      available_gifts=available_gifts,
                      max_weight=MAX_WEIGHT,    
                      type_cost=type_cost,
                      gift_weight_fn=fix_weight,
                      bag_weight_fn=bag_fix_weight,
                      goal_score=goal_score, 
                      score_fn=score,
                      verbose_level=0)

def policy_1(p, state, available_gifts,
                    counter, total_state,
                    found_goal_states,
                    **kwargs):
    if (available_gifts['bike'] > 0 or available_gifts['coal'] > 0) and \
        (available_gifts['gloves'] > 0 or \
         available_gifts['book'] == -1):
        p.goal_score = 0.50 * n_bags_per_state*MAX_WEIGHT
        return True
    return False


def policy_2(p, state, available_gifts,
                    counter, total_state,
                    found_goal_states,
                    **kwargs):
    if available_gifts['ball'] > 10 or \
        available_gifts['horse'] > 10:
        p.goal_score = 0.7 * n_bags_per_state*MAX_WEIGHT 
        return True
    return False


def update_problem(p, state, available_gifts,
                    counter, total_state,
                    found_goal_states,
                    **kwargs):
    p.initial = state
    p.available_gifts = available_gifts
    
    if policy_1(p, state, available_gifts, counter, total_state, found_goal_states, **kwargs):
        return 
    elif policy_2(p, state, available_gifts, counter, total_state, found_goal_states, **kwargs):
        return

        
def termination_condition(p, counter, total_state):
    if p.goal_score < 18:
        return True
    return False
    

In [525]:
fill_all_bags(create_fix_weight_problem, uniform_cost_search, 
                total_state, found_goal_states, available_gifts, counter,
                score,
                update_problem_fn=update_problem,
                termination_condition_fn=termination_condition
             )

Filled bags :  0 / 1000
- Got a result :  25.0
Filled bags :  1 / 1000
- Got a result :  25.0
Filled bags :  2 / 1000
- Got a result :  25.0
Filled bags :  3 / 1000
-- Result is none | len(found_goal_states)= 2
--- Restart from :  ((0, 0, 0, 0, 1, 0, 3, 0, 0),)
Filled bags :  3 / 1000
-- Result is none | len(found_goal_states)= 1
--- Restart from :  ((0, 0, 0, 0, 1, 0, 2, 0, 0),)
Filled bags :  3 / 1000
- Got a result :  25.0
Filled bags :  4 / 1000
-- Result is none | len(found_goal_states)= 1
--- Restart from :  ((0, 0, 0, 0, 1, 0, 5, 0, 0),)
Filled bags :  4 / 1000
-- Result is none | len(found_goal_states)= 0
-- Remove some gift :  ((0, 0, 0, 0, 1, 0, 5, 0, 0),) ((0, 0, 0, 0, 1, 0, 3, 0, 0),)
Filled bags :  4 / 1000
-- Result is none | len(found_goal_states)= 0
-- Remove some gift :  ((0, 0, 0, 0, 1, 0, 3, 0, 0),) ((0, 0, 0, 0, 1, 0, 1, 0, 0),)
Filled bags :  4 / 1000
- Got a result :  25.0
Filled bags :  5 / 1000
- Got a result :  25.0
Filled bags :  6 / 1000
-- Result is none | l

In [526]:
[(k,available_gifts[k]) for k in GIFT_TYPES]

[('ball', 0),
 ('bike', 498),
 ('blocks', 0),
 ('book', 0),
 ('coal', 160),
 ('doll', 0),
 ('gloves', 0),
 ('horse', 0),
 ('train', 0)]

In [20]:
len(total_state), total_state

(906,
 [(0, 1, 0, 0, 0, 0, 9, 0, 0),
  (0, 1, 0, 0, 0, 0, 10, 0, 0),
  (0, 1, 0, 0, 0, 0, 10, 0, 0),
  (0, 1, 0, 0, 0, 0, 10, 0, 0),
  (0, 1, 0, 0, 0, 0, 10, 0, 0),
  (0, 1, 0, 0, 0, 0, 10, 0, 0),
  (0, 1, 0, 0, 0, 0, 11, 0, 0),
  (0, 1, 0, 0, 0, 0, 12, 0, 0),
  (0, 1, 0, 0, 0, 0, 12, 0, 0),
  (0, 1, 0, 0, 0, 0, 12, 0, 0),
  (0, 1, 0, 0, 0, 0, 12, 0, 0),
  (0, 1, 0, 0, 0, 0, 12, 0, 0),
  (0, 1, 0, 0, 0, 0, 12, 0, 0),
  (0, 1, 0, 0, 0, 0, 13, 0, 0),
  (0, 1, 0, 0, 0, 0, 13, 0, 0),
  (0, 1, 0, 1, 0, 0, 13, 0, 0),
  (0, 1, 0, 2, 0, 0, 14, 0, 0),
  (0, 1, 0, 0, 0, 0, 5, 0, 1),
  (0, 0, 2, 0, 0, 1, 0, 0, 1),
  (0, 0, 2, 0, 0, 1, 0, 0, 1),
  (0, 0, 2, 0, 0, 1, 0, 0, 1),
  (0, 0, 2, 0, 0, 1, 0, 0, 1),
  (0, 0, 2, 0, 0, 1, 0, 0, 1),
  (0, 0, 2, 0, 0, 1, 0, 1, 1),
  (0, 0, 2, 0, 0, 1, 0, 1, 1),
  (0, 0, 2, 0, 0, 1, 0, 0, 1),
  (0, 0, 2, 1, 0, 1, 0, 0, 1),
  (0, 0, 2, 2, 0, 1, 0, 0, 1),
  (0, 0, 2, 1, 0, 1, 0, 0, 1),
  (0, 0, 2, 1, 0, 1, 0, 0, 1),
  (0, 0, 2, 1, 0, 1, 0, 0, 1),
  (0, 0, 2, 1, 0,

In [527]:
len(total_state), score(total_state), score(total_state) * (N_BAGS-100) / (n_bags_per_state * counter[0])

(929, 31927.904167840701, 30950.399617934021)

Single policy :
    Latest) all(0.7) -> decrease goal score
        ~ 32131.721211058415

Mixed policies :

    Latest) bike + gloves (0.6) -> all (0.7) -> decrease goal score
        33523.643628706719 (Kaggle : 33467.83042)
        1000
        [('ball', 0),
         ('bike', 220),
         ('blocks', 0),
         ('book', 0),
         ('coal', 166),
         ('doll', 0),
         ('gloves', 0),
         ('horse', 0),
         ('train', 190)]

    Latest) bike + gloves (0.56) -> all (0.7) -> decrease goal score
        33993.004687925008
        986
        [('ball', 0),
         ('bike', 360),
         ('blocks', 0),
         ('book', 0),
         ('coal', 166),
         ('doll', 0),
         ('gloves', 0),
         ('horse', 0),
         ('train', 0)]

    Latest) bike + gloves (0.65) -> all (0.7) -> decrease goal score
        32131.721211058415
        912
        [('ball', 0),
         ('bike', 499),
         ('blocks', 0),
         ('book', 0),
         ('coal', 166),
         ('doll', 0),
         ('gloves', 0),
         ('horse', 0),
         ('train', 1)]

    Latest) bike + gloves (0.5) -> all (0.7) -> decrease goal score
        33549.794246489044
        1000
        [('ball', 104),
         ('bike', 197),
         ('blocks', 0),
         ('book', 0),
         ('coal', 166),
         ('doll', 149),
         ('gloves', 0),
         ('horse', 0),
         ('train', 403)]

    A) coal + gloves (0.45) -> all (0.7) -> decrease goal score
        32812.886466132135

    B) bike + gloves (0.45) -> all (0.7) -> decrease goal score
       33490.613109426813 
       985
       `[('ball', 0), ('bike', 380), ('blocks', 0), ('book', 0), ('coal', 166), ('doll', 0), ('gloves', 0), ('horse', 0), ('train', 0)]`
       
    C) bike + gloves + book (0.45) -> all (0.7) -> decrease goal score
       32167.026060391439
       1000
        [('ball', 0),
         ('bike', 0),
         ('blocks', 0),
         ('book', 0),
         ('coal', 166),
         ('doll', 0),
         ('gloves', 0),
         ('horse', 0),
         ('train', 410)]       
    
    D) coal + bike + gloves + book (0.45) -> all (0.7) -> decrease goal score
        30809.252822201233
        1000
        [('ball', 171),
         ('bike', 0),
         ('blocks', 231),
         ('book', 0),
         ('coal', 140),
         ('doll', 184),
         ('gloves', 0),
         ('horse', 0),
         ('train', 360)]
    
        
    D) coal + bike + gloves  (0.45) -> all (0.7) -> decrease goal score
        31345.30418976711
        1000
        [('ball', 474),
         ('bike', 0),
         ('blocks', 0),
         ('book', 0),
         ('coal', 147),
         ('doll', 0),
         ('gloves', 0),
         ('horse', 0),
         ('train', 870)]


## Heuristic functions + A*-search

In [None]:
(4, 0, 0, 0, 0, 1, 0, 1, 2),
(2, 0, 0, 0, 0, 3, 0, 1, 2),
(2, 0, 0, 0, 0, 2, 0, 1, 2),

In [457]:
# Penalize states that heavy than goal score 
def h1(state, limit):    
    w = state_fix_score(state)
    if w < limit:
        return (limit - w) / limit
    else:
        return 1.0
    
    
# Difference between fix / 'real' masses
def h2(state):
    w2 = state_fix_score(state)
    w1 = score(state)
    if w2 > 0:
        return abs(w2 - w1)*1.0 / w2
    else:
        return 1.0


# Count number of rejected bags :
def h3(state):
    _, rejected = score(state, return_rejected=True)
    return rejected

In [458]:
from search import FrontierPQ, Node

In [459]:
goal_score = 0.70 * n_bags_per_state*MAX_WEIGHT
limit = goal_score + 0.5*(MAX_WEIGHT-goal_score)

p = create_fix_weight_problem(empty_state, AVAILABLE_GIFTS, goal_score=goal_score)

costfn=lambda node: node.path_cost + h1(node.state, limit) + h2(node.state) + h3(node.state)

frontier = FrontierPQ(Node(p.initial), costfn)
explored = set()

In [477]:
node = frontier.pop()
print("- State:", node.state, 
        "fix", state_fix_score(node.state), 
        "score", score(node.state),
        "h1: ", h1(node.state, limit), "h2: ", h2(node.state), "h3: ", h3(node.state))
if p.is_goal(node.state):
    print(">>> Goal found : ", node.state)
explored.add(node.state)
for action in p.actions(node.state):
    child = node.child(p, action)
    print("-- State:", child.state, 
          "fix", state_fix_score(child.state), 
          "score", score(child.state),
          "h1: ", h1(child.state, limit), "h2: ", h2(child.state), "h3: ", h3(node.state))
    if child.state not in explored and child.state not in frontier:
        frontier.add(child)
    elif child.state in frontier:
        incumbent = frontier.states[child.state]
        if child.path_cost < frontier.costfn(incumbent):
            frontier.replace(child)


- State: ((0, 1, 0, 2, 0, 0, 4, 0, 0),) fix 28.8480869132 score 28.2085391547 h1:  0.321221484395 h2:  0.0153426888208 h3:  0.04
-- State: ((1, 1, 0, 2, 0, 0, 4, 0, 0),) fix 30.6923053596 score 29.839552795 h1:  0.277828109187 h2:  0.0405162512696 h3:  0.02
-- State: ((0, 2, 0, 2, 0, 0, 4, 0, 0),) fix 48.8357922664 score 22.4741426036 h1:  1.0 h2:  0.602570591059 h3:  0.04
-- State: ((0, 1, 1, 2, 0, 0, 4, 0, 0),) fix 40.3754333661 score 28.1137703482 h1:  0.0499898031509 h2:  0.231573751385 h3:  0.05
-- State: ((0, 1, 0, 3, 0, 0, 4, 0, 0),) fix 30.7344758731 score 27.3038948021 h1:  0.276835861809 h2:  0.114555342884 h3:  0.05
-- State: ((0, 1, 0, 2, 0, 1, 4, 0, 0),) fix 33.6433429606 score 31.7544687764 h1:  0.20839193034 h2:  0.0702516127928 h3:  0.03
-- State: ((0, 1, 0, 2, 0, 0, 5, 0, 0),) fix 30.1199878232 score 29.3759009669 h1:  0.291294404159 h2:  0.0161380268047 h3:  0.04
-- State: ((0, 1, 0, 2, 0, 0, 4, 1, 0),) fix 33.7010574084 score 29.4556294405 h1:  0.207033943332 h2:  0.

In [490]:
type_cost = {}
for g in fixed_weights:
    type_cost[g] = ((MAX_WEIGHT - fixed_weights[g]) / MAX_WEIGHT)**5
type_cost

{'ball': 0.82869011216211463,
 'bike': 0.077919469277197048,
 'blocks': 0.26971846938409522,
 'book': 0.82506799925849772,
 'coal': 0.043791690385818934,
 'doll': 0.6040460471411685,
 'gloves': 0.87911831211321256,
 'horse': 0.60019984925156888,
 'train': 0.32735602883058251}

In [491]:
total_state=[]
found_goal_states=[]
available_gifts=deepcopy(AVAILABLE_GIFTS)
counter = [0]

alpha = 0.7
goal_score = n_bags_per_state*MAX_WEIGHT*alpha
# type_cost['bike'] = 0
# type_cost['coal'] = 0
# type_cost['gloves'] = 0
# type_cost['book'] = 0

In [492]:
limit = goal_score + 0.5*(MAX_WEIGHT-goal_score)

def final_heuristic_fn(state):
    return h1(state, limit) + h2(state) + h3(state)
    

def astar_search_algo(problem, **kwargs):
    return astar_search(problem, final_heuristic_fn, **kwargs)
    

def update_problem(p, state, available_gifts,
                    counter, total_state,
                    found_goal_states,
                    **kwargs):
    p.initial = state
    p.available_gifts = available_gifts
    
        
def termination_condition(p, counter, total_state):
    if p.goal_score < 18:
        return True
    return False
    

In [493]:
fill_all_bags(create_fix_weight_problem, astar_search_algo, 
                total_state, found_goal_states, available_gifts, counter,
                score,
                update_problem_fn=update_problem,
                termination_condition_fn=termination_condition
             )

Filled bags :  0 / 1000
- Got a result :  35.0
Filled bags :  1 / 1000
- Got a result :  35.0
Filled bags :  2 / 1000
- Got a result :  35.0
Filled bags :  3 / 1000
- Got a result :  35.0
Filled bags :  4 / 1000
- Got a result :  35.0
Filled bags :  5 / 1000
- Got a result :  35.0
Filled bags :  6 / 1000
- Got a result :  35.0
Filled bags :  7 / 1000
- Got a result :  35.0
Filled bags :  8 / 1000
- Got a result :  35.0
Filled bags :  9 / 1000
- Got a result :  35.0
Filled bags :  10 / 1000
- Got a result :  35.0
Filled bags :  11 / 1000
- Got a result :  35.0
Filled bags :  12 / 1000
- Got a result :  35.0
Filled bags :  13 / 1000
- Got a result :  35.0
Filled bags :  14 / 1000
- Got a result :  35.0
Filled bags :  15 / 1000
- Got a result :  35.0
Filled bags :  16 / 1000
- Got a result :  35.0
Filled bags :  17 / 1000
- Got a result :  35.0
Filled bags :  18 / 1000
- Got a result :  35.0
Filled bags :  19 / 1000
- Got a result :  35.0
>>> Current score:  717.939435413 32307.2745936
Fi

KeyboardInterrupt: 

In [None]:
type_cost = {
#     "horse": 1.2,
#     "ball": 1.2,
#     "bike": 2,
#     "train": 1.15,
#     "coal": 0.5,
#     "book": 2.0,
#     "doll": 1.3,
#     "blocks": 0.5,
#     "gloves": 3    
}

len(gift_types), gift_types

State is tuple (bags) of tuples (gifts) :

```
( 
#  ball, bike, block, book, coal, doll, gloves, horse, train  
    (0,1,0,3,0,0,0,0,2), # bag 1
    (0,0,0,0,0,2,5,6,0), # bag 2
    ...
)
```


In [8]:
s = ((0, 0, 3, 0, 0, 1, 0, 0, 0),)
score(s)

36.175530726146057

In [9]:
available_gifts

{'ball': 1100,
 'bike': 500,
 'blocks': 1000,
 'book': 1200,
 'coal': 166,
 'doll': 1000,
 'gloves': 200,
 'horse': 1000,
 'train': 1000}

In [67]:
class SantasBagsProblem(Problem):
            
    def _get_gift_type_indices(self, state):
        out = []
        types = np.sum(np.array(state), axis=0)
        for index, t in enumerate(types):
            if t < self.available_gifts[self.gift_types[index]]:
                out.append(index)
        return out
        
    def actions(self, state):
        """Return a list of actions executable in this state."""                        
        _gift_type_indices = self._get_gift_type_indices(state)
        if len(_gift_type_indices) == 0:
            print("No gifts available to create actions")
            return []
        
        if self.vebose_level >= 2: 
            print("_gift_type_indices : ", _gift_type_indices)
        # find a bag with a minimal weight  
        min_weight_bag_index = 0
        min_weight = self.max_weight
        for i, bag in enumerate(state):
            w = self.bag_weight_fn(bag)
            if min_weight > w:
                min_weight_bag_index = i
                min_weight = w
        
        if self.vebose_level >= 2:         
            print("min_weight_bag_index : ", min_weight_bag_index)
        
        actions = []
        bag_weight = self.bag_weight_fn(state[min_weight_bag_index])
        for _index in _gift_type_indices:
            gift_weight = self.gift_weight_fn(_index)        
            if bag_weight + gift_weight < self.max_weight:
                actions.append((min_weight_bag_index, _index))
        
        # print("actions: ", actions)
        # if len(actions) == 0:
        #     print("No actions found for the state : ", state, min_weight_bag_index, bag_weight)

        return actions
    
    def result(self, state, action):
        """The state that results from executing this action in this state."""
        bag_id, gift_type_index = action
        # print("-- result : input state: ", state, "action: ", action)                
        new_state = list(state)
        bag = list(new_state[bag_id])
        bag[gift_type_index] += 1
        new_state[bag_id] = tuple(bag)
        # print("-- result : output state: ", new_state)
        return tuple(new_state)
        
    def is_goal(self, state):
        """True if the state is a goal."""        
        for bag in state:
            if sum(bag) < 3:
                # print("- A bag with less than 3 gifts found : ", state)
                return False
        
        # Check if solution is available:
        types = np.sum(np.array(state), axis=0)
        for index, t in enumerate(types):
            if t > self.available_gifts[self.gift_types[index]]:
                return False

        mean_score = self._validation(state)
        # if mean_score > self.goal_score:
        # print("- Mean score : ", mean_score, " / ", self.goal_score, state)
        return mean_score > self.goal_score

    def step_cost(self, state, action, result=None):
        """The cost of taking this action from this state."""
        if self.type_cost is not None:
            bag_id, gift_type_index  = action
            gift_type = self.gift_types[gift_type_index]
            if gift_type in self.type_cost:
                return self.type_cost[gift_type]  # Override this if actions have different costs
            return 1.0
        return 1.0
                
    def _validation(self, state, count=100):
        scores = np.zeros(count)
        rejected = 0
        for c in range(count):
            score = 0
            for bag in state:
                total_weight_ = self.bag_weight_fn(bag, n1=1)
                if total_weight_ < self.max_weight:
                    score += total_weight_
                else:
                    rejected += 1
            scores[c] = score
        # if rejected > 0:
        # print("Rejected bags : %f / %i" % (rejected*1.0/count, len(state)))
        return np.mean(scores)

In [68]:
alpha = 0.7
goal_score = n_bags*max_weight*alpha
print("Goal score: ", goal_score)

Goal score:  35.0


In [69]:
def compute_normal_identical(mu, sigma, a=50):
    """
    Solve n*mu + 3*sigma*sqrt(n) < a
    return: n, Mu, Sigma
    """
    for n in range(100):
        y = mu*n + 3.0*sigma*np.sqrt(n)
        if y > a:
            break  
    n -= 1
    m = n*mu
    s = sigma*np.sqrt(n)
    return n, m, s

In [70]:
# # Horse : 
# ag = deepcopy(available_gifts)
# n, m, s = compute_normal_identical(5, 2)
# initial_state=tuple([tuple([ag['horse'].pop() for i in range(n)]) for j in range(n_bags)])
# initial_state

In [71]:
initial_state=tuple([tuple([0]*n_types)]*n_bags)

In [72]:
# initial_state = ((0, 0, 2, 0, 0, 1, 0, 0, 0), )
# initial_state

In [73]:
p = SantasBagsProblem(initial=initial_state,
                      gift_types=gift_types, 
                      available_gifts=available_gifts,
                      max_weight=max_weight,    
                      type_cost=type_cost,
                      weight_fn=weight_fn,
                      bag_weight_fn=bag_weight,
                      goal_score=goal_score)

Define heuristic function :


In [74]:
from math import floor

def round_value(x, digits=1):
    x = floor(x*10**digits) / 10**digits
    return x


def h12(state):     
#     h1 = 0
#     h2 = 0
#     for bag in state:
#         h1 += bag_weight(bag, 200)
#         w = bag_weight(bag, 200)
#         h2 += abs(max_weight*alpha - w)
#     h1 = abs(goal_score - h1) / n_bags    
#     return max(h1, h2) 
    return round_value((goal_score - score(state))**2/goal_score**2)


def h3(state):     
    return round_value(alpha*(max_weight*n_bags - score(state))**2/(max_weight*n_bags)**2)


def h4(state): 
    h1 = 0
    for bag in state:
        h1 += bag_weight(bag, 200)
    return abs(goal_score - h1)/goal_score


def h5(state):
    h = 0
    for bag in state:
        h += np.count_nonzero(bag) * 1.0 / len(bag)
    return h


def h6(state):
    h = 0.0
#     for bag in state:
#         h += (len(bag) - np.count_nonzero(bag)) / len(bag)
    return h * 1.0  / len(state)
                

def h7(state):
    rejected = 0
    count = 100
    for c in range(count):
        for bag in state:
            total_weight_ = bag_weight(bag, n1=1)
            if total_weight_ > max_weight:
                rejected += 1
    return rejected * 1.0 / count
        


def final_heuristic_fn(state):  
    res = np.max(np.array([h12(state), h3(state), h4(state), h5(state), h7(state)]))
#     res = (h12(state) + h5(state) + h6(state) + h7(state))/max_weight
#     print ("final_heuristic_fn : ", res, state)
    return res
#     return np.max(np.array([h12(state),]))
#     return np.max(np.array([h12(state), h3(state), h4(state), h5(state)]))

In [75]:
initial_state = ((1, 1, 2, 2, 1, 0, 1, 1, 1), )
h12(initial_state), h3(initial_state), h5(initial_state), h7(initial_state), final_heuristic_fn(initial_state)

(0.9, 0.6, 0.8888888888888888, 0.97, 1.5163085708790256)

In [80]:
ag2 = available_gifts.copy()
# print(update_available_gifts(ag2, result.state))
# ag2['blocks'] = 0
# ag2['doll'] = 0
# ag2['bike'] = 0
# ag2['horse'] = 0
# ag2['train'] = 0
# ag2['ball'] = 2
# ag2['book'] = 2
# ag2['gloves'] = 0
ag2

{'ball': 1100,
 'bike': 500,
 'blocks': 1000,
 'book': 1200,
 'coal': 166,
 'doll': 1000,
 'gloves': 200,
 'horse': 1000,
 'train': 1000}

In [81]:
from time import time
p.initial = ((0, 0, 0, 0, 1, 0, 0, 0, 0), )# initial_state
p.available_gifts = ag2

In [82]:
type_cost = {
#     "horse": 150,
#     "ball": 0.1,
#     "bike": 150,
#     "train": 150,
    "coal": 0.150,
    "book": 0.750,
#     "doll": 150,
#     "blocks": 150,
    "gloves": 0.750    
}
type_cost

{'book': 0.75, 'coal': 0.15, 'gloves': 0.75}

In [83]:
tic = time()
result = astar_search(p, final_heuristic_fn, verbose=True)
print(result)
print("Elapsed: ", time() - tic)

Check node:  ((0, 0, 0, 0, 1, 0, 0, 0, 0),)  |  0 ,  0.356725548956
Check node:  ((0, 0, 0, 0, 2, 0, 0, 0, 0),)  |  0.15 ,  0.64
Check node:  ((0, 0, 0, 1, 1, 0, 0, 0, 0),)  |  0.75 ,  0.979671714658
Check node:  ((0, 0, 0, 0, 1, 0, 1, 0, 0),)  |  0.75 ,  1.03629979028
Check node:  ((0, 0, 0, 0, 1, 1, 0, 0, 0),)  |  1.0 ,  1.22222222222
Check node:  ((0, 0, 1, 0, 1, 0, 0, 0, 0),)  |  1.0 ,  1.34
Check node:  ((0, 0, 0, 0, 1, 0, 0, 0, 1),)  |  1.0 ,  1.22222222222
Check node:  ((0, 0, 0, 0, 1, 0, 0, 1, 0),)  |  1.0 ,  1.22222222222
Check node:  ((0, 0, 0, 1, 2, 0, 0, 0, 0),)  |  0.9 ,  1.39
Check node:  ((1, 0, 0, 0, 1, 0, 0, 0, 0),)  |  1.0 ,  1.28479766213
Check node:  ((0, 0, 0, 0, 2, 0, 1, 0, 0),)  |  0.9 ,  1.31471404951
Check node:  ((0, 1, 0, 0, 1, 0, 0, 0, 0),)  |  1.0 ,  1.44
Check node:  ((1, 0, 0, 0, 2, 0, 0, 0, 0),)  |  1.15 ,  1.68
Check node:  ((0, 0, 0, 2, 1, 0, 0, 0, 0),)  |  1.5 ,  1.77147849826
Check node:  ((0, 0, 0, 0, 1, 0, 2, 0, 0),)  |  1.5 ,  1.72222222222
Check 

KeyboardInterrupt: 

In [62]:
from search import FrontierPQ, Node

n1 = Node(p.initial)
n2 = Node(p.initial)

explored = set()
explored.add(n2.state)

print(n1.state in explored)

frontier = FrontierPQ(n1)

n1.state in frontier


    

True


True

In [767]:
result.state

AttributeError: 'NoneType' object has no attribute 'state'

In [26]:
h12(result.state), h3(result.state), h6(result.state), h7(result.state)

(3.0714772504384142e-05, 0.051752795317218887, 0.0, 0.05)

In [456]:
p._validation(((0, 0, 0, 0, 1, 0, 0, 0, 0),))

21.887851989590789

In [85]:
type_cost = {
#       "ball": 0.9,
#     "horse": 1.2,
#     "bike": 0.5,
#     "train": 0.9,
#     "coal": 0.7,
#     "book": 1.0,
#     "doll": 1.0,
    "blocks": 0.5,
#     "gloves": 0.3    
}

In [86]:
# type_cost = {"horse": 0.9, "train": 0.9, "bike": 1.9, "book": 1.9, "gloves": 1.9, "ball": 1.9}

In [87]:
def remove_gifts(state, gifts_to_remove=2):
    _gift_removed = 0
    new_state = list(state)
    for bag_index, bag in enumerate(state):
        for i in range(gifts_to_remove):
            gift_type_index = np.argmax(bag)
            if bag[gift_type_index] > 0:
                bag = list(new_state[bag_index])
                bag[gift_type_index] -= 1
                new_state[bag_index] = tuple(bag)
                _gift_removed += 1
    if _gift_removed == 0:
        state=tuple([tuple([0]*n_types)]*n_bags)
    else:
        print("-- Remove some gift : ", state, tuple(new_state))
        state = tuple(new_state)
    return state


# def remove_gifts2(state, gifts_to_remove=1):
#     _gift_removed = 0
#     new_state = list(state)
#     for bag_index, bag in enumerate(state):
#         for i in range(gifts_to_remove):
            
#             for g in bag:
#             gift_type_index = np.argmax(bag)
#             weight_fn(index, n1)
            
#             if bag[gift_type_index] > 0:
#                 bag = list(new_state[bag_index])
#                 bag[gift_type_index] -= 1
#                 new_state[bag_index] = tuple(bag)
#                 _gift_removed += 1
#     if _gift_removed == 0:
#         state=tuple([tuple([0]*n_types)]*n_bags)
#     else:
#         print("-- Remove some gift : ", state, tuple(new_state))
#         state = tuple(new_state)
#     return state

In [88]:
from copy import deepcopy

In [89]:
total_n_bags = 1000
n_bags = 1

total_state=[]
found_goal_states=[]
ag=deepcopy(available_gifts)
counter = 0

In [90]:
alpha = 0.7
goal_score = n_bags*max_weight*alpha
print("Goal score: ", goal_score)

Goal score:  35.0


Each bag is filled using A\*-search algorithm. However, the initial state is not always the empty bag. 

When a *goal* state is found, its stored in a list without repeating. Next state is searched from the previous found goal state as initial. If nothing is found a state from stored goal states is used to restart the search. If no stored goal states, remove some gifts and restart the search


In [96]:
gifts_to_remove = 2
empty_state = tuple([tuple([0]*n_types)]*n_bags)
state=(total_state[-1],) if len(total_state) > 0 else empty_state

while n_bags * counter < total_n_bags:
    
    print("Filled bags : ", n_bags * counter, "/", total_n_bags)
    p = SantasBagsProblem(initial=tuple(state),
                          gift_types=gift_types, 
                          available_gifts=ag,
                          max_weight=max_weight,    
                          type_cost=type_cost,
                          weight_fn=weight_fn,
                          bag_weight_fn=bag_weight,
                          goal_score=goal_score)
    tic = time()
    result = astar_search(p, final_heuristic_fn, verbose=True)
    if result is not None:
        print("- Got a result")
        update_available_gifts(ag, result.state)
        if len(found_goal_states) == 0 or found_goal_states[-1] != result.state:
            found_goal_states.append(result.state)
        total_state += result.state
        counter += 1
        state=(total_state[-1],)
    else:
        print("-- Result is none | len(found_goal_states)=", len(found_goal_states))
        if len(found_goal_states) > 0:
            state=found_goal_states.pop()
            print("--- Restart from : ", state)
        else:
            if state != empty_state:
                state=remove_gifts(state)
            else:
                alpha -= 0.05
                goal_score = n_bags*max_weight*alpha
                print(">>> Goal score changed: ", goal_score)    
        
    if counter > 0 and (n_bags * counter % 20) == 0:
        s = score(total_state)
        print(">>> Current score: ", s, s * (total_n_bags) / (n_bags * counter) )
        
    if counter > 0 and (n_bags * counter % 30) == 0:
        print(">>> Currently available gifts : ", [(k,ag[k]) for k in gift_types])
        
    print("- Elapsed: ", time() - tic)

Filled bags :  334 / 1000
Check node:  ((0, 0, 1, 0, 0, 0, 0, 3, 1),)  |  0 ,  0.333333333333
Check node:  ((0, 0, 1, 0, 0, 0, 0, 4, 1),)  |  1.0 ,  1.33333333333
Check node:  ((0, 0, 1, 0, 0, 0, 0, 3, 2),)  |  1.0 ,  1.37
Check node:  ((0, 0, 1, 0, 0, 1, 0, 3, 1),)  |  1.0 ,  1.44444444444
Check node:  ((1, 0, 1, 0, 0, 0, 0, 3, 1),)  |  1.0 ,  1.44444444444
Check node:  ((0, 0, 1, 1, 0, 0, 0, 3, 1),)  |  1.0 ,  1.44444444444
Check node:  ((0, 0, 1, 0, 0, 0, 1, 3, 1),)  |  1.0 ,  1.44444444444
Check node:  ((0, 0, 1, 0, 0, 0, 0, 5, 1),)  |  2.0 ,  2.34
Check node:  ((0, 0, 1, 1, 0, 0, 0, 4, 1),)  |  2.0 ,  2.44444444444
Check node:  ((2, 0, 1, 0, 0, 0, 0, 3, 1),)  |  2.0 ,  2.44444444444
Check node:  ((0, 0, 1, 0, 0, 2, 0, 3, 1),)  |  2.0 ,  2.44444444444
Check node:  ((1, 0, 1, 0, 0, 0, 0, 4, 1),)  |  2.0 ,  2.44444444444
Check node:  ((0, 0, 1, 0, 0, 0, 1, 4, 1),)  |  2.0 ,  2.44444444444
Check node:  ((0, 0, 1, 0, 0, 0, 1, 3, 2),)  |  2.0 ,  2.44444444444
Check node:  ((1, 0, 1, 0, 

KeyboardInterrupt: 

In [93]:
[(k,ag[k]) for k in gift_types]

[('ball', 1034),
 ('bike', 500),
 ('blocks', 0),
 ('book', 291),
 ('coal', 166),
 ('doll', 1000),
 ('gloves', 156),
 ('horse', 997),
 ('train', 999)]

In [94]:
len(total_state), total_state

(334,
 [(0, 0, 3, 0, 0, 0, 0, 0, 0),
  (0, 0, 3, 0, 0, 0, 0, 0, 0),
  (0, 0, 3, 0, 0, 0, 0, 0, 0),
  (0, 0, 3, 0, 0, 0, 0, 0, 0),
  (0, 0, 3, 1, 0, 0, 0, 0, 0),
  (0, 0, 3, 1, 0, 0, 0, 0, 0),
  (0, 0, 3, 1, 0, 0, 0, 0, 0),
  (0, 0, 3, 1, 0, 0, 0, 0, 0),
  (0, 0, 3, 1, 0, 0, 0, 0, 0),
  (0, 0, 3, 2, 0, 0, 0, 0, 0),
  (0, 0, 3, 2, 0, 0, 0, 0, 0),
  (0, 0, 3, 2, 0, 0, 0, 0, 0),
  (0, 0, 3, 2, 0, 0, 0, 0, 0),
  (0, 0, 3, 2, 0, 0, 0, 0, 0),
  (0, 0, 3, 2, 0, 0, 0, 0, 0),
  (0, 0, 3, 2, 0, 0, 0, 0, 0),
  (0, 0, 3, 2, 0, 0, 0, 0, 0),
  (0, 0, 3, 2, 0, 0, 0, 0, 0),
  (0, 0, 3, 2, 0, 0, 0, 0, 0),
  (0, 0, 3, 2, 0, 0, 0, 0, 0),
  (0, 0, 3, 2, 0, 0, 0, 0, 0),
  (0, 0, 3, 2, 0, 0, 0, 0, 0),
  (0, 0, 3, 2, 0, 0, 0, 0, 0),
  (0, 0, 3, 2, 0, 0, 0, 0, 0),
  (0, 0, 3, 2, 0, 0, 0, 0, 0),
  (0, 0, 3, 2, 0, 0, 0, 0, 0),
  (0, 0, 3, 2, 0, 0, 0, 0, 0),
  (0, 0, 3, 2, 0, 0, 0, 0, 0),
  (0, 0, 3, 2, 0, 0, 0, 0, 0),
  (0, 0, 3, 2, 0, 0, 0, 0, 0),
  (0, 0, 3, 2, 0, 0, 0, 0, 0),
  (0, 0, 3, 2, 0, 0, 0, 0, 0),
  

In [269]:
score(total_state), score(total_state) * (total_n_bags) / (n_bags * counter)

(30920.446246334373, 35572.765198530171)

In [82]:
def to_submission(state, available_gifts, gift_types):
    n_gifts = [available_gifts[t] for t in gift_types]
    output = []
    for bag in state:
        o = []
        for index, count in enumerate(bag):   
            gift_type = gift_types[index]
            for i in range(count):
                v = n_gifts[index] - 1
                assert v >= 0, "Gift index is negative"
                o.append(gift_type + '_%i' % v)
                n_gifts[index] -= 1
        output.append(o)  
    return output
        
submission = to_submission(total_state, AVAILABLE_GIFTS, GIFT_TYPES)
# print(submission)

In [83]:
from datetime import datetime
submission_file = '../results/submission_' + \
                  str(datetime.now().strftime("%Y-%m-%d-%H-%M")) + \
                  '.csv'

In [84]:
def write_submission(state, filename):
    with open(filename, 'w') as w:
        w.write("Gifts\n")
        for bag in state:
            w.write(' '.join(bag) + '\n')
    
write_submission(submission, submission_file)