In [4]:
import random
from smartcab.environment import Agent, Environment
from smartcab.planner import RoutePlanner
from smartcab.simulator import Simulator
import operator
from smartcab.agent import LearningAgent

In [3]:
def simulate(alpha, gamma, trials):
    env = Environment()
    agent = env.create_agent(LearningAgent)
    agent.alpha = alpha
    agent.gamma = gamma
    env.set_primary_agent(agent, enforce_deadline=True)
    
    sim = Simulator(env, update_delay=0)
    sim.run(n_trials=trials)
    
    return {
        'Q': env.primary_agent.lookup_table,
        'R': env.primary_agent.rewards_table
    }

In [9]:
def performance_metric(rewards):
    return {
        'positive': sum([x[0] for x in rewards]),
        'negative': sum([x[1] for x in rewards]),
        'total': sum([x[2] for x in rewards]),
        'completed': sum([x[3] for x in rewards]),
        'count': len(rewards),
        'last_10_negatives': sum([x[1] for x in rewards[:10]])
    }

In [20]:
keys = [
    (1, 0, 100), 
    (0, 1, 100), 
    (0.9, 0.1, 100), 
    (0.1, 0.9, 100),
    (0.8, 0.2, 100), 
    (0.2, 0.8, 100),
    (0.3, 0.7, 100), 
    (0.7, 0.3, 100),
    (0.4, 0.6, 100), 
    (0.6, 0.4, 100),
    (0.5, 0.5, 100)
]
data = {}
M = {
    'alpha':[],
    'gamma': [],
    'positive': [],
    'negative': [],
    'total': [],
    'completed': [],
    'count': [],
    'last_10_negatives': []
}

In [21]:
for k in keys:
    data[k] = simulate(*k)
    print k


(1, 0, 100)
(0, 1, 100)
(0.9, 0.1, 100)
(0.1, 0.9, 100)
(0.8, 0.2, 100)
(0.2, 0.8, 100)
(0.3, 0.7, 100)
(0.7, 0.3, 100)
(0.4, 0.6, 100)
(0.6, 0.4, 100)
(0.5, 0.5, 100)


In [22]:
M = {
    'alpha':[],
    'gamma': [],
    'positive': [],
    'negative': [],
    'total': [],
    'completed': [],
    'count': [],
    'last_10_negatives': []
}
for k, v in data.iteritems():
    m = performance_metric(v['R'])
    M['alpha'].append(k[0])
    M['gamma'].append(k[1])
    M['positive'].append(m['positive'])
    M['negative'].append(m['negative'])
    M['total'].append(m['total'])
    M['completed'].append(m['completed'])
    M['count'].append(m['count'])
    M['last_10_negatives'].append(m['last_10_negatives'])

In [23]:
import pandas as pd
frm = pd.DataFrame(M)

In [24]:
frm

Unnamed: 0,alpha,completed,count,gamma,last_10_negatives,negative,positive,total
0,0.3,11,100,0.7,-7,-21,3461.0,3440.0
1,0.8,34,100,0.2,-6,-24,3128.0,3104.0
2,1.0,40,100,0.0,-5,-16,3128.5,3112.5
3,0.0,9,100,1.0,-153,-1588,1153.0,-435.0
4,0.7,33,100,0.3,-7,-30,3298.5,3268.5
5,0.6,19,100,0.4,-5,-31,3027.5,2996.5
6,0.9,98,100,0.1,-5,-19,2856.0,2837.0
7,0.1,93,100,0.9,-7,-25,3021.0,2996.0
8,0.5,3,100,0.5,-6,-28,3105.0,3077.0
9,0.4,12,100,0.6,-10,-32,3146.5,3114.5


In [25]:
ideal_policy = data[(0.9, 0.1, 100)]['Q']

In [53]:
policy = {
    "action": [],
    "incoming": [],
    "lights": [],
    "left": [],
    "right": [],
    "planner": []
}
for k,v in ideal_policy.iteritems():
    action = max(v.iteritems(), key=operator.itemgetter(1))[0]
    if k[4] != action:
        policy["action"].append(action)
        policy["incoming"].append(k[0])
        policy["lights"].append(k[1])
        policy["left"].append(k[2])
        policy["right"].append(k[3])
        policy["planner"].append(k[4])
        
pd.DataFrame(policy, columns=['incoming', 'lights', 'left', 'right', 'planner', 'action'])

Unnamed: 0,incoming,lights,left,right,planner,action
0,forward,red,,,forward,
1,,green,,left,left,right
2,,green,,forward,left,right
3,,red,,,,forward
4,left,red,,,right,forward
5,left,green,,,right,forward
6,,red,,forward,left,forward
7,,red,,,left,
8,,green,forward,,left,right
9,,red,right,,forward,


In [52]:
policy = {
    "action": [],
    "incoming": [],
    "lights": [],
    "left": [],
    "right": [],
    "planner": []
}
for k,v in ideal_policy.iteritems():
    action = max(v.iteritems(), key=operator.itemgetter(1))[0]
    if k[4] == action:
        policy["action"].append(action)
        policy["incoming"].append(k[0])
        policy["lights"].append(k[1])
        policy["left"].append(k[2])
        policy["right"].append(k[3])
        policy["planner"].append(k[4])
        
pd.DataFrame(policy, columns=['incoming', 'lights', 'left', 'right', 'planner', 'action'])

Unnamed: 0,incoming,lights,left,right,planner,action
0,,green,,,left,left
1,left,green,,,forward,forward
2,,green,right,,forward,forward
3,forward,red,,,right,right
4,,green,left,,left,left
5,,red,,left,right,right
6,,green,,,forward,forward
7,,green,,left,forward,forward
8,forward,green,,,forward,forward
9,,green,,,right,right
