# Dam et al. - Credit Assignment during Movement Reinforcement Learning

In [1]:
import numpy as np 
import matplotlib.pyplot as plt 
import ipywidgets as widgets
from ipywidgets import fixed

## Trajectory 

Target trajectory: $x = \alpha y + \beta \sin(\pi y)$

In [8]:
def plotTrajectory(y, alpha, beta):
    '''
    Draw the target ("invisible") trajectory x = alpha * y + beta * sin(pi * y).

    INPUTS:
    y: y values at which the trajectory is evaluated
    alpha: coefficient of the linear term (direction)
    beta: coefficient of the sine term (curvature)

    OUTPUT:
    None; a figure with the invisible trajectory is shown
    '''

    # Possible tweak: divide y by some factor (the max y-value works well) to
    # cut off the sine wave; the bounds of beta may then need to be increased.
    linear_part = alpha * y
    curved_part = beta * np.sin(np.pi * y)
    curve = np.array([linear_part + curved_part, y])  # row 0: x, row 1: y

    heading = rf'Trajectory with $\alpha$ = {alpha:.2f} and $\beta$ = {beta:.2f}'

    plt.figure()
    plt.plot(curve[0, :], curve[1, :], label='invisible')
    plt.title(heading)
    plt.xlabel('X (cm)')
    plt.ylabel('Y (cm)')
    plt.xlim([-7, 7])
    plt.ylim([0, 7])
    plt.legend(loc=0)
    plt.show()

In [7]:
# Interactive view of the target trajectory: y is held fixed (500 samples
# from 0 to 7) while alpha and beta are adjustable from -1 to 1 in steps of 0.1.
_ = widgets.interact(
    plotTrajectory,
    y=fixed(np.linspace(0, 7, 500)),
    alpha=(-1, 1, 0.1),
    beta=(-1, 1, 0.1),
)

interactive(children=(FloatSlider(value=0.0, description='alpha', max=1.0, min=-1.0), FloatSlider(value=0.0, d…

In [4]:
def plotTrajectories(y, alphaT, betaT, alphaG, betaG, condition):
    '''
    Plot the invisible (true) trajectory together with a guessed trajectory
    and print the reward earned by the guess on the figure.

    Both trajectories follow x = alpha * y + beta * sin(pi * y).

    INPUTS:
    y: y values
    alphaT/alphaG: alpha parameter value (direction) [T: true, G: guess]
    betaT/betaG: beta parameter value (curvature) [T: true, G: guess]
    condition: type of condition, ['alpha', 'beta']; selects which parameter
               error carries the large weight (0.8) in the reward, the other
               one carrying the small weight (0.2)

    OUTPUT:
    None; a figure with both trajectories and the reward (in cents) is shown

    RAISES:
    ValueError: if condition is neither 'alpha' nor 'beta'
    '''

    # Validate the condition before doing any work, so that an invalid value
    # fails with a clear message instead of an unbound-name error after a
    # figure has already been opened.
    if condition == 'alpha':
        weightA, weightB = 0.8, 0.2
    elif condition == 'beta':
        weightA, weightB = 0.2, 0.8
    else:
        raise ValueError(
            f"condition must be 'alpha' or 'beta', got {condition!r}"
        )

    # trajectories
    traject = lambda alpha, beta, y: alpha * y + beta * np.sin(np.pi * y)
    xI = traject(alphaT, betaT, y)
    xG = traject(alphaG, betaG, y)

    trajectoryI = np.array([xI, y])
    trajectoryG = np.array([xG, y])

    # reward
    dA = np.abs(alphaT - alphaG)
    dB = np.abs(betaT - betaG)
    if dA != 0 and dB != 0:
        # Both parameters are off: express each error as a share of the total.
        dA_norm = dA / (dA + dB)
        dB_norm = dB / (dA + dB)
    else:
        # At least one parameter is exact: the raw differences are used.
        # NOTE(review): this makes the reward jump when one difference goes
        # from exactly zero to slightly non-zero -- confirm this is intended.
        dA_norm = dA
        dB_norm = dB
    reward = 50 * (1 - (dA_norm * weightA + dB_norm * weightB))

    plt.figure()
    # trajectories
    plt.plot(trajectoryI[0,:], trajectoryI[1,:], label='invisible')
    plt.plot(trajectoryG[0,:], trajectoryG[1,:], label='trial')
    # reward
    plt.text(-5, 0.5, f'{reward:.0f} cents')
    plt.title('Invisible:'+ r' $\alpha$ = ' + f'{alphaT:.1f} and' + r' $\beta$ = ' + f'{betaT:.1f}' + '\n' + 'Guess:'+ r' $\alpha$ = ' + f'{alphaG:.1f} and' + r' $\beta$ = ' + f'{betaG:.1f}' )
    plt.xlim([-7, 7])
    plt.xlabel('X (cm)')
    plt.ylabel('Y (cm)')
    plt.ylim([0, 7])
    plt.legend(loc='lower right')
    plt.show()

In [5]:
# Interactive comparison of the true (T) and guessed (G) trajectories; y is
# held fixed (500 samples from 0 to 7) and the condition is picked from a list.
# NOTE(review): the guess ranges end at 1.1 while the true ranges end at 1 --
# confirm whether the wider guess range is intentional.
_ = widgets.interact(
    plotTrajectories,
    y=fixed(np.linspace(0, 7, 500)),
    alphaT=(-1, 1, 0.1),
    betaT=(-1, 1, 0.1),
    alphaG=(-1, 1.1, 0.1),
    betaG=(-1, 1.1, 0.1),
    condition=['alpha', 'beta'],
)

interactive(children=(FloatSlider(value=0.0, description='alphaT', max=1.0, min=-1.0), FloatSlider(value=0.0, …