# Exercise 4 [Cart-Pole Model]

In [3]:
# a few packages we need to import
%matplotlib notebook
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
# plt.rcParams['animation.ffmpeg_path'] = 'D:/ffmpeg/bin/ffmpeg'
import matplotlib.animation as animation
import IPython

# Problem
We are interested in computing a linear optimal control law to stabilize the cart pole system when subjected to small
deviations from the rest position (when $x=0$, $v=0$, $\theta = \pi$ and $\omega = 0$)

<img src="cart_pole.png" alt="drawing" width="400"/>

The dynamic equations of the Cart-Pole model are written as
$$\begin{eqnarray} \dot{x} &=& v \\ 
\dot{v} &=& \frac{f + m_p \sin\theta (l \omega^2 + g \cos \theta)}{m_c + m_p \sin^2 \theta} \\
\dot{\theta} &=& \omega \\
\dot{\omega} &=& \frac{-f\cos\theta -m_p l \omega^2 \cos\theta\sin\theta - (m_c + m_p)g \sin\theta}{l(m_c + m_p \sin^2 \theta)}
\end{eqnarray}$$

which we discretize using a time step $\Delta t$
$$ \begin{eqnarray} 
x_{n+1} &=& x_n + \Delta t\cdot v_n \\ 
v_{n+1} &=& v_n + \Delta t \cdot \left( \frac{f_n + m_p \sin\theta_n (l \omega_n^2 + g \cos \theta_n)}{m_c + m_p \sin^2 \theta_n} \right)\\
\theta_{n+1} &=& \theta_n + \Delta t \cdot \omega_n \\
\omega_{n+1} &=& \omega_n + \Delta t \cdot \left( \frac{-f_n\cos\theta_n -m_p l \omega_n^2 \cos\theta_n\sin\theta_n - (m_c + m_p)g \sin\theta_n}{l(m_c + m_p \sin^2 \theta_n)}\right)
\end{eqnarray}$$

These are the equations we use to simulate the dynamics of the system below.

## Simulation of the Cart-Pole model and display functions

In [4]:
def simulate_cart_pole(x0, K, uff, horizon_length, mp=1., mc=5., l=1., g=9.81, delta_t=0.01):
    """
    This function integrates the cart-pole system (the nonlinear system) for horizon_length steps
    using the explicit Euler discretization x[n+1] = x[n] + delta_t * dx[n]

    The control applied at step i is f[i] = K[i] . x[i] + uff[0, i]

    Arguments:
    x0: numpy vector, initial value for the system (4 numbers for x,v,theta,omega)
    K: a sequence of control gains (at least horizon_length entries); each entry holds
       4 numbers, given either as a flat vector or as a 1x4 row (array or matrix)
    uff: feedforward control inputs, either a flat sequence or a 1 x horizon_length
         array/matrix; entry i is added to the feedback term at step i
    horizon_length: length of the horizon to integrate (number of steps)
    mp, mc, l, g: physical parameters m_p, m_c, l and g of the dynamic equations
    delta_t: integration time step (defaults to 0.01, the value that used to be hard-coded)

    Returns:
    x a numpy array of shape (4, horizon_length+1) containing the integration result
    u a numpy array of shape (horizon_length,) containing the control at each step
    """
    x = np.empty([4, horizon_length + 1])
    x[:, 0] = x0

    u = np.empty([horizon_length])

    # accept both a flat list and a 1xN array/matrix of feedforward terms
    uff = np.atleast_2d(np.asarray(uff, dtype=float))

    for i in range(horizon_length):
        # flatten the gain so that 1x4 rows and flat vectors both yield a scalar control
        gain = np.asarray(K[i], dtype=float).ravel()
        u[i] = gain.dot(x[:, i]) + uff[0, i]

        omega = x[3, i]
        sin_theta = np.sin(x[2, i])
        cos_theta = np.cos(x[2, i])
        denom = mc + mp * sin_theta**2

        # every entry must be a scalar: the original wrapped theta in a list inside np.sin,
        # which made this array ragged (an error on recent NumPy versions)
        dx = np.array([
            x[1, i],
            (u[i] + mp * sin_theta * (l * omega**2 + g * cos_theta)) / denom,
            omega,
            (-u[i] * cos_theta
             - mp * l * omega**2 * cos_theta * sin_theta
             - (mp + mc) * g * sin_theta) / (l * denom),
        ])
        x[:, i + 1] = x[:, i] + delta_t * dx
    return x, u

In [5]:
def animate_cart_pole(x):
    """
    Build an animation of the cart-pole from a simulation result and display it
    in the notebook as an HTML5 video.

    x is the state array produced by a simulation; row 0 (cart position) and
    row 2 (pole angle) are the only rows read here.
    """

    # keep one sample out of five
    frames = x[:, 0::5]

    fig = matplotlib.figure.Figure(figsize=[6,2.3])
    matplotlib.backends.backend_agg.FigureCanvasAgg(fig)
    ax = fig.add_subplot(111, autoscale_on=False, xlim=[-3,3], ylim=[-1.,1.3])
    ax.grid()

    # five line artists: the four edges of the cart, then the pole
    segments = [ax.plot([], [], 'k', lw=2)[0] for _ in range(5)]

    half_width = 0.5
    cart_top = 0.25

    def draw_frame(k):
        # clear every artist before redrawing
        for seg in segments:
            seg.set_data([], [])

        cart_x = frames[0, k]
        angle = frames[2, k]

        left = cart_x - half_width
        right = cart_x + half_width
        top = cart_top
        bottom = 0.
        tip_x = cart_x + np.sin(angle)
        tip_y = cart_top - np.cos(angle)

        endpoints = [
            ([left, right], [bottom, bottom]),
            ([right, right], [bottom, top]),
            ([left, right], [top, top]),
            ([left, left], [bottom, top]),
            ([cart_x, tip_x], [cart_top, tip_y]),
        ]
        for seg, (xs, ys) in zip(segments, endpoints):
            seg.set_data(xs, ys)

        return segments

    def first_frame():
        return draw_frame(0)

    frame_ids = np.arange(0, len(frames[0,:]))
    ani = animation.FuncAnimation(fig, draw_frame, frame_ids,
        interval=50, blit=True, init_func=first_frame)
    plt.close(fig)
    plt.close(ani._fig)
    video = IPython.core.display.HTML(ani.to_html5_video())
    IPython.display.display_html(video)

In [6]:
def solve_ricatti_equations(A,B,Q,R,horizon_length):
    """
    Backward Riccati recursion for regulator problems of the form
    min xQx + sum(xQx + uRu) subject to xn+1 = Axn + Bun

    Arguments:
    A, B, Q, R: numpy arrays defining the problem
    horizon_length: number of cost matrices P to compute

    Returns:
    P: list of horizon_length numpy arrays indexed by time step (the last entry is Q)
    K: list of horizon_length-1 numpy arrays (feedback gains) indexed by time step
    """
    last = horizon_length - 1
    cost_to_go = [1] * horizon_length  # placeholders, every entry is overwritten below
    gains = [1] * last

    # terminal condition
    cost_to_go[last] = Q

    # sweep backwards in time from the step before the terminal one down to step 0
    for step in reversed(range(last)):
        P_next = cost_to_go[step + 1]
        BtP = B.T.dot(P_next)
        AtP = A.T.dot(P_next)
        gains[step] = -np.linalg.pinv(R + BtP.dot(B)).dot(BtP.dot(A))
        cost_to_go[step] = Q + AtP.dot(A) + AtP.dot(B).dot(gains[step])

    return cost_to_go, gains

In [7]:
delta_t = 0.01  # same time step as the one used inside simulate_cart_pole
length = 1001   # number of cost matrices P; the solver returns length-1 = 1000 gains

# plain ndarrays are used instead of np.mat, which is no longer available in NumPy 2.0
Q = np.diag([10., 1., 10., 1.])
R = 0.1

# resting position around which the dynamics are linearized
x_rest = np.array([0., 0., np.pi, 0.])

# initial conditions
x0 = np.array([0.2,0.1,np.pi-0.2,0.])

# physical parameters (same values as the defaults of simulate_cart_pole)
mp=1.
mc=5.
l=1.
g=9.81

# discretized dynamics linearized around the resting position
A1 = np.array([[1., delta_t, 0., 0.],
               [0., 1., mp*g*delta_t/mc, 0.],
               [0., 0., 1., delta_t],
               [0., 0., (mc+mp)*g*delta_t/(l*mc), 1.]])
B1 = np.array([[0.], [delta_t/mc], [0.], [delta_t/(l*mc)]])
P, K = solve_ricatti_equations(A1, B1, Q, R, length)

# optimal cost of the linearized problem: the quadratic cost is defined on the
# deviation from the resting position, not on the raw state
x0_tilde = x0 - x_rest
J0_x0 = x0_tilde.dot(P[0]).dot(x0_tilde)

## Example simulation
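Example of a simulation of the cart-pole.
The original template ran it without any control input (control gains and `uff` both set to 0); the cell below instead uses the LQR gains `K` computed above, together with the feedforward term `uff` that shifts the controller to the resting position $\theta = \pi$.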

In [12]:
horizon_length = 1000 #this will simulate 10 seconds (since deltat = 0.01)

# K holds the LQR gains computed earlier in the notebook. The controller acts on the
# deviation from the resting position: f = K (x - x_rest) = K x + uff,
# hence the feedforward term is uff[i] = -K[i] x_rest (stored as a 1 x len(K) array)
x_rest = np.array([0., 0., np.pi, 0.])
uff = np.array([[-np.asarray(k).ravel().dot(x_rest) for k in K]])

# initial conditions
x0 = np.array([0.2,0.1,np.pi-0.3,0.])

# we simulate the system
mp=1.
mc=5.
l=1.
g=9.81
x,u = simulate_cart_pole(x0, K, uff, horizon_length, mp=mp, mc=mc, l=l, g=g)

# plot x and theta as a function of time
# (row 0 of the state is the cart position x, row 2 is the angle theta)
t = 0.01 * np.arange(horizon_length + 1)
fig, (ax_x, ax_theta) = plt.subplots(2, 1, sharex=True)
ax_x.plot(t, x[0,:])
ax_x.set_ylabel('x')
ax_theta.plot(t, x[2,:])
ax_theta.set_ylabel(r'$\theta$')
ax_theta.set_xlabel('Time [s]')

# make an animation of the cart-pole
animate_cart_pole(x)

<IPython.core.display.Javascript object>

# Question 1
Linearize the discretized equations of the Cart-Pole Model around the resting position $\bar{x}=0$, $\bar{v}=0$, $\bar{\theta}=\pi$ and $\bar{\omega}=0$, $\bar{f}=0$ and write the dynamic equations in the form 

$$ \begin{bmatrix} \tilde{x}_{n+1} \\ \tilde{v}_{n+1} \\ \tilde{\theta}_{n+1} \\ \tilde{\omega}_{n+1} \end{bmatrix} = A \begin{bmatrix} \tilde{x}_{n} \\ \tilde{v}_{n} \\ \tilde{\theta}_{n} \\ \tilde{\omega}_{n} \end{bmatrix} + B \tilde{f}_n$$

where $A$ and $B$ are matrices of appropriate size and $\tilde{x}_n = x_n - \bar{x}$, $\tilde{v}_n = v_n - \bar{v}$, $\tilde{\theta}_n = \theta_n - \bar{\theta}$, $\tilde{\omega}_n = \omega_n - \bar{\omega}$ and $\tilde{f}_n = f_n - \bar{f}$. Remember that the resting position is a fixed point, i.e.

$$ \begin{bmatrix} \bar{x} \\ \bar{v} \\ \bar{\theta} \\ \bar{\omega} \end{bmatrix} = f(\bar{x}, \bar{v}, \bar{\theta}, \bar{\omega},  \bar{f})$$
where $f$ is the discretized dynamic equation of the Cart-Pole Model

# Question 2
We would like to use the linearized equation to compute an optimal controller that stabilizes the system around the resting position. 
1. Write a cost function that will help stabilize the resting position (be precise with the variables you use).
2. How would you (approximately) solve this optimal control problem?
3. What will be the form of the optimal controller $f_n$ (remember that the linearized system will give you $\tilde{f}_n$)?

# Question 3
1. Write a function (e.g. take example from Exercise 3) that computes the optimal control for the linearized problem.
2. Simulate the dynamics of the Cart-Pole Model with the computed controller for initial conditions $x=0.2$, $v=0.1$, $\theta=\pi-0.2$ and $\omega=0$. What happens? (show a plot of the states of the system and the cart-pole animation)
3. How can you change the response of the  system (how fast it stabilizes)? Give an example with a simulation.
4. Does the controller still work when $x=0.2$, $v=0.1$, $\theta=0.2$ and $\omega=0$? Why? (show a plot of the states of the system and the cart-pole animation, use a smaller horizon if necessary)

In [9]:
######## Question 3(3): increase R from 0.1 to 1, response is slower ##########

delta_t = 0.01  # same time step as the one used inside simulate_cart_pole
length = 1001   # number of cost matrices P; the solver returns length-1 = 1000 gains
horizon_length = 1000 #this will simulate 10 seconds (since deltat = 0.01)

# plain ndarrays are used instead of np.mat, which is no longer available in NumPy 2.0
Q = np.diag([10., 1., 10., 1.])
R = 1

# resting position around which the dynamics are linearized
x_rest = np.array([0., 0., np.pi, 0.])

# initial conditions
x0 = np.array([0.2,0.1,np.pi-0.2,0.])

# physical parameters
mp=1.
mc=5.
l=1.
g=9.81

# discretized dynamics linearized around the resting position
A1 = np.array([[1., delta_t, 0., 0.],
               [0., 1., mp*g*delta_t/mc, 0.],
               [0., 0., 1., delta_t],
               [0., 0., (mc+mp)*g*delta_t/(l*mc), 1.]])
B1 = np.array([[0.], [delta_t/mc], [0.], [delta_t/(l*mc)]])
P, K = solve_ricatti_equations(A1, B1, Q, R, length)

# optimal cost of the linearized problem, evaluated on the deviation from rest
x0_tilde = x0 - x_rest
J0_x0 = x0_tilde.dot(P[0]).dot(x0_tilde)

# the controller acts on the deviation from the resting position:
# f = K (x - x_rest) = K x + uff, hence uff[i] = -K[i] x_rest (1 x len(K) array)
uff = np.array([[-np.asarray(k).ravel().dot(x_rest) for k in K]])

# we simulate the system
x,u = simulate_cart_pole(x0, K, uff, horizon_length, mp=mp, mc=mc, l=l, g=g)

# plot x and theta as a function of time
# (row 0 of the state is the cart position x, row 2 is the angle theta)
t = delta_t * np.arange(horizon_length + 1)
fig, (ax_x, ax_theta) = plt.subplots(2, 1, sharex=True)
ax_x.plot(t, x[0,:])
ax_x.set_ylabel('x')
ax_theta.plot(t, x[2,:])
ax_theta.set_ylabel(r'$\theta$')
ax_theta.set_xlabel('Time [s]')

# make an animation of the cart-pole
animate_cart_pole(x)

<IPython.core.display.Javascript object>

In [10]:
######## Question 3(4): x=0.2, v=0.1, theta=0.2 and omega=0 ##########
# The controller does not work here: theta = 0.2 is about pi - 0.2 rad away from the
# linearization point theta = pi, which is not a small deviation from the rest position.

delta_t = 0.01  # same time step as the one used inside simulate_cart_pole
length = 1001   # number of cost matrices P; the solver returns length-1 = 1000 gains
horizon_length = 1000 #this will simulate 10 seconds (since deltat = 0.01)

# plain ndarrays are used instead of np.mat, which is no longer available in NumPy 2.0
Q = np.diag([10., 1., 10., 1.])
R = 1

# resting position around which the dynamics are linearized
x_rest = np.array([0., 0., np.pi, 0.])

# initial conditions
x0 = np.array([0.2,0.1,0.2,0.])

# physical parameters
mp=1.
mc=5.
l=1.
g=9.81

# discretized dynamics linearized around the resting position
A1 = np.array([[1., delta_t, 0., 0.],
               [0., 1., mp*g*delta_t/mc, 0.],
               [0., 0., 1., delta_t],
               [0., 0., (mc+mp)*g*delta_t/(l*mc), 1.]])
B1 = np.array([[0.], [delta_t/mc], [0.], [delta_t/(l*mc)]])
P, K = solve_ricatti_equations(A1, B1, Q, R, length)

# optimal cost of the linearized problem, evaluated on the deviation from rest
x0_tilde = x0 - x_rest
J0_x0 = x0_tilde.dot(P[0]).dot(x0_tilde)

# the controller acts on the deviation from the resting position:
# f = K (x - x_rest) = K x + uff, hence uff[i] = -K[i] x_rest (1 x len(K) array);
# np.pi replaces the truncated literal 3.1415926 used before
uff = np.array([[-np.asarray(k).ravel().dot(x_rest) for k in K]])

# we simulate the system
x,u = simulate_cart_pole(x0, K, uff, horizon_length, mp=mp, mc=mc, l=l, g=g)

# plot x and theta as a function of time
# (row 0 of the state is the cart position x, row 2 is the angle theta)
t = delta_t * np.arange(horizon_length + 1)
fig, (ax_x, ax_theta) = plt.subplots(2, 1, sharex=True)
ax_x.plot(t, x[0,:])
ax_x.set_ylabel('x')
ax_theta.plot(t, x[2,:])
ax_theta.set_ylabel(r'$\theta$')
ax_theta.set_xlabel('Time [s]')

# make an animation of the cart-pole
animate_cart_pole(x)



<IPython.core.display.Javascript object>