In [1]:
# Log-domain Riccati recursion for bound constraints on the controls
#double integrator example
using Pkg
Pkg.activate(".")

[32m[1m  Activating[22m[39m project at `~/Research/log_domain_ipsolver`


In [2]:
#double check the linesearch in this code... may be wrong

In [3]:
# Here x is the state [position; velocity].
# u is the acceleration; σ holds one log-domain slack per inequality constraint and is stored alongside u, so the number of inequality constraints determines the size of the augmented control vector.

In [4]:
#the only constraint is a set of 2 inequality constraints on the acceleration 

In [5]:
using LinearAlgebra
using ForwardDiff 
using Plots 
using SparseArrays 

In [6]:
#indexing 

function create_idx(nx,nu,N)
    # Build the index bookkeeping for the stacked decision vector Z:
    #   x_i = Z[idx.x[i]]
    #   u_i = Z[idx.u[i]]
    # The initial state is assumed to be given, so it is not part of Z and the
    # layout is Z = [u1, x2, u2, x3, u3, x4, ...]: one (u, x) pair per stage.
    n_stages = N - 1
    stage_len = nu + nx

    # total length of Z
    nz = n_stages * nu + n_stages * nx

    # σ is stored as part of u (in this example every control carries 2 sigmas,
    # one per bound constraint), so no separate slack index set is created.
    u = map(i -> (i - 1) * stage_len .+ (1:nu), 1:n_stages)
    x = map(i -> (i - 1) * stage_len .+ ((nu + 1):(nu + nx)), 1:n_stages)

    # Lagrange multipliers of the dynamics constraints: one block of nx entries
    # per stage, placed directly after Z in the stacked vector.
    μ = map(i -> (i - 1) * nx .+ ((nz + 1):(nz + nx)), 1:n_stages)

    return (nx=nx, nu=nu, N=N, nz=nz, x=x, u=u, μ=μ, nμ=nx * n_stages)
end

create_idx (generic function with 1 method)

In [7]:
# The Newton step stacks Δx and Δu into a Δz vector, followed by the Lagrange multiplier steps Δμ.
# The slack steps Δσ from the log-domain substitution are carried inside Δu.

In [8]:
# Discrete dynamics
h = 0.1   # time step

#for one timestep
A = [1 h; 0 1]
B_normal = [0.5*h*h; h]

#B will be a 2x3 matrix now since every u contains 2 sigmas (their columns are zero)
B = [B_normal zeros(2,2)]

2×3 Matrix{Float64}:
 0.005  0.0  0.0
 0.1    0.0  0.0

In [9]:
# number of inequality constraints per control step
# (just bounds here, so there are two inequalities)
n_ineq = 2
nx = 2     # number of states
# number of true controls
nc = 1

# size of the augmented control vector: the true controls plus one sigma per inequality
nu = nc + n_ineq

#Tfinal = 10.0 # final time (original)

# shorter horizon used for testing
Tfinal = 5.0 # final time (testing)

# round before converting: Int(Tfinal/h) throws an InexactError whenever the
# floating-point quotient is not exactly integral (e.g. 0.3/0.1)
N = round(Int, Tfinal/h) + 1    # number of time steps
thist = Array(range(0,h*(N-1), step=h));

In [10]:
idx = create_idx(nx,nu,N)

(nx = 2, nu = 3, N = 51, nz = 250, x = UnitRange{Int64}[4:5, 9:10, 14:15, 19:20, 24:25, 29:30, 34:35, 39:40, 44:45, 49:50  …  204:205, 209:210, 214:215, 219:220, 224:225, 229:230, 234:235, 239:240, 244:245, 249:250], u = UnitRange{Int64}[1:3, 6:8, 11:13, 16:18, 21:23, 26:28, 31:33, 36:38, 41:43, 46:48  …  201:203, 206:208, 211:213, 216:218, 221:223, 226:228, 231:233, 236:238, 241:243, 246:248], μ = UnitRange{Int64}[251:252, 253:254, 255:256, 257:258, 259:260, 261:262, 263:264, 265:266, 267:268, 269:270  …  331:332, 333:334, 335:336, 337:338, 339:340, 341:342, 343:344, 345:346, 347:348, 349:350], nμ = 100)

In [11]:
# Initial conditions
#old initial condtion 

#not even log domain newton can solve with this initial condition and limits of 20. probably conditioning on σ
#x0 = [10.0; 0] 

# Initial conditions 
#testing 
x0 = [1.0; 0.0]

#works with this 
#μ0 = [0.0; 0.0]

#put values on μ to test out the residual function 
μ0 = [0.0; 0.0]

u0 = [0.5; 0.01; 0.02]

#try new initial guess 
# u0 = [0.0, 0.0, 0.0]

#initial barrier parameter 
ρ0 = 0.1

#define control bounds 
#umin = -20.0
#umax = 20.0

#try smaller values for the bounds? 
#residual does not go down to tight tolerances with large bounds...
#umin = -2.5
#umax = 2.5

umin = -1.5
umax = 1.5

1.5

In [12]:
#bound constraints for this example 
# Gu - h - s >= 0 

G = [1.0; -1.0] 

h = [umin; -umax]

2-element Vector{Float64}:
 -1.5
 -1.5

In [13]:
zeros(idx.nμ)

100-element Vector{Float64}:
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 ⋮
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0

In [14]:
#create an initial state/control trajectory 

#pass in indices, and initial state 
function create_initial_states(idx, x0)
    # Build the initial guess for the stacked trajectory z and the dynamics
    # multipliers μ.
    #
    # idx : index bookkeeping returned by create_idx
    # x0  : initial state
    #
    # Reads the globals A, B (dynamics) and u0 (constant augmented control guess).
    # Returns (z0, μ0).

    # slacks are part of the controls, hence part of z
    z0 = zeros(idx.nz)

    # dimensions come from idx instead of a hard-coded 2 and the global N
    initial_states = zeros(idx.nx, idx.N)
    initial_states[:, 1] = x0

    # roll the dynamics out under the constant control guess u0 and store every
    # stage directly in z (x1 is given, so z starts at u1, x2)
    for i in 1:idx.N-1
        initial_states[:, i+1] = A*initial_states[:, i] + B*u0

        z0[idx.u[i]] = u0
        z0[idx.x[i]] = initial_states[:, i+1]
    end

    # initial guess for the Lagrange multipliers: all zeros
    μ0 = zeros(idx.nμ)

    return z0, μ0
end

create_initial_states (generic function with 1 method)

In [15]:
size(idx.x) 

(50,)

In [16]:
function create_w(idx, xhist, uhist, μhist)
    # Stack state, control and multiplier histories into one vector w = [z; μ].
    #
    # xhist : states, one column per time step; column 1 (the given initial
    #         state) is not stored, stage i takes column i+1
    # uhist : augmented controls, one column per stage
    # μhist : dynamics multipliers, one column per stage
    w = zeros(idx.nz + idx.nμ)

    # states, starting at x2
    for (stage, rows) in enumerate(idx.x)
        w[rows] = xhist[:, stage + 1]
    end

    # controls
    for (stage, rows) in enumerate(idx.u)
        w[rows] = uhist[:, stage]
    end

    # multipliers
    for (stage, rows) in enumerate(idx.μ)
        w[rows] = μhist[:, stage]
    end

    return w
end

create_w (generic function with 1 method)

In [17]:
z0, μ0  = create_initial_states(idx, x0) 

([0.5, 0.01, 0.02, 1.0025, 0.05, 0.5, 0.01, 0.02, 1.0099999999999998, 0.1  …  0.5, 0.01, 0.02, 7.002500000000005, 2.4499999999999993, 0.5, 0.01, 0.02, 7.250000000000005, 2.499999999999999], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])

In [18]:
#stack all the initial states into one vector w 
w0 = [z0; μ0]

350-element Vector{Float64}:
 0.5
 0.01
 0.02
 1.0025
 0.05
 0.5
 0.01
 0.02
 1.0099999999999998
 0.1
 ⋮
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0

In [19]:
original_w0 = [z0; μ0]

350-element Vector{Float64}:
 0.5
 0.01
 0.02
 1.0025
 0.05
 0.5
 0.01
 0.02
 1.0099999999999998
 0.1
 ⋮
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0

In [20]:
# Cost weights
Q = Array(1.0*I(idx.nx))
R_1 = 0.1 #Array(1.0*I(1))
Qn = Array(1.0*I(idx.nx))

2×2 Matrix{Float64}:
 1.0  0.0
 0.0  1.0

In [21]:
[R_1 zeros(1,2); G zeros(2,2)]

3×3 Matrix{Float64}:
  0.1  0.0  0.0
  1.0  0.0  0.0
 -1.0  0.0  0.0

In [22]:
#this makes H of the true cost function 

function create_H_res(idx, z, ρ)
    # Assemble the constant matrix H used inside residual_function, so that
    # H*z gives the linear part of the stationarity / inequality rows.
    # z and ρ are accepted for signature parity with create_H but are not read.
    # Reads the globals N, R_1, G, Q and Qn.
    H = zeros(idx.nz, idx.nz)

    # 3x3 control block for this example: the first row belongs to the
    # stationarity condition, the other two rows to the control bound
    # constraints. It is identical for every stage, so build it once.
    R_bar_res = [R_1 zeros(1,2); G zeros(2,2)]

    for stage in 1:(N - 1)
        u_rows = idx.u[stage]
        x_rows = idx.x[stage]

        H[u_rows, u_rows] = R_bar_res
        H[x_rows, x_rows] = Q
    end

    # the terminal state uses Qn, which may differ from the stage cost Q
    H[idx.x[end], idx.x[end]] = Qn

    return H
end

create_H_res (generic function with 1 method)

In [23]:
#H changes between iterations now because the R blocks depend on the value of σ
#this is the Jacobian of the KKT residual function, i.e. the linear system we solve with the Riccati recursion
function create_H(idx, z, ρ)
    # Assemble the z-block of the Jacobian of the KKT residual at the point z
    # with barrier parameter ρ. Only the entries at idx.u / idx.x are read
    # from z. Reads the globals N, nc, R_1, G, Q and Qn.
    H = zeros(idx.nz, idx.nz)

    for stage in 1:(N - 1)
        u_rows = idx.u[stage]
        x_rows = idx.x[stage]

        # the sigmas follow the nc true controls inside each control block
        σ = z[u_rows][nc+1:end]

        # first block row: stationarity condition
        # remaining rows: control bound constraints
        stationarity_σ = G'*diagm(sqrt(ρ)*exp.(-σ))
        bound_σ = diagm(-sqrt(ρ)*exp.(σ))

        H[u_rows, u_rows] = [R_1 stationarity_σ; G bound_σ]
        H[x_rows, x_rows] = Q
    end

    # the terminal state uses Qn, which may differ from the stage cost Q
    H[idx.x[end], idx.x[end]] = Qn

    return H
end

create_H (generic function with 1 method)

In [24]:
#this matrix is checked
H_test =  create_H(idx, z0, ρ0)

250×250 Matrix{Float64}:
  0.1   0.313081  -0.309966  0.0  0.0  …   0.0        0.0       0.0  0.0
  1.0  -0.319406   0.0       0.0  0.0      0.0        0.0       0.0  0.0
 -1.0   0.0       -0.322616  0.0  0.0      0.0        0.0       0.0  0.0
  0.0   0.0        0.0       1.0  0.0      0.0        0.0       0.0  0.0
  0.0   0.0        0.0       0.0  1.0      0.0        0.0       0.0  0.0
  0.0   0.0        0.0       0.0  0.0  …   0.0        0.0       0.0  0.0
  0.0   0.0        0.0       0.0  0.0      0.0        0.0       0.0  0.0
  0.0   0.0        0.0       0.0  0.0      0.0        0.0       0.0  0.0
  0.0   0.0        0.0       0.0  0.0      0.0        0.0       0.0  0.0
  0.0   0.0        0.0       0.0  0.0      0.0        0.0       0.0  0.0
  ⋮                                    ⋱                             
  0.0   0.0        0.0       0.0  0.0      0.0        0.0       0.0  0.0
  0.0   0.0        0.0       0.0  0.0      0.0        0.0       0.0  0.0
  0.0   0.0        0.0       

In [25]:
# Write the whole LQR problem as a QP with the stacked matrices C and G and
# their corresponding vectors:
#   Cz = d
#   Gz - h >= 0

# C has one block row of nx dynamics equations per stage, so it is
# (N-1)*nx by idx.nz (100 x 250 for this example) and d has (N-1)*nx entries
C = zeros((N-1)*nx , idx.nz)

# negative identity at each of the state indices
for i in eachindex(idx.x)

    C[(i-1)*nx.+(1 : nx), idx.x[i]] = -1.0 *Matrix(I, idx.nx, idx.nx)

end

# the first block row only contains B because x1 is given and moves into d
C[1:nx, idx.u[1]] = B

for i in 1:length(idx.x)-1

    # A matrix at each of the state indices, starting at block row 2
    C[i*nx.+(1 : nx), idx.x[i]] = A

    # B matrix at each of the control indices, starting at block row 2
    C[i*nx.+(1 : nx), idx.u[i+1]] = B

end

# create d: only the first block row is nonzero (contribution of the given x1)
d = zeros(nx*(N-1))
# index with nx rather than a hard-coded 2
d[1:nx] = -A*x0

2-element Vector{Float64}:
 -1.0
  0.0

In [26]:
A 

2×2 Matrix{Float64}:
 1.0  0.1
 0.0  1.0

In [27]:
C 

100×250 Matrix{Float64}:
 0.005  0.0  0.0  -1.0  -0.0  0.0    …   0.0  0.0    0.0  0.0   0.0   0.0
 0.1    0.0  0.0  -0.0  -1.0  0.0        0.0  0.0    0.0  0.0   0.0   0.0
 0.0    0.0  0.0   1.0   0.1  0.005      0.0  0.0    0.0  0.0   0.0   0.0
 0.0    0.0  0.0   0.0   1.0  0.1        0.0  0.0    0.0  0.0   0.0   0.0
 0.0    0.0  0.0   0.0   0.0  0.0        0.0  0.0    0.0  0.0   0.0   0.0
 0.0    0.0  0.0   0.0   0.0  0.0    …   0.0  0.0    0.0  0.0   0.0   0.0
 0.0    0.0  0.0   0.0   0.0  0.0        0.0  0.0    0.0  0.0   0.0   0.0
 0.0    0.0  0.0   0.0   0.0  0.0        0.0  0.0    0.0  0.0   0.0   0.0
 0.0    0.0  0.0   0.0   0.0  0.0        0.0  0.0    0.0  0.0   0.0   0.0
 0.0    0.0  0.0   0.0   0.0  0.0        0.0  0.0    0.0  0.0   0.0   0.0
 ⋮                            ⋮      ⋱        ⋮                      
 0.0    0.0  0.0   0.0   0.0  0.0        0.0  0.0    0.0  0.0   0.0   0.0
 0.0    0.0  0.0   0.0   0.0  0.0        0.0  0.0    0.0  0.0   0.0   0.0
 0.0    0.0  0.0 

In [28]:
C 

100×250 Matrix{Float64}:
 0.005  0.0  0.0  -1.0  -0.0  0.0    …   0.0  0.0    0.0  0.0   0.0   0.0
 0.1    0.0  0.0  -0.0  -1.0  0.0        0.0  0.0    0.0  0.0   0.0   0.0
 0.0    0.0  0.0   1.0   0.1  0.005      0.0  0.0    0.0  0.0   0.0   0.0
 0.0    0.0  0.0   0.0   1.0  0.1        0.0  0.0    0.0  0.0   0.0   0.0
 0.0    0.0  0.0   0.0   0.0  0.0        0.0  0.0    0.0  0.0   0.0   0.0
 0.0    0.0  0.0   0.0   0.0  0.0    …   0.0  0.0    0.0  0.0   0.0   0.0
 0.0    0.0  0.0   0.0   0.0  0.0        0.0  0.0    0.0  0.0   0.0   0.0
 0.0    0.0  0.0   0.0   0.0  0.0        0.0  0.0    0.0  0.0   0.0   0.0
 0.0    0.0  0.0   0.0   0.0  0.0        0.0  0.0    0.0  0.0   0.0   0.0
 0.0    0.0  0.0   0.0   0.0  0.0        0.0  0.0    0.0  0.0   0.0   0.0
 ⋮                            ⋮      ⋱        ⋮                      
 0.0    0.0  0.0   0.0   0.0  0.0        0.0  0.0    0.0  0.0   0.0   0.0
 0.0    0.0  0.0   0.0   0.0  0.0        0.0  0.0    0.0  0.0   0.0   0.0
 0.0    0.0  0.0 

In [29]:
C' 

250×100 adjoint(::Matrix{Float64}) with eltype Float64:
  0.005   0.1   0.0     0.0  0.0  0.0  …  0.0  0.0   0.0   0.0   0.0     0.0
  0.0     0.0   0.0     0.0  0.0  0.0     0.0  0.0   0.0   0.0   0.0     0.0
  0.0     0.0   0.0     0.0  0.0  0.0     0.0  0.0   0.0   0.0   0.0     0.0
 -1.0    -0.0   1.0     0.0  0.0  0.0     0.0  0.0   0.0   0.0   0.0     0.0
 -0.0    -1.0   0.1     1.0  0.0  0.0     0.0  0.0   0.0   0.0   0.0     0.0
  0.0     0.0   0.005   0.1  0.0  0.0  …  0.0  0.0   0.0   0.0   0.0     0.0
  0.0     0.0   0.0     0.0  0.0  0.0     0.0  0.0   0.0   0.0   0.0     0.0
  0.0     0.0   0.0     0.0  0.0  0.0     0.0  0.0   0.0   0.0   0.0     0.0
  0.0     0.0  -1.0    -0.0  1.0  0.0     0.0  0.0   0.0   0.0   0.0     0.0
  0.0     0.0  -0.0    -1.0  0.1  1.0     0.0  0.0   0.0   0.0   0.0     0.0
  ⋮                               ⋮    ⋱       ⋮                        
  0.0     0.0   0.0     0.0  0.0  0.0     0.0  0.0   0.0   0.0   0.0     0.0
  0.0     0.0   0.0     

In [30]:
B_normal 

2-element Vector{Float64}:
 0.005000000000000001
 0.1

In [31]:
#test out the constraint 
constraint_test = C*z0 - d
#test if all are equal to zero 
all(constraint_test .== 0) 

#checked 

true

In [32]:
idx.u 

50-element Vector{UnitRange{Int64}}:
 1:3
 6:8
 11:13
 16:18
 21:23
 26:28
 31:33
 36:38
 41:43
 46:48
 ⋮
 206:208
 211:213
 216:218
 221:223
 226:228
 231:233
 236:238
 241:243
 246:248

In [33]:
G 

2-element Vector{Float64}:
  1.0
 -1.0

In [34]:
H_test*z0 

250-element Vector{Float64}:
  0.04693149173746914
  0.4968059409209804
 -0.5064523198128809
  1.0025
  0.05
  0.04693149173746914
  0.4968059409209804
 -0.5064523198128809
  1.0099999999999998
  0.1
  ⋮
  0.4968059409209804
 -0.5064523198128809
  7.002500000000005
  2.4499999999999993
  0.04693149173746914
  0.4968059409209804
 -0.5064523198128809
  7.250000000000005
  2.499999999999999

In [35]:
C' 

250×100 adjoint(::Matrix{Float64}) with eltype Float64:
  0.005   0.1   0.0     0.0  0.0  0.0  …  0.0  0.0   0.0   0.0   0.0     0.0
  0.0     0.0   0.0     0.0  0.0  0.0     0.0  0.0   0.0   0.0   0.0     0.0
  0.0     0.0   0.0     0.0  0.0  0.0     0.0  0.0   0.0   0.0   0.0     0.0
 -1.0    -0.0   1.0     0.0  0.0  0.0     0.0  0.0   0.0   0.0   0.0     0.0
 -0.0    -1.0   0.1     1.0  0.0  0.0     0.0  0.0   0.0   0.0   0.0     0.0
  0.0     0.0   0.005   0.1  0.0  0.0  …  0.0  0.0   0.0   0.0   0.0     0.0
  0.0     0.0   0.0     0.0  0.0  0.0     0.0  0.0   0.0   0.0   0.0     0.0
  0.0     0.0   0.0     0.0  0.0  0.0     0.0  0.0   0.0   0.0   0.0     0.0
  0.0     0.0  -1.0    -0.0  1.0  0.0     0.0  0.0   0.0   0.0   0.0     0.0
  0.0     0.0  -0.0    -1.0  0.1  1.0     0.0  0.0   0.0   0.0   0.0     0.0
  ⋮                               ⋮    ⋱       ⋮                        
  0.0     0.0   0.0     0.0  0.0  0.0     0.0  0.0   0.0   0.0   0.0     0.0
  0.0     0.0   0.0     

In [36]:
#TODO 
#need to create G all 
# G_all = zeros(idx.nz, idx.nμ*n_ineq)

In [37]:
#TODO: use this as the exit condition for the ip newtons method 
# function true_kkt_conditions(w)

#     z = w[1:idx.nz]

#     μ = w[idx.nz+1:end]
    
#     #create λ 
#     λ_sol = zeros(idx.nμ*n_ineq)

#     for i=1:N-1

#         λ_sol[1:2] = sqrt
#     end

#     #create H 
#     H = create_H_res(idx, z, ρ)

#     stationarity_residual = H*z + C'*μ - G_all*λ

# end

In [38]:
#residual function for the KKT system with log domain substitutions

#this is also the right-hand side of the nonlinear KKT system

#will be evaluating this residual function at the trajectories we are evaluating newton on 

function residual_function(w, ρ)
    # KKT residual with the log-domain substitution, evaluated at the stacked
    # point w = [z; μ] for barrier parameter ρ.
    # Returns [stationarity and inequality rows; dynamics rows C*z - d].
    # Reads the globals idx, N, nc, G, h (the bound vector), C and d.

    # z contains x and u, and u contains the sigmas
    z = w[1:idx.nz]
    μ = w[idx.nz+ 1: idx.nz + idx.nμ]

    root_ρ = sqrt(ρ)

    # σ-dependent terms of the stationarity and inequality rows
    h_res = zeros(idx.nz)

    for stage in 1:(N - 1)
        u_rows = idx.u[stage]
        σ = z[u_rows][nc+1:end]

        # row of the true control: multiplier term from the stationarity condition
        h_res[u_rows[1]] = G'*(root_ρ*exp.(-σ))

        # sigma rows: h from the inequality constraint plus the slack term
        h_res[u_rows[nc+1:end]] = (h + root_ρ*exp.(σ))
    end

    # constant matrix for the residual; the Jacobian version is create_H
    H = create_H_res(idx, z, ρ)

    # gradient of the Lagrangian together with the inequality rows Gu - h - s
    stationarity_residual = H*z + C'*μ - h_res

    # equality (dynamics) constraints
    primal_feasibility_1 = C*z - d

    # this is the residual Newton's method drives to zero
    return [stationarity_residual; primal_feasibility_1]
end

residual_function (generic function with 1 method)

In [39]:
#residuals checked and seem to be good...

In [40]:
function newton_step(w_newton, ρ0)
    # Solve the full (dense) KKT system for one Newton step at w_newton.
    # Used as the reference that one backward plus forward pass should reproduce.
    # Returns the step for the stacked vector [z; μ].

    # create_H only reads the idx.u entries, which sit at the same positions
    # in w as in z, so w_newton can be passed directly
    H_jac = create_H(idx, w_newton, ρ0)

    # the lower-right zero block is sized from idx.nμ instead of a hard-coded 100
    kkt_matrix = [H_jac C'; C zeros(idx.nμ, idx.nμ)]

    # distinct local name so the function's own name is not shadowed
    step = kkt_matrix\(-residual_function(w_newton, ρ0))

    return step
end

newton_step (generic function with 1 method)

In [41]:
#newton is working
#this newton step should be the same as one backward and forward pass
newton_step_1 = newton_step(w0, ρ0)

350-element Vector{Float64}:
 -1.1492203277052568
  1.6636316087048855
  5.661853070006606
 -0.005746101638527623
 -0.11492203277052511
 -1.0614991443402864
  1.9382701836178329
  5.389947194572935
 -0.022545800637280067
 -0.22107194720455361
  ⋮
 -0.5324744739238828
 -0.15444813789751588
 -0.4183791037798001
 -0.13045522314982677
 -0.3076495530698678
 -0.09671481161559335
 -0.2007121645723569
 -0.053242104660454107
 -0.0980079530241314

In [42]:
newton_step_2 = newton_step(w0+newton_step_1, ρ0)

350-element Vector{Float64}:
 -1.4504564423559771
 -1.3556872136433746
 -0.9612147968875945
 -0.007252282211779537
 -0.1450456442356005
 -0.9959793386515541
 -1.0259049231022765
 -0.9567632346963489
 -0.026736743328597434
 -0.24464357810075965
  ⋮
  0.49347895743373643
  0.20544487777839474
  0.3870922214890198
  0.16721459577976894
  0.2839926731323857
  0.12030291208647367
  0.18471243680899127
  0.06460774759447509
  0.08983199119020906

In [43]:
#cost-to-go terms 
P = zeros(nx,nx,N)
q = zeros(nx, N)

#feedback terms
K = zeros(nu,nx,N-1)
f = zeros(nu, N-1)

3×50 Matrix{Float64}:
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0

In [44]:
#obtain the linear feedback control law as well as the cost to go 
function ricatti_recursion(w, ρ)
    # Backward pass: compute the linear feedback law (K, f) and the cost-to-go
    # terms (P, q) around the reference w. The control law solves for deltas
    # from that reference.
    # Writes into and returns the global arrays K, f, P and q.

    z = w[1:idx.nz]

    affine_term = -residual_function(w, ρ)

    # terminal cost-to-go
    P[:,:,N] = Qn
    q[:,N] = affine_term[idx.x[end]]

    # Jacobian version of H
    H = create_H(idx, z, ρ)

    for k in (N-1):-1:1

        P_next = P[:,:,k+1]
        q_next = q[:,k+1]

        # matrix shared by the feedback and feedforward solves
        S = H[idx.u[k], idx.u[k]] + B'*P_next*B

        K[:,:,k] = -(S)\B'*P_next*A

        f[:,k] = S\(B'*q_next + affine_term[idx.u[k]])

        # x1 is not part of the cost in this formulation, so no cost-to-go is
        # stored at k = 1; only the gains K and f are needed there
        k == 1 && break

        P[:, :, k] = (Q + A'*P_next*A + A'*P_next*B*K[:,:,k])

        q[:,k] = -A'*P_next*B*f[:,k] + A'*q_next + affine_term[idx.x[k-1]]

        println("condition number: ", cond(S))

    end

    return K, f, P, q

end

ricatti_recursion (generic function with 1 method)

In [45]:
K_test, f_test, P_test, q_test = ricatti_recursion(w0, ρ0)

condition number: [0.11002500000000001 0.3130812471718412 -0.3099660367124636; 1.0 -0.31940590790196033 0.0; -1.0 0.0 -0.32261599064404545]
condition number: [0.12020050951923057 0.3130812471718412 -0.3099660367124636; 1.0 -0.31940590790196033 0.0; -1.0 0.0 -0.32261599064404545]
condition number: [0.13062310893641735 0.3130812471718412 -0.3099660367124636; 1.0 -0.31940590790196033 0.0; -1.0 0.0 -0.32261599064404545]
condition number: [0.14137762659762704 0.3130812471718412 -0.3099660367124636; 1.0 -0.31940590790196033 0.0; -1.0 0.0 -0.32261599064404545]
condition number: [0.15253065760385684 0.3130812471718412 -0.3099660367124636; 1.0 -0.31940590790196033 0.0; -1.0 0.0 -0.32261599064404545]
condition number: [0.1641247578662729 0.3130812471718412 -0.3099660367124636; 1.0 -0.31940590790196033 0.0; -1.0 0.0 -0.32261599064404545]
condition number: [0.17617372447709628 0.3130812471718412 -0.3099660367124636; 1.0 -0.31940590790196033 0.0; -1.0 0.0 -0.32261599064404545]
condition number: [0.

([-0.6502707117978904 -1.3127489037049225; -2.0358756544900443 -4.109970639954045; 2.0156183532618472 4.069075748800464;;; -0.6499038565376124 -1.3122565339703227; -2.034727099465851 -4.108429122648263; 2.014481226551093 4.067549569848153;;; -0.6494985941918772 -1.3116995158291143; -2.033458299059505 -4.106685203304794; 2.013225050919729 4.06582300279208;;; … ;;; -0.02161487305305693 -0.14890340757792722; -0.06767211412912087 -0.4661886455263444; 0.06699876534299083 0.4615499909991071;;; -0.009691223187612907 -0.09848873847645853; -0.03034140242198516 -0.30834977074590947; 0.030039500423603017 0.30528163926358187;;; -0.0024378196168649605 -0.0490001742989857; -0.007632356060280621 -0.15341035681164045; 0.007556412848595282 0.15188389825676515], [-1.1492203277052335 -1.2160407463554714 … -0.8129331609229532 -0.6482783939631984; 1.6636316087049583 1.4544294086293477 … 2.7164836645457715 3.2319868624712607; 5.661853070006535 5.868973673411353 … 4.619477067158868 4.109103211755572], [0.0 0

In [None]:
#pass in the current tractory as well as the feedback control law 
#everything is in terms of deltas from the reference trajectory 

#x0 defined above 

#no linesearch here 

#w here is the reference 
function forward_pass(w, K, f, P, q, ρ)
    # Forward rollout with a full step (no linesearch).
    # w is the reference [z; μ]; K, f, P, q come from ricatti_recursion.
    # ρ is accepted for signature parity with forward_pass_ls but is not read.
    # Reads the globals idx, nx, nu, N, x0, A and B.
    # Returns (xhist, uhist, μhist).

    # reference state trajectory stored in w (x1 is the given initial state)
    xtraj = zeros(nx, N)
    xtraj[:,1] = x0
    for i in 2:N
        xtraj[:,i] = w[idx.x[i-1]]
    end

    xhist = zeros(nx, N)
    xhist[:,1] = x0

    uhist = zeros(nu, N-1)

    # multiplier k belongs to the state at step k+1
    μhist = zeros(nx, N-1)

    for k in 1:N-1

        # K*Δx + f is Δu (the minus sign is inside K); apply it to the
        # reference control
        Δu_k = (K[:,:,k]*(xhist[:,k] - xtraj[:,k])) + f[:,k]
        uhist[:,k] = w[idx.u[k]] + Δu_k

        # forward rollout on the dynamics with this control
        xhist[:,k+1] = A*xhist[:,k] + B*uhist[:,k]

        # multiplier update from the cost-to-go at the new state
        Δx_next = xhist[:,k+1] - xtraj[:,k+1]
        Δμ_k = P[:,:,k+1]*Δx_next - q[:,k+1]
        μhist[:,k] = w[idx.μ[k]] + Δμ_k

    end

    return xhist, uhist, μhist

end

In [None]:
#maximum iterations on the forward pass linesearch 
max_linesearch_iters = 20

In [None]:
#adding an armijo linesearch on the forward rollout

#forward pass with a linesearch 


#this probably has a bug...will comment out for now .


# function forward_pass_ls(w, K, f, P, q, ρ)

#     #state trajectory
#     z = w[1:idx.nz]

#     #lagrange multipliers 
#     μ = w[idx.nz+ 1: idx.nz + idx.nμ]
    
#     #parameter for the linesearch 
#     b = 0.01

#     xhist = zeros(nx, N)
#     xhist[:,1] = x0 

#     #this is the reference trajectory based off w 
#     xtraj = zeros(nx, N)
#     xtraj[:,1] = x0 
#     for i=2:N

#         xtraj[:,i] = w[idx.x[i-1]]

#     end

#     uhist = zeros(nu, N-1)

#     #this sigma is from 2:N
#     μhist = zeros(nx, N-1)

#     #initial step size 
#     α = 1.0

#     for i =1:max_linesearch_iters

#         #forward rollout 
#         for k=1:N-1
            
#             Δu_k = (K[:,:,k]*(xhist[:,k] - xtraj[:,k])) + α*f[:,k]

#             #+1 because there is no ctg calculated at k=1
#             Δμ_k = P[:,:,k+1]*(xhist[:,k] - xtraj[:,k]) - q[:,k+1] 

#             uhist[:,k] = w[idx.u[k]] + Δu_k

#             #apply a forward rollout on the dynamics with this control 
#             xhist[:,k+1] = A*xhist[:,k] + B*uhist[:,k]

#             #update the lagrage multipliers μ
#             μhist[:,k] = w[idx.μ[k]] + Δμ_k 
            
#         end

#         #create w and evaluate the cost 
#         new_w = create_w(idx, xhist, uhist, μhist)

#         #this is the delta from the forward pass
#         new_delta = new_w - w

#         #call the create H function here 
#         H_w = create_H(idx, w, ρ)

#         #jacobian of the residual function 
#         jac = [H_w C'; 
#         C zeros(idx.nμ,idx.nμ)] 

#         #this is the gradient of the L2 norm of the residual function
#         grad = (w' * jac)/norm(residual_function(w, ρ))

#         #the merit function is the l2 norm of the kkt residual 
#         #simple linesearch (working)
#         #if norm(residual_function(w + α*new_delta, ρ)) < norm(residual_function(w, ρ))
        
#         #armijo linesearch
#         if norm(residual_function(w + α*new_delta, ρ)) < norm(residual_function(w, ρ)) + b*α*grad*new_delta 

#             return xhist, uhist, μhist, α

#         else
#             α = 0.5*α

#         end

#     end

#     println("linesearch failed ")

# end

In [None]:
function forward_pass_ls(w, K, f, P, q, ρ)
    # Forward rollout with a backtracking (Armijo) linesearch on the merit
    # function ‖residual_function(·, ρ)‖.
    #
    # w          : reference point [z; μ]
    # K, f, P, q : feedback and cost-to-go terms from ricatti_recursion
    # ρ          : barrier parameter
    #
    # Reads the globals idx, nx, nu, N, x0, A, B, C and max_linesearch_iters.
    # Returns the accepted stacked vector new_w, or nothing (after printing a
    # message) when no step size satisfies the sufficient-decrease test.

    # sufficient-decrease parameter for the linesearch
    b = 0.01

    xhist = zeros(nx, N)
    xhist[:,1] = x0

    # reference state trajectory stored in w
    xtraj = zeros(nx, N)
    xtraj[:,1] = x0
    for i in 2:N
        xtraj[:,i] = w[idx.x[i-1]]
    end

    uhist = zeros(nu, N-1)

    # multiplier k belongs to the state at step k+1
    μhist = zeros(nx, N-1)

    # Merit value and gradient at the reference point. They do not depend on
    # the step size, so they are computed once instead of once per trial.
    res_w = residual_function(w, ρ)
    J0 = norm(res_w)

    # Jacobian of the residual function
    jac = [create_H(idx, w, ρ) C';
           C zeros(idx.nμ, idx.nμ)]

    # gradient of ‖r(w)‖ is r' * J / ‖r‖ (the residual, not the iterate w)
    grad = (res_w' * jac)/J0

    # initial step size
    α = 1.0

    for i in 1:max_linesearch_iters

        # forward rollout with the feedforward terms scaled by α
        for k in 1:N-1

            Δu_k = (K[:,:,k]*(xhist[:,k] - xtraj[:,k])) + α*f[:,k]

            uhist[:,k] = w[idx.u[k]] + Δu_k

            # apply a forward rollout on the dynamics with this control
            xhist[:,k+1] = A*xhist[:,k] + B*uhist[:,k]

            # Multiplier k is the costate of the state at step k+1, so it uses
            # the deviation at k+1 together with P[:,:,k+1] and q[:,k+1]
            # (same pairing as forward_pass).
            Δx_next = xhist[:,k+1] - xtraj[:,k+1]
            Δμ_k = P[:,:,k+1]*Δx_next - α*q[:,k+1]

            μhist[:,k] = w[idx.μ[k]] + Δμ_k

        end

        # stack the trial point and evaluate the merit function
        new_w = create_w(idx, xhist, uhist, μhist)

        Jn = norm(residual_function(new_w, ρ))

        # actual step taken by this rollout; it already carries the factor α
        new_delta = new_w - w

        # Armijo sufficient decrease along the step actually taken, so no
        # second factor of α is applied
        if Jn < J0 + b*(grad*new_delta)

            return new_w

        else
            α = 0.5*α

        end

    end

    println("linesearch failed ")

    return nothing

end

In [None]:
xhist_test, uhist_test, μhist_test = forward_pass(w0, K_test, f_test, P_test, q_test, ρ0)

In [None]:
#test out forward pass with linesearch. #old linesearch 

#xhist_test, uhist_test, μhist_test, α_test = forward_pass_ls(w0, K_test, f_test, P_test, q_test, ρ0)

In [None]:
#this is working, commented out for testing...
#new_w_ls = forward_pass_ls(w0, K_test, f_test, P_test, q_test, ρ0)

In [None]:
#this is after one iteration of the recursive alg. 
plot(xhist_test[1,:])  
plot!(xhist_test[2,:])  

In [None]:
#stack up the solution into the w vector 
w_1 = create_w(idx, xhist_test, uhist_test, μhist_test) 

In [None]:
#this is the step that the Riccati recursion makes after one iteration; it should match the Newton step
step_ricatti = w_1 - w0 

In [None]:
difference = step_ricatti - newton_step_1 

In [None]:
w0 

In [None]:
#do another forward and backward pass and compare it to the newton step 


K_test_2, f_test_2, P_test_2, q_test_2 = ricatti_recursion(w0 + step_ricatti, ρ0)
xhist_test_2, uhist_test_2, μhist_test_2 = forward_pass(w0 + step_ricatti, K_test_2, f_test_2, P_test_2, q_test_2, ρ0)
w_2_ricatti = create_w(idx, xhist_test_2, uhist_test_2, μhist_test_2) 

In [None]:
step_ricatti_2 = w_2_ricatti - w_1 

In [None]:
newton_step_2 

In [None]:
step_ricatti_2 - newton_step_2 

In [None]:
#initial residual 
residual_0 = residual_function(w0, ρ0)

In [None]:
norm(residual_function(w_1, ρ0))

In [None]:
#number of backward and forward passes
num_iters = 15

In [None]:
all_iters = zeros(idx.nz + idx.nμ, num_iters)

In [None]:
residuals = zeros(num_iters)

all_α = zeros(num_iters)

In [None]:
all_iters[:,1] = w0

residuals[1] = norm(residual_function(w0, ρ0))

In [None]:
tol = 1e-8

In [None]:
#this changes the original all_iters matrix bc the arguments are passed in by reference 

#this multiple iters function is for the old linesearch function 

# function multiple_iters(all_iters, ρ)
#     #run a couple iterations 
    
#     for i =1:num_iters-1

#         K_test, f_test, P_test, q_test = ricatti_recursion(all_iters[:,i], ρ)

#         #forward pass w no linesearch 
#         #xhist_test, uhist_test, μhist_test = forward_pass(all_iters[:,i], K_test, f_test, P_test, q_test, ρ)

#         #forward pass w linesearch 
#         xhist_test, uhist_test, μhist_test, α_test = forward_pass_ls(all_iters[:,i], K_test, f_test, P_test, q_test, ρ)

#         all_iters[:,i+1] = create_w(idx, xhist_test, uhist_test, μhist_test) 

#         #bug here 
#         #residuals[i+1] = norm(residual_function(all_iters[:,i+1], ρ0))

#         residuals[i+1] = norm(residual_function(all_iters[:,i+1], ρ))

#         all_α[i] = α_test

#         #user defined tolerance 
#         #if residuals[i+1] < tol 

#         #or use the barrier parameter as a tolerance 
#         if residuals[i+1] < ρ 

#             return all_iters[:, 1:i+1], residuals[1:i+1]

#         end

#     end

#     println("Failed to convert to tolerance")
#     println("Final residual of: ", residuals[end])

# end

In [None]:
function multiple_iters(all_iters, ρ)
    # Run up to num_iters-1 backward/forward passes for a fixed barrier ρ.
    #
    # all_iters : matrix whose first column is the starting point; later
    #             columns are overwritten in place with the new iterates
    # ρ         : barrier parameter, also used as the convergence tolerance
    #
    # Writes the residual norms into the global vector `residuals`.
    # Returns (iterates, residual norms) for the columns actually computed.
    # If the tolerance is not reached, a message is printed and the iterates
    # computed so far are returned, so callers can still destructure the result.

    # residual of the starting point for this ρ; without this the first entry
    # is left over from an earlier call
    residuals[1] = norm(residual_function(all_iters[:,1], ρ))

    # index of the last column holding a valid iterate
    last_iter = 1

    for i in 1:num_iters-1

        K_i, f_i, P_i, q_i = ricatti_recursion(all_iters[:,i], ρ)

        # forward pass with linesearch
        new_w = forward_pass_ls(all_iters[:,i], K_i, f_i, P_i, q_i, ρ)

        # forward_pass_ls returns nothing when the linesearch fails; stop here
        # instead of failing on the column assignment below
        if new_w === nothing
            break
        end

        all_iters[:,i+1] = new_w

        residuals[i+1] = norm(residual_function(all_iters[:,i+1], ρ))

        last_iter = i + 1

        # use the barrier parameter as the tolerance
        if residuals[i+1] < ρ

            return all_iters[:, 1:i+1], residuals[1:i+1]

        end

    end

    println("Failed to converge to tolerance")
    println("Final residual of: ", residuals[last_iter])

    return all_iters[:, 1:last_iter], residuals[1:last_iter]

end

In [None]:
#this is working!
all_iters_test, residuals_test = multiple_iters(all_iters, ρ0)

In [None]:
#maximum iterations of decreasing the barrier 
max_iters = 6

In [None]:
all_residuals_iter = zeros(num_iters)

In [None]:
res_tol = 1e-8

In [None]:
#here we could use the barrier as the tolerance for each of the solves 
#and use the final trajectories of each solve as an initial guess for the next solve 

# Outer barrier loop: solve the problem for the current barrier parameter ρ0,
# warm-start the next solve from the result, then shrink ρ0 by a factor of 10.
# This reassigns the globals all_iters, w0 and ρ0.
for k = 1:max_iters 

    #create a blank all_iters matrix 
    all_iters = zeros(idx.nz + idx.nμ, num_iters)

    #fill in the first column with the current initial guess 

    all_iters[:,1] = w0

    #w0 and ρ0 are updated as we iteratively solve the problem with tighter barrier parameters

    #run the backward/forward passes for the current barrier parameter
    all_iters_k, residuals_k = multiple_iters(all_iters, ρ0)

    #the last iterate becomes the initial guess of the next solve
    w0 = all_iters_k[:,end]

    #residual norm of the barrier KKT system at the current ρ0 (last value returned by multiple_iters)

    println("kkt res iteration ", k, ": ", residuals_k[end])

    #the barrier is decreased unconditionally; the convergence checks below are still disabled
    ρ0 = 0.1*ρ0


    #when we pass in zero as the penalty, we get the original kkt conditions 
    # if norm(residual_function(w0, 0)) < res_tol 

    #     println("converged ")

    # else 
    #     #decrease barrier parameter 
    #     ρ0 = 0.1*ρ0

    # end

    #this logic is for the legit convergence 
    # if norm(kkt_conditions(traj)) < tol 

    #     println("converged to the true KKT conditions")

    # else 
    #     #decrease the barrier parameter 
    #     ρ0 = 0.1*ρ0

    # end 
    

end

In [None]:
w0 

In [None]:
nu 

In [None]:
# Unpack the final iterate w0 into state and true-control trajectories.
u_solution_traj = zeros(nc, N-1)

x_solution_traj = zeros(nx, N)
x_solution_traj[:, 1] = x0

for stage in 1:(N - 1)

    # only the first entry of each control block is kept; the remaining
    # entries of the block are the sigmas
    u_solution_traj[1, stage] = first(w0[idx.u[stage]])

    # stage i of w0 holds the state at time step i+1
    x_solution_traj[:, stage + 1] = w0[idx.x[stage]]

end

In [None]:
plot(x_solution_traj')

In [None]:
plot(u_solution_traj') 

In [None]:
w0[idx.μ[7]]