### Descriptions
This notebook runs the simulation defined in the (older) paper: Shu-Li Sun, “Multi-sensor optimal information fusion Kalman filter with applications”, Aerospace Science and Technology 8 (2004) 57-62. Available at http://www.paper.edu.cn/scholar/showpdf/MUT2UN2IOTD0MxeQh. This appears to be an earlier edition of Sun's paper.

We first reproduce the results reported in the paper. Then we replace the second layer with the PSOF method to compare the performance. It is shown that the PSOF method achieves performance comparable to the method described in Sun's paper, without the need to calculate and communicate cross-covariance data.

Note that the $\Sigma$ matrix from Sun's paper seems to be ill-conditioned initially, so I had to add $10^{-10}I$ (an identity matrix of the same size as $\Sigma$) before inverting it to counter that issue.

### Simulation for two stage distributed Kalman filtering

In [None]:
import math
import numpy as np

#### Local Kalman filtering - Lemma 1

In [None]:
def local_kalman(x_t_t, p_t_t, y_tplus1, m_phi, m_gamma, m_h, m_q, m_r):
    """
    Run one predict/update cycle of a local Kalman filter.

    Takes the previous state estimate `x_t_t` and covariance `p_t_t`, the new
    measurement `y_tplus1`, and the model matrices (transition `m_phi`, noise
    input `m_gamma`, observation `m_h`, process-noise covariance `m_q`,
    measurement-noise covariance `m_r`).

    Returns the tuple (updated state estimate, updated covariance, Kalman gain).
    """
    # Time update: propagate the estimate and its covariance through the model.
    state_pred = m_phi @ x_t_t
    cov_pred = m_phi @ p_t_t @ m_phi.T + m_gamma @ m_q @ m_gamma.T

    # Measurement update: weigh the innovation by the Kalman gain.
    innovation = y_tplus1 - m_h @ state_pred
    innovation_cov = m_h @ cov_pred @ m_h.T + m_r
    gain = cov_pred @ m_h.T @ np.linalg.inv(innovation_cov)

    state_post = state_pred + gain @ innovation
    cov_post = (np.identity(m_phi.shape[1]) - gain @ m_h) @ cov_pred
    return state_post, cov_post, gain

#### Cross-covariances between tracks - Lemma 2

In [None]:
def get_cross_covariance(phi, gamma, h_i, h_j, q, k_i, k_j, p_t_t):
    """
    Propagate the cross-covariance between two local filters by one step.

    `p_t_t` is the previous cross-covariance between filters i and j; `k_i`,
    `k_j` are their current Kalman gains and `h_i`, `h_j` their observation
    matrices. `phi`, `gamma` and `q` are the shared model matrices.

    Returns the updated cross-covariance matrix.
    """
    eye = np.identity(phi.shape[1])
    # One-step prediction of the cross-covariance, shared by both filters.
    predicted = phi @ p_t_t @ phi.T + gamma @ q @ gamma.T
    correction_i = eye - k_i @ h_i
    correction_j_t = (eye - k_j @ h_j).T
    return correction_i @ predicted @ correction_j_t

#### Second stage optimal fusion

#####  Matrix coefficients for fusion - Theorem 1

In [None]:
def get_opti_fusion_matrix(p_ijs, x_is):
    """
    Fuse several state estimates using matrix weights.

    `p_ijs` is a flat, row-major list of the l*l covariance and
    cross-covariance blocks (block (i, j) sits at index i*l + j); `x_is` is
    the list of the l state estimates.

    Returns (stacked weight matrices, fused estimate, fused covariance).
    """
    count = round(math.sqrt(len(p_ijs)))
    dim = p_ijs[0].shape[1]

    # l identity blocks stacked vertically.
    stacked_eye = np.tile(np.identity(dim), (count, 1))
    # Assemble the full joint covariance from its blocks, one block-row at a time.
    block_rows = [np.hstack(p_ijs[row * count:(row + 1) * count]) for row in range(count)]
    joint = np.vstack(block_rows)

    # Small diagonal loading before inversion.
    joint_inv = np.linalg.inv(joint + 1e-10 * np.identity(dim * count))
    fused_cov = np.linalg.inv(stacked_eye.T @ joint_inv @ stacked_eye)
    weights = joint_inv @ stacked_eye @ fused_cov
    fused_state = weights.T @ np.concatenate(x_is)
    return weights, fused_state, fused_cov

##### Vector coefficients for fusion - Theorem 2

In [None]:
def get_opti_fusion_vector(p_ijs, x_is):
    """
    Fuse several state estimates using per-component (vector) weights.

    `p_ijs` is a flat, row-major list of the l*l covariance and
    cross-covariance blocks; `x_is` is the list of the l state estimates.

    Returns (stacked weights, fused estimate, fused covariance).
    """
    count = round(math.sqrt(len(p_ijs)))
    dim = p_ijs[0].shape[1]

    stacked_eye = np.tile(np.identity(dim), (count, 1))
    joint = np.vstack(
        [np.hstack(p_ijs[row * count:(row + 1) * count]) for row in range(count)]
    )

    # Element-wise mask that keeps only the diagonal of every block.
    block_diag_mask = np.tile(np.identity(dim), (count, count))
    masked = joint * block_diag_mask
    masked_inv = np.linalg.inv(masked + 1e-10 * np.identity(dim * count))

    normalizer = np.linalg.inv(stacked_eye.T @ masked_inv @ stacked_eye)
    weights = masked_inv @ stacked_eye @ normalizer
    fused_state = weights.T @ np.concatenate(x_is)

    # The fused covariance is evaluated against the full (unmasked) joint covariance.
    left = normalizer @ stacked_eye.T @ masked_inv
    fused_cov = left @ joint @ masked_inv @ stacked_eye @ normalizer
    return weights, fused_state, fused_cov

##### Scalar coefficients for fusion - Theorem 3

In [None]:
def get_opti_fusion_scalar(p_ijs, x_is):
    """
    Fuse several state estimates using one scalar weight per estimate.

    `p_ijs` is a flat, row-major list of the l*l covariance and
    cross-covariance blocks; `x_is` is the list of the l state estimates,
    which are joined column-wise.

    Returns (weights as an (l, 1) array, fused estimate, fused covariance).
    """
    count = round(math.sqrt(len(p_ijs)))
    dim = p_ijs[0].shape[1]
    ones = np.full((count, 1), 1.0, dtype=float)

    joint = np.vstack(
        [np.hstack(p_ijs[row * count:(row + 1) * count]) for row in range(count)]
    )
    # l x l matrix holding the trace of each covariance block.
    traces = np.array(
        [[np.trace(p_ijs[row * count + col]) for col in range(count)] for row in range(count)],
        dtype=float,
    )
    traces_inv = np.linalg.inv(traces + 1e-10 * np.identity(count))

    normalizer = np.linalg.inv(ones.T @ traces_inv @ ones)
    weights = traces_inv @ ones @ normalizer
    fused_state = np.concatenate(x_is, axis=1) @ weights

    # Expand each scalar weight a_i into a_i * I and lay the blocks side by side.
    weights_expanded = np.kron(weights.T, np.identity(dim))
    fused_cov = weights_expanded @ joint @ weights_expanded.T
    return weights, fused_state, fused_cov

#### Simulation for Sun's paper
This section reproduces the simulation results in Sun's paper. In Table 1 of Sun's paper, the iteration indices appear to be off by one. For comparison purposes, we inherit this offset in the printed iteration numbers.

In [None]:
# Simulation parameters
#
# NOTE: the `np.float` alias was removed in NumPy 1.24; the builtin `float`
# (which it aliased) is used instead so this cell runs on current NumPy.

T = 0.01  # presumably the sampling period -- TODO confirm against the paper
sigma_w_sqr = 1.0    # process-noise variance
sigma_v1_sqr = 8.0   # measurement-noise variance, channel 1
sigma_v2_sqr = 15.0  # measurement-noise variance, channel 2
sigma_v3_sqr = 20.0  # measurement-noise variance, channel 3

# Each channel observes exactly one component of the 3-dimensional state.
H_1 = np.array([[1.0, 0.0, 0.0]], dtype=float)
H_2 = np.array([[0.0, 1.0, 0.0]], dtype=float)
H_3 = np.array([[0.0, 0.0, 1.0]], dtype=float)
# Constant measurement fed to each channel at every iteration.
y_1 = np.array([1.0], dtype=float)
y_2 = np.array([1.0], dtype=float)
y_3 = np.array([1.0], dtype=float)

# State-transition and noise-input matrices.
Phi = np.array([[1.0, T, T * T / 2.0], [0.0, 1.0, T], [0.0, 0.0, 1.0]], dtype=float)
Gamma = np.array([[0.0], [0.0], [1.0]], dtype=float)

# Distributed fusion
m_q = np.full((1,1), sigma_w_sqr)
m_r_1 = np.full((1,1), sigma_v1_sqr)
m_r_2 = np.full((1,1), sigma_v2_sqr)
m_r_3 = np.full((1,1), sigma_v3_sqr)
# Joint measurement-noise covariance, used by the centralized filter.
m_r = np.array([[sigma_v1_sqr, 0.0, 0.0], [0.0, sigma_v2_sqr, 0.0], [0.0, 0.0, sigma_v3_sqr]], dtype=float)

# Iteration labels at which covariance traces are printed.
iterations_to_print = {10, 50, 100, 150, 200}

# Second-stage fusion rule for each tag: matrix ('m'), vector ('v') or scalar
# ('s') weights. Tag 'u' has no entry and runs the local filters unfused.
fusion_rules = {
    'm': get_opti_fusion_matrix,
    'v': get_opti_fusion_vector,
    's': get_opti_fusion_scalar,
}

for tag in ('u', 'm', 'v', 's'):
    fuse = fusion_rules.get(tag)

    # Every run starts from the same initial estimate and covariances.
    x_0 = np.zeros((3,1), dtype=float)
    x_1 = x_2 = x_3 = x_0
    P_0 = 0.1 * np.identity(3)
    P_1 = P_2 = P_3 = P_0
    P_1_2 = P_1_3 = P_2_1 = P_2_3 = P_3_1 = P_3_2 = P_0

    for i in range(200):
        # First stage: one local Kalman filter per channel.
        x_1, P_1, K_1 = local_kalman(x_1, P_1, y_1, Phi, Gamma, H_1, m_q, m_r_1)
        x_2, P_2, K_2 = local_kalman(x_2, P_2, y_2, Phi, Gamma, H_2, m_q, m_r_2)
        x_3, P_3, K_3 = local_kalman(x_3, P_3, y_3, Phi, Gamma, H_3, m_q, m_r_3)

        # The printed label is i+2 to match the indexing used in the paper's table.
        should_print = (i+2) in iterations_to_print

        if fuse is None:
            # No second stage data fusion
            if should_print:
                print(f"Iteration: {(i+2)}  tr(P_1(t|t)): {np.trace(P_1)}  tr(P_2(t|t)): {np.trace(P_2)}  tr(P_3(t|t)): {np.trace(P_3)}")
            continue

        # Second stage: update the upper-triangle cross-covariances, then mirror them.
        P_1_2 = get_cross_covariance(Phi, Gamma, H_1, H_2, m_q, K_1, K_2, P_1_2)
        P_1_3 = get_cross_covariance(Phi, Gamma, H_1, H_3, m_q, K_1, K_3, P_1_3)
        P_2_3 = get_cross_covariance(Phi, Gamma, H_2, H_3, m_q, K_2, K_3, P_2_3)
        P_2_1 = P_1_2.T
        P_3_1 = P_1_3.T
        P_3_2 = P_2_3.T

        _, x_0, P_0 = fuse([P_1, P_1_2, P_1_3, P_2_1, P_2, P_2_3, P_3_1, P_3_2, P_3], [x_1, x_2, x_3])
        if should_print:
            print(f"Iteration: {(i+2)}  tr(P_0^{tag}(t|t)): {np.trace(P_0)}")

        # Feed the fused result back to all three local filters.
        x_1 = x_2 = x_3 = x_0
        P_1 = P_2 = P_3 = P_0

# Centralized fusion: a single Kalman filter that processes all three
# channels at once, using the stacked observation matrix H and the joint
# measurement-noise covariance m_r.
x_0 = np.zeros((3,1), dtype=float)
P_0 = 0.1 * np.identity(3)
# The stacked measurement must be a (3, 1) column so that `y - H @ x` stays a
# column vector. A (1, 3) row would broadcast against the (3, 1) prediction
# into a (3, 3) innovation and turn the state estimate into a matrix. The
# column stacks the same per-channel measurements the distributed runs use.
y = np.concatenate((y_1, y_2, y_3)).reshape(-1, 1)
H = np.concatenate((H_1, H_2, H_3))
for i in range(200):
    x_0, P_0, _ = local_kalman(x_0, P_0, y, Phi, Gamma, H, m_q, m_r)
    # The printed label is i+2 to match the indexing used in the paper's table.
    if (i+2) in iterations_to_print:
        print(f"Iteration: {i+2}   tr(P_c(t|t)): {np.trace(P_0)}")

### Simulation for two stage distributed filtering with PSOF as the second stage

#### PSOF codes

In [None]:
import numpy as np
import math

def func_constant_step_size(sub_gradient, iteration, diff_from_best_estimate):
    """
    Constant step-size rule: every iteration uses the same step.

    All three arguments are ignored; they exist so that every step rule
    shares one call signature.
    """
    fixed_step = 0.0004
    return fixed_step

def func_constant_step_length(sub_gradient, iteration, diff_from_best_estimate):
    """
    Constant step-length rule: the step is 0.002 divided by the subgradient norm.

    `sub_gradient` is the stack of subgradient matrices; its norm is the
    square root of the sum of all squared entries (equivalently, the sum over
    the stack of trace(G @ G.T)). `iteration` and `diff_from_best_estimate`
    are ignored.

    Returns 0.0 when the subgradient is exactly zero. Dividing by the zero
    norm would give an infinite step, and the caller's `subgradient * step`
    update would then become NaN.
    """
    squared_norm = np.sum(np.square(sub_gradient))
    if squared_norm == 0:
        return 0.0
    return 0.002 / np.sqrt(squared_norm)
        
def func_square_summable_not_summable(sub_gradient, iteration, diff_from_best_estimate):
    """
    Square-summable-but-not-summable rule: the step is 3.0 / (iteration + 1).

    Only `iteration` (zero-based) is used; the other arguments are ignored.
    """
    one_based_iteration = iteration + 1
    return 3.0 / one_based_iteration

def func_not_summable_diminishing_step_size(sub_gradient, iteration, diff_from_best_estimate):
    """
    Non-summable diminishing step-size rule: the step is 0.035 / sqrt(iteration + 1).

    Only `iteration` (zero-based) is used; the other arguments are ignored.
    """
    decay = np.sqrt(iteration + 1)
    return 0.035 / decay

def func_not_summable_diminishing_step_length(sub_gradient, iteration, diff_from_best_estimate):
    """
    Non-summable diminishing step-length rule.

    The step is 0.19 / sqrt(squared_norm * (iteration + 1)), where
    squared_norm is the sum of all squared entries of `sub_gradient`
    (equivalently, the sum over the stack of trace(G @ G.T)).
    `diff_from_best_estimate` is ignored.

    Returns 0.0 when the subgradient is exactly zero. Dividing by the zero
    norm would give an infinite step, and the caller's `subgradient * step`
    update would then become NaN.
    """
    squared_norm = np.sum(np.square(sub_gradient))
    if squared_norm == 0:
        return 0.0
    return 0.19 / np.sqrt(squared_norm * (iteration + 1))

def func_polyak_with_estimate(sub_gradient, iteration, diff_from_best_estimate):
    """
    Polyak-style step rule that uses the gap to the best objective seen so far.

    The step is
        (0.035 * squared_norm / sqrt(iteration + 1) + diff_from_best_estimate) / squared_norm
    where squared_norm is the sum of all squared entries of `sub_gradient`
    (equivalently, the sum over the stack of trace(G @ G.T)).

    Returns 0.0 when the subgradient is exactly zero, because the division
    would otherwise yield NaN or infinity and corrupt the caller's iterate.
    """
    squared_norm = np.sum(np.square(sub_gradient))
    if squared_norm == 0:
        return 0.0
    return (0.035 * squared_norm / np.sqrt(iteration + 1) + diff_from_best_estimate) / squared_norm

# Projected Subgradient Method

# Usage:
#   a_i, mse, v_opt, v_ij_opt, mse_iteration = PSOF(
#    N, M,
#    joint_covariance,
#    unknown_index_matrix,
#    step_func=func_constant_step_size,
#    max_iteration=12000)
#
# Inputs:
#   N - the input measurement count
#   M - the input measurement dimension
#   joint_covariance - the joint covariance matrix
#       for all the measurements. It's a 4-d
#       tensor, with the first two dimensions
#       referring to the measurements and the last
#       two dimensions referring to the measurement
#       components. For unknown cross-correlation
#       matrices, the values are not used.
#   unknown_index_matrix - a bool numpy array.
#       The element of the matrix at
#       location (i,j) is set to True if V_ij is unknown;
#       otherwise it is set to False.
#   step_func - the step-size rule, called as
#       step_func(sub_gradient, iteration, diff_from_best_estimate)
#   max_iteration - the number of subgradient iterations to run
#
# Outputs:
#   a_i - the matrix weights, a tensor of 3d, with the first
#         dimension index being the measurement index
#   mse - the resulting mean square error
#   v_opt - the estimate covariance
#   v_ij_opt - the maximizing cross correlation matrix at (i,j)
#   mse_iteration - the list of objective values, one per iteration
def PSOF(N, M, joint_covariance, unknown_index_matrix, step_func=func_constant_step_size, max_iteration=12000):
    """
    Fuse N estimates of dimension M with a projected subgradient method.

    Parameters:
        N: number of input estimates.
        M: dimension of each estimate.
        joint_covariance: array of shape (N, N, M, M); block [i, j] is the
            (cross-)covariance between estimates i and j. Blocks marked as
            unknown are never read.
        unknown_index_matrix: boolean array of shape (N, N); entry [i, j] is
            True when the cross-covariance block (i, j) is unknown.
        step_func: step-size rule, called as
            step_func(sub_gradient, iteration, diff_from_best_estimate).
        max_iteration: number of subgradient iterations to run.

    Returns the tuple (A, mse_best, V_opt, V_ij_opt, mse_iteration):
        A: (N, M, M) float32 weight matrices of the best iterate found.
        mse_best: the smallest objective value seen over all iterations.
        V_opt: (M, M) covariance of the fused estimate for weights A.
        V_ij_opt: (N, N, M, M) cross-covariances reconstructed for the
            unknown pairs (zero elsewhere).
        mse_iteration: list of the objective value after each iteration.

    Raises AssertionError when the array shapes do not match N and M.
    """
    assert (N, N, M, M) == joint_covariance.shape
    assert (N, N) == unknown_index_matrix.shape
    V = joint_covariance

    # Eigendecompose every self-covariance V[i, i] = U diag(lambda) U^T and
    # build the whitening transform B[i] = diag(lambda^-1/2) U^T.
    B = np.zeros((N, M, M), dtype=np.float32)
    Lambda_inv_sqrt = np.zeros((N,M), dtype=np.float32)
    U = np.zeros((N, M, M), dtype=np.float32)
    for i in range(N):
        Lambda_inv_sqrt[i], U[i] = np.linalg.eigh(joint_covariance[i, i])
        Lambda_inv_sqrt[i] = np.reciprocal(np.sqrt(Lambda_inv_sqrt[i]))
        B[i] = np.matmul(np.diag(Lambda_inv_sqrt[i]), np.transpose(U[i]))

    # Known cross-covariances expressed in the whitened coordinates:
    # V'[i, j] = B[i] V[i, j] B[j]^T.
    V_prime = np.zeros((N, N, M, M), dtype=np.float32)
    for i in range(N):
        for j in range(N):
            if i != j and (not unknown_index_matrix[i, j]):
                V_prime[i, j] = B[i]
                V_prime[i, j] = np.matmul(V_prime[i, j], V[i, j])
                V_prime[i, j] = np.matmul(V_prime[i, j], U[j])
                V_prime[i, j] = np.matmul(V_prime[i, j], np.diag(Lambda_inv_sqrt[j]))

    # Sigma = sum_i V[i, i]^-1; its inverse is used by the projection step.
    Sigma = np.zeros((M, M), dtype=np.float32)
    for i in range(N):
        Sigma = Sigma + np.linalg.inv(V[i, i])
    Sigma_inv = np.linalg.inv(Sigma)

    # Initial weights in whitened coordinates; they map back to A[i] = I / N.
    A_prime = np.zeros((N, M, M), dtype=np.float32)
    for i in range(N):
        A_prime[i] = np.matmul(U[i], np.diag(np.reciprocal(Lambda_inv_sqrt[i]))) / N

    mse_iteration = []
    mse_best = np.finfo(np.float32).max
    last_mse_diff = 0
    A_prime_best = np.zeros((N, M, M), dtype=np.float32)
    for iteration in range(max_iteration):

        # Get SVD of A[j]^T A[i] for every unknown pair.
        C = np.zeros((N, N, M, M), dtype=np.float32)
        D = np.zeros((N, N, M, M), dtype=np.float32)
        Lambda_ij = np.zeros((N, N, M), dtype=np.float32)
        for i in range(N):
            for j in range(N):
                if i != j and unknown_index_matrix[i, j]:
                    C[i, j], Lambda_ij[i, j], D[i, j] = np.linalg.svd(np.matmul(np.transpose(A_prime[j]), A_prime[i]))
                    # np.linalg.svd returns V^T as its third output; store V.
                    D[i, j] = np.transpose(D[i, j])

        # compute subgradients
        dA_prime = np.zeros((N, M, M), np.float32)
        for i in range(N):
            dA_prime[i] = A_prime[i]
            for j in range(N):
                if j != i:
                    if unknown_index_matrix[i, j]:
                        dA_prime[i] = dA_prime[i] + np.matmul(np.matmul(A_prime[j], C[i, j]), np.transpose(D[i, j]))
                    else:
                        dA_prime[i] = dA_prime[i] + np.matmul(A_prime[j], np.transpose(V_prime[i, j]))
            dA_prime[i] = dA_prime[i] * 2.0

        # apply step size & subgradient
        step = step_func(dA_prime, iteration, last_mse_diff)
        for i in range(N):
            A_prime[i] = A_prime[i] - dA_prime[i] * step

        # project onto the constraint hyperplanes (sum_i A'[i] B[i] = I)
        A_prime_dot_B = np.zeros((M, M), np.float32)
        for i in range(N):
            A_prime_dot_B = A_prime_dot_B + np.matmul(A_prime[i], B[i])
        A_prime_dot_B_Sigma_inv = np.matmul(np.eye(M, dtype=np.float32) - A_prime_dot_B, Sigma_inv)
        for i in range(N):
            A_prime[i] = A_prime[i] + np.matmul(A_prime_dot_B_Sigma_inv, np.transpose(B[i]))

        # compute mse: unknown pairs contribute the sum of singular values of
        # A'[j]^T A'[i]; known pairs contribute the usual trace term.
        mse_prime = 0.0
        for i in range(N):
            mse_prime = mse_prime + np.trace(np.matmul(np.transpose(A_prime[i]), A_prime[i]))
        for i in range(N):
            for j in range(N):
                if i != j:
                    if unknown_index_matrix[i, j]:
                        _, sigmas, _ = np.linalg.svd(np.matmul(np.transpose(A_prime[j]), A_prime[i]))
                        mse_prime = mse_prime + np.sum(sigmas)
                    else:
                        mse_prime = mse_prime + np.trace(np.matmul(np.matmul(A_prime[i], V_prime[i, j]), np.transpose(A_prime[j])))

        mse_iteration.append(mse_prime)
        if mse_prime < mse_best:
            mse_best = mse_prime
            # Snapshot the weights. A_prime is updated in place on the next
            # iteration, so keeping a reference would silently track the
            # latest iterate instead of the best one.
            A_prime_best = A_prime.copy()
        # Zero right after an improvement, positive otherwise.
        last_mse_diff = mse_prime - mse_best

    # Map the best whitened weights back to the original coordinates.
    A = np.zeros((N, M, M), dtype=np.float32)
    for i in range(N):
        A[i] = np.matmul(np.matmul(A_prime_best[i], np.diag(Lambda_inv_sqrt[i])), np.transpose(U[i]))

    # Reconstruct the cross-covariance for every unknown pair from the SVD
    # of the best weights.
    V_ij_opt = np.zeros((N, N, M, M), dtype=np.float32)
    for i in range(N):
        for j in range(N):
            if i != j and unknown_index_matrix[i, j]:
                C_ij, _, D_ij = np.linalg.svd(np.matmul(np.transpose(A_prime_best[j]), A_prime_best[i]))
                part_left = np.matmul(U[i], np.diag(np.reciprocal(Lambda_inv_sqrt[i])))
                part_middle = np.matmul(np.transpose(D_ij), np.transpose(C_ij))
                part_right = np.matmul(np.diag(np.reciprocal(Lambda_inv_sqrt[j])), np.transpose(U[j]))
                V_ij_opt[i, j] = np.matmul(np.matmul(part_left, part_middle), part_right)

    # Fused covariance: sum_ij A[i] V[i, j] A[j]^T, substituting the
    # reconstructed block wherever the input block was unknown.
    V_opt = np.zeros((M, M), dtype=np.float32)
    for i in range(N):
        for j in range(N):
            if i != j and unknown_index_matrix[i, j]:
                V_opt = V_opt + np.matmul(np.matmul(A[i], V_ij_opt[i, j]), np.transpose(A[j]))
            else:
                V_opt = V_opt + np.matmul(np.matmul(A[i], V[i, j]), np.transpose(A[j]))

    return A, mse_best, V_opt, V_ij_opt, mse_iteration

#### Simulation

In [None]:
# Simulation parameters
#
# NOTE: the `np.float` alias was removed in NumPy 1.24; the builtin `float`
# (which it aliased) is used instead so this cell runs on current NumPy.

T = 0.01  # presumably the sampling period -- TODO confirm against the paper
sigma_w_sqr = 1.0    # process-noise variance
sigma_v1_sqr = 8.0   # measurement-noise variance, channel 1
sigma_v2_sqr = 15.0  # measurement-noise variance, channel 2
sigma_v3_sqr = 20.0  # measurement-noise variance, channel 3

# Each channel observes exactly one component of the 3-dimensional state.
H_1 = np.array([[1.0, 0.0, 0.0]], dtype=float)
H_2 = np.array([[0.0, 1.0, 0.0]], dtype=float)
H_3 = np.array([[0.0, 0.0, 1.0]], dtype=float)
# Constant measurement fed to each channel at every iteration.
y_1 = np.array([1.0], dtype=float)
y_2 = np.array([1.0], dtype=float)
y_3 = np.array([1.0], dtype=float)

# State-transition and noise-input matrices.
Phi = np.array([[1.0, T, T * T / 2.0], [0.0, 1.0, T], [0.0, 0.0, 1.0]], dtype=float)
Gamma = np.array([[0.0], [0.0], [1.0]], dtype=float)

# Distributed fusion
m_q = np.full((1,1), sigma_w_sqr)
m_r_1 = np.full((1,1), sigma_v1_sqr)
m_r_2 = np.full((1,1), sigma_v2_sqr)
m_r_3 = np.full((1,1), sigma_v3_sqr)

# Iteration labels at which the fused covariance trace is printed.
iterations_to_print = {10, 50, 100, 150, 200}

# Every off-diagonal cross-covariance is treated as unknown by PSOF.
unknown_index_matrix = np.array([[False, True, True], [True, False, True], [True, True, False]], dtype=bool)
N = 3  # number of local estimates
M = 3  # state dimension

# Common initial estimate and covariance for the three local filters.
x_0 = np.zeros((3,1), dtype=float)
x_1 = x_2 = x_3 = x_0
P_0 = 0.1 * np.identity(3)
P_1 = P_2 = P_3 = P_0

for i in range(200):

    # First stage: one local Kalman filter per channel.
    x_1, P_1, K_1 = local_kalman(x_1, P_1, y_1, Phi, Gamma, H_1, m_q, m_r_1)
    x_2, P_2, K_2 = local_kalman(x_2, P_2, y_2, Phi, Gamma, H_2, m_q, m_r_2)
    x_3, P_3, K_3 = local_kalman(x_3, P_3, y_3, Phi, Gamma, H_3, m_q, m_r_3)

    # Second stage data fusion with PSOF. Row k of the joint covariance is
    # filled with P_k; the off-diagonal blocks are placeholders only, since
    # every cross-covariance is flagged as unknown.
    joint_covariance = np.array(
        [[local_cov] * 3 for local_cov in (P_1, P_2, P_3)], np.float32
    )
    _, _, P_0, _, _ = PSOF(N, M, joint_covariance, unknown_index_matrix)

    should_print = (i+1) in iterations_to_print
    if should_print:
        print(f"Iteration: {(i+1)}")
        print(f"  tr(P_0^psof(t|t)): {np.trace(P_0)}")

    # Feed the fused covariance back to all three local filters.
    P_1 = P_2 = P_3 = P_0