In [70]:
import numpy as np

## Triangular Matrix Square
Suppose $A$ is an $n\times n$ upper triangular matrix, calculate $A^2$ in-place.

### Method 1

In [71]:
def upper_triangular_square(A):
    n = A.shape[0]
    # Store the result into the lower triangle
    for i in range(0, n):
        for j in range(i + 1, n):
            A[j, i] = A[i, i:j+1] @ A[i:j+1, j]
        x = A[i, i]
        A[i, i] = x * x
    
    # Move the result back to the upper triangle
    for i in range(0, n):
        for j in range(i + 1, n):
            A[i, j] = A[j, i]
            A[j, i] = 0.0

In [72]:
n = 1000
A = np.triu(np.random.rand(n, n))
A_squared_correct = A @ A
upper_triangular_square(A)
print(np.allclose(A, A_squared_correct))

%timeit upper_triangular_square(A)


True
352 ms ± 9.78 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Method 2

In [73]:
def upper_triangular_square(A):
    n = A.shape[0]
    for i in range(0, n):
        for j in range(n - 1, i - 1, -1):
            A[i, j] = A[i, :] @ A[:, j]

In [74]:
n = 1000
A = np.triu(np.random.rand(n, n))
A_squared_correct = A @ A
upper_triangular_square(A)
print(np.allclose(A, A_squared_correct))

%timeit upper_triangular_square(A)


True
371 ms ± 9.33 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
