In [1]:
import numpy as np

### Calculating Perplexity

In [None]:
# Read the saved model outputs and the matching target ids from disk
# (predictions first, then targets).
predictions, targets = np.load('predictions.npy'), np.load('targets.npy')

# Report the dimensions of both arrays, one per line.
print(
    f'predictions has shape: {predictions.shape}',
    f'targets has shape: {targets.shape}',
    sep='\n',
)


'''
predictions has shape: (32, 64, 256)
targets has shape: (32, 64)

target:
array([[105, 110,  32, ...,   0,   0,   0],
       [ 97, 110, 110, ...,   0,   0,   0],
       [111, 102,  32, ...,   0,   0,   0],
       ...,
       [105,  32,  97, ...,   0,   0,   0],
       [101, 100, 103, ...,   0,   0,   0],
       [121, 111, 117, ...,   0,   0,   0]], dtype=int32)

predictions:
array([[[-15.579997, -25.735575, -15.576893, ..., -15.574669,
         -15.571493, -15.569425],
        [-24.01082 , -35.80076 , -23.743649, ..., -23.807941,
         -23.727554, -23.804428],
        [-15.783699, -14.416848, -15.512791, ..., -15.729168,
         -15.671564, -15.53212 ],
        ...,
        [-22.37673 , -29.096514, -22.266487, ..., -22.157543,
         -22.212416, -22.285917],
        [-23.18771 , -39.62314 , -23.07188 , ..., -23.058746,
         -22.928747, -23.131004],
        [-21.843483, -26.035233, -21.877586, ..., -21.576801,
         -21.74238 , -21.694439]],

       [[-15.579997, -25.735575, -15.576893, ..., -15.574669,
         -15.571493, -15.569425],
        [-15.887024, -16.101957, -15.914328, ..., -15.740339,
         -15.764511, -15.746195],
        [-17.759518, -19.134003, -17.479977, ..., -17.778797,
         -17.484093, -17.56089 ],
         ...
'''

In [None]:
# One-hot encode the integer targets. Indexing an identity matrix of size
# predictions.shape[-1] with `targets` selects, for every target id, the row
# that holds a single 1.0 in that id's column. The result therefore has the
# shape of `targets` plus one trailing axis of length predictions.shape[-1],
# so it lines up element-wise with `predictions`.
reshaped_targets = np.eye(predictions.shape[-1])[targets]
# The label is spelled like the variable it reports on.
print(f"reshaped_targets has shape: {reshaped_targets.shape}")

'''
reshaped_targets has shape: (32, 64, 256)

array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [1., 0., 0., ..., 0., 0., 0.],
'''

In [None]:
# Multiplying by the one-hot targets zeroes every entry along the last axis
# except the one at the target index, so summing over that axis leaves just
# the predicted value for the true target at each position.
log_p = (predictions * reshaped_targets).sum(axis=-1)

In [None]:
# `targets == 0` compares every element of `targets` with 0 and yields a
# boolean array of the same shape: True where the element equals 0 (treated
# here as padding) and False everywhere else.

# Subtracting that boolean array from 1.0 turns it into a float mask:
# positions where targets was 0 (True -> 1) become 0.0, and positions where
# targets was not 0 (False -> 0) become 1.0.
non_pad = 1.0 - (targets == 0)
print(
    f'non_pad has shape: {non_pad.shape}\n',
    f'non_pad looks like this: \n\n {non_pad}',
    sep='\n',
)

'''
non_pad has shape: (32, 64)

non_pad looks like this:

 [[1. 1. 1. ... 0. 0. 0.]
 [1. 1. 1. ... 0. 0. 0.]
 [1. 1. 1. ... 0. 0. 0.]
 ...
 [1. 1. 1. ... 0. 0. 0.]
 [1. 1. 1. ... 0. 0. 0.]
 [1. 1. 1. ... 0. 0. 0.]]
'''

In [None]:
# non_pad is 0.0 at padded positions and 1.0 elsewhere, so an element-wise
# product with it zeroes the log probabilities that belong to padding and
# keeps the rest, removing the effect of padding on the metric.
real_log_p = np.multiply(log_p, non_pad)
print(f'real log probabilities still have shape: {real_log_p.shape}')

In [None]:
# Show the log probabilities side by side: raw first, then with the padded
# positions zeroed out.
print(
    f'log probabilities before filtering padding: \n\n {log_p}\n',
    f'log probabilities after filtering padding: \n\n {real_log_p}',
    sep='\n',
)

'''
log probabilities before filtering padding:

 [[ -5.39654493  -1.03111839  -0.66916656 ... -22.37672997 -23.18770981
  -21.84348297]
 [ -4.58577061  -1.13412857  -8.53803253 ... -20.15686035 -26.83709717
  -23.57501984]
 [ -5.22238874  -1.28241444  -0.17312431 ... -21.328228   -19.85441208
  -33.88444138]
 ...
 [ -5.39654493 -17.29168129  -4.36076593 ... -20.82580185 -21.06583786
  -22.44311523]
 [ -5.93131638 -14.24741745  -0.26373291 ... -26.74324799 -18.38433075
  -22.35527802]
 [ -5.67053604  -0.10595131   0.         ... -23.33252335 -28.08737564
  -23.87880707]]

log probabilities after filtering padding:

 [[ -5.39654493  -1.03111839  -0.66916656 ...  -0.          -0.
   -0.        ]
 [ -4.58577061  -1.13412857  -8.53803253 ...  -0.          -0.
   -0.        ]
 [ -5.22238874  -1.28241444  -0.17312431 ...  -0.          -0.
   -0.        ]
 ...
 [ -5.39654493 -17.29168129  -4.36076593 ...  -0.          -0.
   -0.        ]
 [ -5.93131638 -14.24741745  -0.26373291 ...  -0.          -0.
   -0.        ]
 [ -5.67053604  -0.10595131   0.         ...  -0.          -0.
   -0.        ]]
'''

In [None]:
# Number of real (non-padding) positions in each sequence.
tokens_per_seq = np.sum(non_pad, axis=1)

# A sequence made up entirely of padding has zero real tokens; dividing by
# that count would give 0/0 = nan and turn the whole batch mean into nan.
# Such sequences carry no information about the model, so leave them out.
has_tokens = tokens_per_seq > 0

# Mean log probability per real token, for every sequence that has tokens.
seq_mean_log_p = np.sum(real_log_p, axis=1)[has_tokens] / tokens_per_seq[has_tokens]

# Log perplexity is the negated per-token mean, averaged over the sequences;
# exponentiating it gives the perplexity.
log_ppx = np.mean(-seq_mean_log_p)
print(f'The log perplexity and perplexity of the model are respectively: {log_ppx} and {np.exp(log_ppx)}')

'''
The log perplexity and perplexity of the model are respectively: 2.6211854987065033 and 13.752016923578548
'''