# This collection of examples shows how to impute a matrix with missing entries.

In [6]:
import numpy as np
import pandas as pd
from pyglrm import glrm, observations

## Example 1: automatically identify numpy.nan as missing entries

In [7]:
# Input matrix for the GLRM; the np.nan at position (0, 2) marks the missing entry.
A = np.array([
    [1, 2, np.nan, 4],
    [2, 4, 6, 8],
    [4, 5, 6, 7],
])
# Create the GLRM instance, requesting two components (n_components=2).
g = glrm(n_components=2)

In [8]:
# Fit the GLRM to A and return the matrix with the missing entry imputed.
# No observation list is passed: the np.nan at position (0, 2) is identified
# automatically as the missing entry.
g.fit_impute(A)

array([[1.00619531, 1.99253287, 3.00366876, 4.00290162],
       [2.01123193, 3.98294075, 6.00422545, 8.00171359],
       [3.9952336 , 5.01114387, 5.99294672, 6.99934027]])

In [9]:
# Inspect the hyperparameters of the fitted GLRM. The 'obs' entry lists the
# (row, column) indices treated as observed; here that is every index except (0, 2).
# NOTE(review): the getter is spelled `get_hyperprams` while the setter used later
# is `set_hyperparams` -- confirm the spelling against the installed pyglrm version.
g.get_hyperprams()

{'losses': <PyCall.jlwrap LowRankModels.QuadLoss(1.0, LowRankModels.RealDomain())>,
 'rx': <PyCall.jlwrap LowRankModels.ZeroReg()>,
 'ry': <PyCall.jlwrap LowRankModels.ZeroReg()>,
 'n_components': 2,
 'obs': [(0, 0),
  (0, 1),
  (0, 3),
  (1, 0),
  (1, 1),
  (1, 2),
  (1, 3),
  (2, 0),
  (2, 1),
  (2, 2),
  (2, 3)],
 'offset': False,
 'scale': False}

## Example 2: manually set the list of observed entries

In [10]:
# Matrix to fit the GLRM onto; the 9999 at position (0, 2) is a placeholder
# standing in for a missing value (there is no np.nan in this array).
B = np.array([
    [1, 2, 9999, 4],
    [2, 4, 6, 8],
    [4, 5, 6, 7],
])
# Collect the indices of the non-NaN entries of B.
obs = observations(B)
# Drop (0, 2) from the list so that this entry is regarded as missing.
obs.remove((0, 2))
# Hand the edited list of observed entries to the GLRM instance.
g.set_hyperparams(obs=obs)

In [11]:
# Inspect the hyperparameters of the GLRM instance to check the manually set
# observation list: 'obs' should contain every index except (0, 2).
# NOTE(review): the getter is spelled `get_hyperprams` while the setter used above
# is `set_hyperparams` -- confirm the spelling against the installed pyglrm version.
g.get_hyperprams()

{'losses': <PyCall.jlwrap LowRankModels.QuadLoss(1.0, LowRankModels.RealDomain())>,
 'rx': <PyCall.jlwrap LowRankModels.ZeroReg()>,
 'ry': <PyCall.jlwrap LowRankModels.ZeroReg()>,
 'n_components': 2,
 'obs': [(0, 0),
  (0, 1),
  (0, 3),
  (1, 0),
  (1, 1),
  (1, 2),
  (1, 3),
  (2, 0),
  (2, 1),
  (2, 2),
  (2, 3)],
 'offset': False,
 'scale': False}

In [12]:
# Fit the GLRM to B and impute the (0, 2) entry. identify_obs=False disables the
# automatic identification of observed entries, so the observation list supplied
# earlier through set_hyperparams(obs=obs) is the one in effect.
g.fit_impute(B, identify_obs=False)

array([[1.01436625, 2.00794468, 3.00358485, 3.99510385],
       [1.99236176, 3.99525442, 6.00225748, 8.00104437],
       [3.99955647, 4.99919028, 6.00349704, 6.99846281]])