# This collection of examples shows how to impute a matrix with missing entries.

In [1]:
import numpy as np
import pandas as pd
from pyglrm import *

## Example 1: automatically identify numpy.nan as missing entries

In [2]:
# Input matrix for the GLRM: 3 rows x 4 columns, with the (0, 2) entry
# missing and marked as np.nan.
A = np.array(
    [
        [1, 2, np.nan, 4],
        [2, 4, 6, 8],
        [4, 5, 6, 7],
    ]
)
# Create the GLRM with 2 components; every other hyperparameter keeps its default.
g = glrm(n_components=2)

In [3]:
# Fit the GLRM onto A and impute its missing entry. No list of observed entries
# is supplied here: the locations of the missing (np.nan) entries are identified
# automatically. The imputed matrix is displayed as the cell output.
g.fit_impute(A)

array([[0.99870304, 2.00032385, 3.0092679 , 3.99784477],
       [1.99713125, 3.9981317 , 6.01375681, 7.98871242],
       [4.00129844, 4.99872837, 5.99511497, 7.00083044]])

In [20]:
# Inspect the hyperparameters of the GLRM instance; the returned dict is shown
# as the cell output.
# NOTE(review): the method name is spelled `get_hyperprams` in this call; the
# recorded output shows it resolves, so confirm against the installed pyglrm
# API before "correcting" the spelling.
g.get_hyperprams()

{'losses': <PyCall.jlwrap LowRankModels.QuadLoss(1.0, LowRankModels.RealDomain())>,
 'rx': <PyCall.jlwrap LowRankModels.ZeroReg()>,
 'ry': <PyCall.jlwrap LowRankModels.ZeroReg()>,
 'n_components': 2,
 'obs': [(0, 0),
  (0, 1),
  (0, 3),
  (1, 0),
  (1, 1),
  (1, 2),
  (1, 3),
  (2, 0),
  (2, 1),
  (2, 2),
  (2, 3)],
 'offset': False,
 'scale': False}

## Example 2: manually set the list of observed entries

In [5]:
# A numpy array to fit the GLRM onto. B contains no np.nan; the value 9999 at
# position (0, 2) stands in for the missing entry.
B = np.array([[1, 2, 9999, 4], [2, 4, 6, 8], [4, 5, 6, 7]])
# Get the list of indices of the non-NaN entries of B.
obs = observations(B)
# Regard the (0, 2) entry as missing by removing it from the observed list.
obs.remove((0, 2))
# Store the edited list of observed entries on the existing GLRM instance g.
g.set_hyperparams(obs=obs)

In [6]:
# Get the hyperparameters of the GLRM instance to check the 'obs' list that was
# just set; the returned dict is shown as the cell output.
# NOTE(review): the method name is spelled `get_hyperprams` in this call; the
# recorded output shows it resolves, so confirm against the installed pyglrm
# API before "correcting" the spelling.
g.get_hyperprams()

{'losses': <PyCall.jlwrap LowRankModels.QuadLoss(1.0, LowRankModels.RealDomain())>,
 'rx': <PyCall.jlwrap LowRankModels.ZeroReg()>,
 'ry': <PyCall.jlwrap LowRankModels.ZeroReg()>,
 'n_components': 2,
 'obs': [(0, 0),
  (0, 1),
  (0, 3),
  (1, 0),
  (1, 1),
  (1, 2),
  (1, 3),
  (2, 0),
  (2, 1),
  (2, 2),
  (2, 3)],
 'offset': False,
 'scale': False}

In [7]:
# Fit the GLRM onto B and impute the missing entry. identify_obs=False disables
# the automatic identification of observed entries, so the manually set list of
# observed entries (which excludes (0, 2)) is used instead.
g.fit_impute(B, identify_obs=False)

array([[0.99886478, 1.99808547, 2.99644759, 3.99720322],
       [2.00095189, 4.00065967, 5.99861659, 8.00136119],
       [3.98484371, 5.02039307, 6.00582326, 6.99015181]])