In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
from GaussianCopulaImp.gaussian_copula import GaussianCopula
from GaussianCopulaImp.helper_data_generation import generate_sigma, generate_mixed_from_gc
from GaussianCopulaImp.helper_evaluation import get_rmse
from GaussianCopulaImp.helper_mask import mask

## Confidence intervals under the full-rank Gaussian copula model

In [3]:
# Experiment configuration: one seed shared by every stochastic step below,
# the sample size, and the column indices belonging to each variable type.
seed = 101
n = 2000
var_types = {
    'cont': [*range(15)],  # 15 continuous columns (indices 0..14)
    'ord': [],             # no ordinal columns
    'bin': [],             # no binary columns
}

In [4]:
# The dimension p is the total number of columns across all variable types.
sigma = generate_sigma(seed, p=sum(map(len, var_types.values())))
# Draw n rows from the copula parameterised by sigma, reusing the same seed.
X = generate_mixed_from_gc(var_types=var_types, sigma=sigma, n=n, seed=seed)
# Hide part of X for the imputation experiment; the hidden entries are located
# later with np.isnan, so masked cells are NaN.
# NOTE(review): mask_fraction=0.4 presumably means 40% of entries — confirm in helper_mask.
X_masked = mask(X, seed=seed, mask_fraction=0.4)

In [5]:
# Instantiate the Gaussian copula model with its default constructor arguments.
model = GaussianCopula()

In [6]:
# Fit the copula on the masked data and impute the missing entries.
# verbose=True prints the "Copula correlation change ratio" progress lines
# shown in the output; the result is indexed by 'imputed_data' further down.
out = model.impute_missing(
    X=X_masked,
    verbose=True,
)

Copula correlation change ratio:  0.1247
Copula correlation change ratio:  0.068
Copula correlation change ratio:  0.0383
Copula correlation change ratio:  0.0233
Copula correlation change ratio:  0.0153
Copula correlation change ratio:  0.0107
Convergence at iteration 7


In [7]:
# Imputation error of the completed data against the ground truth X.
# NOTE(review): relative=True presumably scales the RMSE and x_obs presumably
# restricts it to the masked entries — confirm in helper_evaluation.
get_rmse(
    x_imp=out['imputed_data'],
    x_true=X,
    x_obs=X_masked,
    relative=True,
)

0.5222093777725176

In [23]:
# Empirical coverage of the imputation confidence intervals.
# Coverage is measured only at the entries hidden by masking (NaN in X_masked),
# where the true value is still available in X.
missing_loc = np.isnan(X_masked)   # loop-invariant: identical for every level
x_true_missing = X[missing_loc]    # ground truth at the masked positions

# Maps each requested level to the fraction of masked entries whose true value
# lies strictly inside the returned (lower, upper) interval.
cover_rate = {}
for alpha in [0.85, 0.9, 0.95]:
    # This notebook treats `alpha` as the specified coverage rate (0.95 -> 95%
    # interval), not as a significance level.
    out_confidence_interval = model.get_imputed_confidence_interval(alpha=alpha)
    lower, upper = out_confidence_interval['lower'], out_confidence_interval['upper']
    cover = (lower[missing_loc] < x_true_missing) & (upper[missing_loc] > x_true_missing)
    cover_rate[alpha] = np.array(cover).mean()

The returned confidence intervals are slightly more conservative than the specified coverage rate: the actual coverage exceeds the specified level in every case below.

In [25]:
# Report the empirical coverage next to the level that was requested.
for specified, actual in cover_rate.items():
    print(f'The actual coverage rate is {actual:.3f} with specified coverage rate {specified:.3f}')

The actual coverage rate is 0.881 with specified coverage rate 0.850
The actual coverage rate is 0.922 with specified coverage rate 0.900
The actual coverage rate is 0.965 with specified coverage rate 0.950
