In [1]:
# Make sure helpers functionality can be imported.
# The parent of the current working directory (presumably the project root --
# confirm against the repository layout) is put at the front of sys.path.
import os
import sys

# os.path.dirname returns the same parent path as os.path.split(...)[0], but
# does not bind the throwaway name `_`, which IPython uses for the last output.
project_path = os.path.dirname(os.getcwd())
if project_path not in sys.path:
    sys.path.insert(0, project_path)

In [2]:
# Dependencies
# pip install numpy
# pip install pandas
# pip install sklearn

# Ignore warnings
import warnings; warnings.simplefilter("ignore")

# Import libraries
import numpy as np

## Load an example dataset

In [3]:
# Load an example regression dataset.
# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, so on
# newer installations fall back to the California housing data, which exposes
# the same `data` / `target` / `feature_names` / `DESCR` fields.
# NOTE: the fallback downloads its data on first use and has 8 features
# instead of 13, so the printed values differ from the Boston ones.
try:
    from sklearn.datasets import load_boston as load_example_dataset
except ImportError:
    from sklearn.datasets import fetch_california_housing as load_example_dataset

dataset = load_example_dataset()
X = dataset.data
y = dataset.target
feature_names = dataset.feature_names

# Print the description that ships with the dataset
print(dataset.get('DESCR'))

.. _boston_dataset:

Boston house prices dataset
---------------------------

**Data Set Characteristics:**  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pu

## Remove the effect of covariates

In [4]:
from helpers.utils.transformers import remove_effect_of_covariates

# Fixed seed for the artificial covariates, so that "Restart & Run All"
# reproduces the same numbers on every run
COVARIATES_SEED = 42

# Show the original features
print("\nOriginal features\n")
print(X[0:8, 0:8])

# Create some artificial data to play the role of covariates:
# three standard-normal columns, one row per sample in X
rng = np.random.default_rng(COVARIATES_SEED)
covariates = rng.standard_normal((X.shape[0], 3))

# Create the copy of the feature matrix, so X stays available unchanged in
# case the helper modifies its input in place (not visible from here)
X_copy = X.copy()

# Remove the effect of covariates
X_copy = remove_effect_of_covariates(X_copy, covariates)

# Show the edited features
print("\nFeatures without the effect of covariates\n")
print(X_copy[0:8, 0:8])


Original features

[[6.3200e-03 1.8000e+01 2.3100e+00 0.0000e+00 5.3800e-01 6.5750e+00
  6.5200e+01 4.0900e+00]
 [2.7310e-02 0.0000e+00 7.0700e+00 0.0000e+00 4.6900e-01 6.4210e+00
  7.8900e+01 4.9671e+00]
 [2.7290e-02 0.0000e+00 7.0700e+00 0.0000e+00 4.6900e-01 7.1850e+00
  6.1100e+01 4.9671e+00]
 [3.2370e-02 0.0000e+00 2.1800e+00 0.0000e+00 4.5800e-01 6.9980e+00
  4.5800e+01 6.0622e+00]
 [6.9050e-02 0.0000e+00 2.1800e+00 0.0000e+00 4.5800e-01 7.1470e+00
  5.4200e+01 6.0622e+00]
 [2.9850e-02 0.0000e+00 2.1800e+00 0.0000e+00 4.5800e-01 6.4300e+00
  5.8700e+01 6.0622e+00]
 [8.8290e-02 1.2500e+01 7.8700e+00 0.0000e+00 5.2400e-01 6.0120e+00
  6.6600e+01 5.5605e+00]
 [1.4455e-01 1.2500e+01 7.8700e+00 0.0000e+00 5.2400e-01 6.1720e+00
  9.6100e+01 5.9505e+00]]

Features without the effect of covariates

[[-4.09236952e+00  7.76329402e+00 -8.96200184e+00 -5.61238452e-02
  -1.39418259e-02  2.69840703e-01 -1.88331322e+00  2.95921743e-01]
 [-3.27018388e+00 -1.11343422e+01 -4.04210870e+00 -7.81041