In [1]:
import pandas as pd
import torch
from feature_transforms.pipeline import ColumnPipeline
from utils.registry import get_encoder

# Toy input frame: two float columns and one string column, four rows.
df = pd.DataFrame(
    {
        'f1': [0.1, 0.3, 0.2, 0.9],
        'f2': [1.0, 0.2, 0.4, 0.5],
        'city': ['NY', 'LA', 'NY', 'SF'],
    }
)

# Preprocessing: 'yeo_johnson' is requested for the numeric side and
# 'one_hot' for the categorical side of the pipeline.
pipe = ColumnPipeline(numeric=['yeo_johnson'], categorical=['one_hot'])
X = pipe.fit_transform(df)
print('Pipeline output shape:', X.shape)

# Fetch the encoder class from the registry by name, then size its input
# from the fitted pipeline so the two stay in sync.
Encoder = get_encoder('MLPEncoder')
enc = Encoder(
    input_dim=pipe.output_dim,
    output_dim=32,         # embedding width d
    hidden_dims=[64, 64],  # marked "default" by the original author -- TODO confirm
)

# Forward pass: the trailing dimension is expected to equal output_dim,
# i.e. the embedding should come out as (batch, 32).
emb = enc(X)
print('Embedding shape:', emb.shape)


Pipeline output shape: (4, 5)
Embedding shape: torch.Size([4, 32])


In [None]:
import torch
import numpy as np
from adapters.kernel_to_multi import KernelToMulti
from adapters.multi_to_joint import MultiToJoint

# Minimal stand-in kernel used only to exercise the adapters below.
class LinearKernel:
    """Linear (dot-product) kernel exposing a single ``compute`` method."""

    def compute(self, X1: torch.Tensor, X2: torch.Tensor) -> torch.Tensor:
        """Return the matrix product of ``X1`` with the transpose of ``X2``.

        For 2-D inputs of shape (B, D) and (N, D) the result has shape
        (B, N), where entry (i, j) is the dot product of row i of ``X1``
        with row j of ``X2``.
        """
        gram = torch.matmul(X1, X2.T)  # (B, N)
        return gram

# Synthetic reference data, seeded so the draws below are repeatable.
torch.manual_seed(0)
X_ref = torch.randn(50, 8)  # stands in for x
A_ref = torch.randn(50, 6)  # stands in for a

# First adapter: construct it around the toy kernel, then fit it on the
# reference pairs. `fit` is chained in the original, so its return value is
# what ends up bound to `k2multi`.
k2multi = KernelToMulti(
    k_obj=LinearKernel(),
    n_samples=3,
    temperature=0.5,
    max_reference=50,
)
k2multi = k2multi.fit(X_ref, A_ref)

# Query points are drawn only after fitting, keeping the RNG call order fixed.
X_query = torch.randn(4, 8)
A_samples, _ = k2multi.transform(X_query)  # expected (4, 3, 6); second return value unused
print('A ~ p(a | x) shape:', A_samples.shape)

# Second adapter: turn the per-query samples into two paired tensors.
multi2joint = MultiToJoint(n_pairs=2, coupled=True)
A1, A2 = multi2joint.transform(A_samples)  # each expected (4, 2, 6)
print('Joint pair shapes:', A1.shape, A2.shape)

# With coupled=True the two members of a pair should never be the same row:
# compare element-wise, reduce over the feature axis, then over all pairs.
print('Any identical rows?', torch.eq(A1, A2).all(dim=-1).any().item())


A ~ p(a | x) shape: torch.Size([4, 3, 6])
Joint pair shapes: torch.Size([4, 2, 6]) torch.Size([4, 2, 6])
Any identical rows? False
