In [1]:
import torch
from torch import nn 
from torch import nn, Tensor
import torch.nn.functional as F

class NumEmbedding(nn.Module):
    """Per-feature linear embedding for numerical inputs.

    Holds ``n`` independent linear maps, one per feature: feature ``f`` is
    projected from ``d_in`` to ``d_out`` dimensions by ``weight[f]`` and, when
    enabled, shifted by ``bias[f]``.

    Args:
        n: number of numerical features.
        d_in: input width of every feature.
        d_out: embedding width produced for every feature.
        bias: whether to learn a per-feature bias. The default is ``True``
            because this module used to create the bias regardless of the flag;
            callers that omit the argument therefore get the same parameters
            as before, while ``bias=False`` is now actually honoured.
    """

    def __init__(self, n: int, d_in: int, d_out: int, bias: bool = True):
        super().__init__()
        self.weight = nn.Parameter(torch.empty(n, d_in, d_out))
        if bias:
            self.bias = nn.Parameter(torch.empty(n, d_out))
        else:
            # Keeps ``self.bias`` defined (as None) so forward() can test it.
            self.register_parameter("bias", None)

        # Initialise every per-feature map exactly like a fresh nn.Linear.
        with torch.no_grad():
            for i in range(n):
                layer = nn.Linear(d_in, d_out, bias=bias)
                # nn.Linear stores (d_out, d_in); this module uses (d_in, d_out).
                self.weight[i] = layer.weight.T
                if self.bias is not None:
                    self.bias[i] = layer.bias

    def forward(self, x_num):
        """Embed ``x_num`` of shape ``(batch, n, d_in)`` into ``(batch, n, d_out)``."""
        assert x_num.ndim == 3
        # b: batch, f: feature, i: input width, j: output width.
        x = torch.einsum("bfi,fij->bfj", x_num, self.weight)
        if self.bias is not None:
            x = x + self.bias[None]
        return x

In [2]:
class CatEmbedding(nn.Module):
    """One shared embedding table for several categorical features.

    Every feature owns a contiguous slice of the table; ``offsets[f]`` is the
    first row of feature ``f``, so a per-feature code is turned into a global
    row index by adding the offset.

    Args:
        categories: cardinality of each categorical feature. Any sequence of
            integers is accepted (list, tuple, NumPy array, ...).
        d_embed: width of each embedding vector.
    """

    def __init__(self, categories, d_embed):
        super().__init__()
        # Normalise to a list of Python ints first: ``[0] + categories[:-1]``
        # only concatenates for lists. A tuple raises TypeError and a NumPy
        # array turns ``+`` into a broadcast addition, yielding wrong offsets.
        cardinalities = [int(c) for c in categories]
        self.embedding = nn.Embedding(sum(cardinalities), d_embed)
        # Exclusive cumulative sum of the cardinalities; stored as a frozen
        # parameter so it follows the module across devices and checkpoints.
        self.offsets = nn.Parameter(
                torch.tensor([0] + cardinalities[:-1]).cumsum(0),requires_grad=False)

        torch.nn.init.xavier_uniform_(self.embedding.weight.data)

    def forward(self, x_cat):
        """
        :param x_cat: Long tensor of size ``(batch_size, features_num)``
        :return: Float tensor of size ``(batch_size, features_num, d_embed)``
        """
        x = x_cat + self.offsets[None]
        return self.embedding(x)
    

class CatLinear(nn.Module):
    """
    Linear layer over categorical features, implemented with an Embedding
    (equivalent to one-hot encoding followed by nn.Linear()).
    Input shape: [batch_size, features_num],
    output shape: [batch_size, d_out]

    ``categories`` may be any sequence of integer cardinalities (list, tuple,
    NumPy array, ...).
    """
    def __init__(self, categories, d_out=1):
        super().__init__()
        # Normalise to a list of Python ints first: ``[0] + categories[:-1]``
        # only concatenates for lists. A tuple raises TypeError and a NumPy
        # array turns ``+`` into a broadcast addition, yielding wrong offsets.
        cardinalities = [int(c) for c in categories]
        # One weight row of width d_out per (feature, category) pair.
        self.fc = nn.Embedding(sum(cardinalities), d_out)
        self.bias = nn.Parameter(torch.zeros((d_out,)))
        # First table row of every feature (exclusive cumulative sum).
        self.offsets = nn.Parameter(
                torch.tensor([0] + cardinalities[:-1]).cumsum(0),requires_grad=False)

    def forward(self, x_cat):
        """
        :param x_cat: Long tensor of size ``(batch_size, features_num)``
        :return: Float tensor of size ``(batch_size, d_out)``
        """
        x = x_cat + self.offsets[None]
        # Summing the looked-up rows over features equals the one-hot matmul.
        return torch.sum(self.fc(x), dim=1) + self.bias
    

In [3]:
class FMLayer(nn.Module):
    """
    FM interaction term: 0.5 * ((sum of vectors)^2 - sum of (vectors^2)),
    computed element-wise over the latent dimension.
    """

    def __init__(self, reduce_sum=True):
        super().__init__()
        # When True the latent dimension is summed out, leaving one value per row.
        self.reduce_sum = reduce_sum

    def forward(self, x):  # note: x here is <v_i> * x_i from the formula
        """
        :param x: Float tensor of size ``(batch_size, num_features, k)``
        :return: ``(batch_size, 1)`` if ``reduce_sum`` else ``(batch_size, k)``
        """
        field_sum = torch.sum(x, dim=1)
        interaction = field_sum ** 2 - torch.sum(x ** 2, dim=1)
        if not self.reduce_sum:
            return 0.5 * interaction
        return 0.5 * torch.sum(interaction, dim=1, keepdim=True)


In [4]:
class FM(nn.Module):
    """
    Complete FM model: a first-order (linear) term plus the second-order
    interaction term, over numerical and/or categorical features.

    Args:
        d_numerical: number of numerical features (``None`` or 0 for none).
        categories: cardinality of each categorical feature (``None`` or empty
            for none). Any sequence of integers is accepted.
        d_embed: width of the latent vectors used by the interaction term.
        n_classes: number of outputs. With 1 the forward pass returns a
            ``(batch,)`` tensor; otherwise ``(batch, n_classes)``.

    Raises:
        ValueError: if neither numerical nor categorical features are given.
    """

    def __init__(self, d_numerical, categories=None, d_embed=4,
                 n_classes = 1):
        super().__init__()
        if d_numerical is None:
            d_numerical = 0
        # A plain list keeps the emptiness test and ``[0] + categories[:-1]``
        # in the categorical sub-modules well defined for tuples and arrays.
        categories = [] if categories is None else list(categories)
        if not d_numerical and not categories:
            # Without this check the failure only shows up in forward(), as an
            # opaque error from concatenating an empty list of embeddings.
            raise ValueError(
                "FM needs at least one numerical or categorical feature")
        self.categories = categories
        self.n_classes = n_classes

        self.num_linear = nn.Linear(d_numerical,n_classes) if d_numerical else None
        self.cat_linear = CatLinear(categories,n_classes) if categories else None

        self.num_embedding = NumEmbedding(d_numerical,1,d_embed) if d_numerical else None
        self.cat_embedding = CatEmbedding(categories, d_embed) if categories else None

        if n_classes==1:
            self.fm = FMLayer(reduce_sum=True)
            self.fm_linear = None
        else:
            assert n_classes>=2
            # Keep the latent dimension and project it onto the classes.
            self.fm = FMLayer(reduce_sum=False)
            self.fm_linear = nn.Linear(d_embed,n_classes)

    def forward(self, x):

        """
        :param x: pair ``(x_num, x_cat)``
            x_num: numerical features
            x_cat: category features
        """
        x_num,x_cat = x

        # linear part
        x = 0.0
        if self.num_linear is not None:
            x = x + self.num_linear(x_num)
        if self.cat_linear is not None:
            x = x + self.cat_linear(x_cat)

        # interaction (cross-term) part
        x_embedding = []
        if self.num_embedding is not None:
            # Trailing axis turns each scalar feature into a length-1 vector.
            x_embedding.append(self.num_embedding(x_num[...,None]))
        if self.cat_embedding is not None:
            x_embedding.append(self.cat_embedding(x_cat))
        # Concatenate along the feature axis.
        x_embedding = torch.cat(x_embedding,dim=1)

        if self.n_classes==1:
            x = x + self.fm(x_embedding)
            x = x.squeeze(-1)
        else:
            x = x + self.fm_linear(self.fm(x_embedding))
        return x
    

In [5]:
## Smoke test: NumEmbedding

num_embedding = NumEmbedding(n=2, d_in=1, d_out=4)
x_num = torch.randn(2, 2)
# Add a trailing axis so every scalar feature becomes a length-1 vector.
x_out = num_embedding(x_num[..., None])
print(x_out.shape)

torch.Size([2, 2, 4])


In [6]:
# Smoke test: CatEmbedding on three features with cardinalities 3, 2 and 2.
cat_embedding = CatEmbedding([3, 2, 2], 4)
x_cat = torch.randint(low=0, high=2, size=(2, 3))
print(x_cat)
x_out = cat_embedding(x_cat)
for tensor in (x_cat, x_out):
    print(tensor.shape)

tensor([[0, 0, 0],
        [1, 1, 0]])
torch.Size([2, 3])
torch.Size([2, 3, 4])


In [7]:
# Smoke test: CatLinear on three features with cardinalities 3, 2 and 2.
cat_linear = CatLinear([3, 2, 2], 1)
x_cat = torch.randint(low=0, high=2, size=(2, 3))
x_out = cat_linear(x_cat)
for tensor in (x_cat, x_out):
    print(tensor.shape)

torch.Size([2, 3])
torch.Size([2, 1])


In [8]:
## Smoke test: FMLayer

# reduce_sum=False keeps the latent dimension in the output.
fm_layer = FMLayer(False)

x = torch.randn(size=(2, 3, 4))
x_out = fm_layer.forward(x) if False else fm_layer(x)
print(x_out.shape)

torch.Size([2, 4])


In [9]:
## Smoke test: FM

# Three numerical and three categorical features, two output classes.
fm = FM(d_numerical = 3, categories = [4,3,2],
        d_embed = 4,n_classes = 2)
# The former ``self = fm`` debugging alias was removed: nothing reads a
# module-level ``self`` and it only leaked into the notebook namespace.
x_num = torch.randn(2,3)
x_cat = torch.randint(0,2,(2,3))
fm((x_num,x_cat))

tensor([[-3.9062,  2.3074],
        [-1.3099, -0.7521]], grad_fn=<AddBackward0>)

In [10]:
import numpy as np 
import pandas as pd 
import datetime

from sklearn.model_selection import train_test_split

import torch
from torch import nn 
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F 
import torchkeras

def printlog(info):
    """Print ``info`` followed by ``...`` and then blank lines.

    The timestamp that used to be computed here was never printed, so the
    unused local has been dropped; the output is unchanged.
    """
    print(f'{info}...\n\n')
    

  from .autonotebook import tqdm as notebook_tqdm
2023-08-07 11:14:29.280747: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-08-07 11:14:29.325636: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [11]:
from sklearn.preprocessing import LabelEncoder, QuantileTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

# Tab-separated file without a header row; column names are assigned below.
dfdata = pd.read_csv(
    '/data/snlp/zhangjl/datas/ctr/eat_pytorch_datasets/criteo_small.zip',
    sep='\t',
    header=None,
)
# One label column, 13 integer features I1..I13, 26 categorical features C14..C39.
dfdata.columns = (
    ['label']
    + [f"I{x}" for x in range(1, 14)]
    + [f"C{x}" for x in range(14, 40)]
)



In [12]:
# Preview the first five rows of the frame as loaded.
dfdata.head(n=5)

Unnamed: 0,label,I1,I2,I3,I4,I5,I6,I7,I8,I9,...,C30,C31,C32,C33,C34,C35,C36,C37,C38,C39
0,0,1.0,1,5.0,0.0,1382.0,4.0,15.0,2.0,181.0,...,e5ba7672,f54016b9,21ddcdc9,b1252a9d,07b5194c,,3a171ecb,c5c50484,e8b83407,9727dd16
1,0,2.0,0,44.0,1.0,102.0,8.0,2.0,2.0,4.0,...,07c540c4,b04e4670,21ddcdc9,5840adea,60f6221e,,3a171ecb,43f13e8b,e8b83407,731c3655
2,0,2.0,0,1.0,14.0,767.0,89.0,4.0,2.0,245.0,...,8efede7f,3412118d,,,e587c466,ad3062eb,3a171ecb,3b183c5c,,
3,0,,893,,,4392.0,,0.0,0.0,0.0,...,1e88c74f,74ef3502,,,6b3a5ca6,,3a171ecb,9117a34a,,
4,0,3.0,-1,,0.0,2.0,0.0,3.0,0.0,0.0,...,1e88c74f,26b3c7a7,,,21c9516a,,32c7478e,b34f3128,,


In [13]:
# Split the feature columns by name prefix: 'C…' are categorical, 'I…' numerical.
cat_cols = [x for x in dfdata.columns if x.startswith('C')]
num_cols = [x for x in dfdata.columns if x.startswith('I')]
# Numerical pipeline: impute missing values, then apply a quantile transform
# (both with their default settings).
num_pipe = Pipeline(steps = [('impute',SimpleImputer()),('quantile',QuantileTransformer())])
# Replace every categorical column, in place, by integer codes from a fresh
# LabelEncoder fitted on that column alone.
for col in cat_cols:
    dfdata[col]  = LabelEncoder().fit_transform(dfdata[col])
# Replace the numerical columns, in place, by the pipeline output.
# NOTE(review): the encoders and the pipeline are fitted on the whole frame,
# before the train/val/test split performed in a later cell — confirm that
# sharing these statistics across splits is intended.
# NOTE(review): dfdata is overwritten here, so the raw values can only be
# recovered by re-reading the file.
dfdata[num_cols] = num_pipe.fit_transform(dfdata[num_cols])
# Largest code + 1 per categorical column, used as that feature's cardinality.
categories = [dfdata[col].max()+1 for col in cat_cols]


In [15]:
# Preview the first five rows of the frame in its current state.
dfdata.head(n=5)

Unnamed: 0,label,I1,I2,I3,I4,I5,I6,I7,I8,I9,...,C30,C31,C32,C33,C34,C35,C36,C37,C38,C39
0,0,0.311311,0.368368,0.371872,0.0,0.378128,0.129129,0.813814,0.216717,0.853353,...,9,3691,226,2,7338,15,3,31851,57,18183
1,0,0.387387,0.206707,0.943443,0.096096,0.152448,0.18969,0.402903,0.216717,0.134134,...,0,2657,226,0,91081,15,3,10871,57,13708
2,0,0.387387,0.206707,0.071071,0.9004,0.229724,0.564064,0.532032,0.216717,0.896549,...,6,789,1854,3,215784,9,3,9477,69,30955
3,0,0.662663,0.971618,0.752753,0.614615,0.608531,0.717217,0.0,0.0,0.0,...,1,1769,1854,3,100823,15,3,23223,69,30955
4,0,0.433433,0.060561,0.752753,0.0,0.047047,0.0,0.473974,0.0,0.0,...,1,595,1854,3,31800,15,2,28797,69,30955


In [16]:
import torch 
from torch.utils.data import Dataset, dataloader 

class DfDataSet(Dataset):
    """Dataset over a DataFrame with numerical and categorical feature columns.

    Args:
        df: source DataFrame.
        label_col: name of the label column.
        num_features: list of numerical column names (falsy for none).
        cat_features: list of categorical column names (falsy for none).
        categories: cardinality of each categorical feature; stored as given
            and returned by ``get_categories``.
        is_training: when True items are ``((x_num, x_cat), y)``, otherwise
            just ``(x_num, x_cat)``.
    """

    def __init__(self, df, 
                 label_col, 
                 num_features, 
                 cat_features, 
                 categories, 
                 is_training = True):
        self.X_num = (torch.tensor(df[num_features].values, dtype=torch.float32)
                      if num_features else None)
        # Convert straight to int64. Going through a float32 tensor first
        # rounds codes above 2**24 to a neighbouring value.
        self.X_cat = (torch.tensor(df[cat_features].values, dtype=torch.long)
                      if cat_features else None)
        self.Y = torch.tensor(df[label_col].values).float()
        self.categories = categories
        self.is_training = is_training

    def __len__(self):
        """Number of rows, taken from the label tensor."""
        return len(self.Y)
    
    def __getitem__(self, index):
        """Return the features (and the label when ``is_training``) of one row.

        A feature group that was not configured is returned as ``None``
        instead of raising on ``None[index]``.
        NOTE(review): the default DataLoader collation is not expected to
        accept ``None`` entries — supply a ``collate_fn`` in that case.
        """
        x_num = None if self.X_num is None else self.X_num[index]
        x_cat = None if self.X_cat is None else self.X_cat[index]
        if self.is_training:
            return ((x_num, x_cat), self.Y[index])
        return (x_num, x_cat)

    def get_categories(self):
        """Return the categorical cardinalities passed at construction."""
        return self.categories
           

In [17]:
# Hold out 20% for test, then 20% of the remainder for validation.
dftrain_val, dftest = train_test_split(dfdata, test_size=0.2)
dftrain, dfval = train_test_split(dftrain_val, test_size=0.2)

# Every split keeps its labels (is_training=True), so all three datasets
# yield ((x_num, x_cat), y) items.
ds_train, ds_val, ds_test = (
    DfDataSet(
        frame,
        label_col="label",
        num_features=num_cols,
        cat_features=cat_cols,
        categories=categories,
        is_training=True,
    )
    for frame in (dftrain, dfval, dftest)
)

In [19]:
# Only the training loader is shuffled; evaluation order stays fixed.
dl_train = DataLoader(ds_train, batch_size=2048, shuffle=True)
dl_val = DataLoader(ds_val,batch_size = 2048,shuffle=False)
dl_test = DataLoader(ds_test,batch_size = 2048,shuffle=False)
# Grab one batch for shape inspection. ``next(iter(...))`` states the intent
# directly and fails loudly on an empty loader, where the former
# ``for ...: break`` would silently leave ``features`` undefined.
features, labels = next(iter(dl_train))

In [20]:
# define module
def create_net():
    """Build a fresh single-output FM sized from the ``ds_train`` dataset.

    The number of numerical features is the width of ``ds_train.X_num`` and
    the categorical cardinalities come from ``ds_train.get_categories()``.
    """
    n_numerical = ds_train.X_num.shape[1]
    cardinalities = ds_train.get_categories()
    return FM(
        d_numerical=n_numerical,
        categories=cardinalities,
        d_embed=8,
        n_classes=1,
    )

from torchkeras import summary
# Build a model and print its layer summary for the sample batch.
net = create_net()
summary(net, input_data=features)

--------------------------------------------------------------------------
Layer (type)                            Output Shape              Param #
Linear-1                                     [-1, 1]                   14
Embedding-2                              [-1, 26, 1]            1,296,709
NumEmbedding-3                           [-1, 13, 8]                  208
Embedding-4                              [-1, 26, 8]           10,373,672
FMLayer-5                                    [-1, 1]                    0
Total params: 11,670,603
Trainable params: 11,670,603
Non-trainable params: 0
--------------------------------------------------------------------------
Input size (MB): 0.000084
Forward/backward pass size (MB): 0.002594
Params size (MB): 44.519817
Estimated Total Size (MB): 44.522495
--------------------------------------------------------------------------




In [21]:
# train module
import os,sys,time
import numpy as np 
import pandas as pd 
import datetime 
from tqdm import tqdm 

import torch 
from torch import nn 
from accelerate import Accelerator 
from copy import deepcopy 


def printlog(info):
    """Print a separator line ending in the current timestamp, then ``info``.

    The separator is a blank line followed by 80 ``=`` characters and the
    time formatted as ``YYYY-MM-DD HH:MM:SS``; ``info`` is printed after it,
    followed by a blank line.
    """
    stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    separator = "\n" + "==========" * 8
    print(separator + stamp)
    print(str(info) + "\n")
    
class StepRunner:
    """Process one batch: forward pass, loss, optional optimiser step, metrics.

    Args:
        net: the model; called as ``net(features)``.
        loss_fn: called as ``loss_fn(preds, labels)``.
        stage: prefix for metric names; the optimiser only steps when it is
            ``"train"``.
        metrics_dict: mapping ``name -> metric_fn`` where
            ``metric_fn(preds, labels)`` returns something with ``.item()``.
            ``None`` means no metrics.
        optimizer: optional optimiser stepped in the train stage.
        lr_scheduler: optional scheduler stepped after every optimiser step.
        accelerator: optional object whose ``backward(loss)`` replaces
            ``loss.backward()``.
    """
    def __init__(self, net, loss_fn,stage = "train", metrics_dict = None, 
                 optimizer = None, lr_scheduler = None,
                 accelerator = None
                 ):
        self.net,self.loss_fn,self.stage = net,loss_fn,stage
        # The default None used to crash on ``.items()``; an empty mapping
        # makes "no metrics" a valid configuration.
        self.metrics_dict = {} if metrics_dict is None else metrics_dict
        self.optimizer,self.lr_scheduler = optimizer,lr_scheduler
        self.accelerator = accelerator
    
    def __call__(self, features, labels):
        """Run the batch and return ``(loss_value, {stage_name: metric_value})``."""
        #loss
        preds = self.net(features)
        loss = self.loss_fn(preds,labels)

        #backward()
        if self.optimizer is not None and self.stage=="train":
            if self.accelerator is  None:
                loss.backward()
            else:
                self.accelerator.backward(loss)
            self.optimizer.step()
            if self.lr_scheduler is not None:
                self.lr_scheduler.step()
            # Clear gradients after the step so the next batch starts clean.
            self.optimizer.zero_grad()
            
        #metrics (computed on the predictions made before the update)
        step_metrics = {self.stage+"_"+name:metric_fn(preds, labels).item() 
                        for name,metric_fn in self.metrics_dict.items()}
        return loss.item(),step_metrics
    
    
class EpochRunner:
    """Drive a step runner over one pass of a dataloader.

    On construction the wrapped net is put in train mode when the runner's
    stage is ``"train"`` and in eval mode otherwise.
    """
    def __init__(self,steprunner):
        self.steprunner = steprunner
        self.stage = steprunner.stage
        if self.stage=="train":
            self.steprunner.net.train()
        else:
            self.steprunner.net.eval()
        
    def __call__(self,dataloader):
        """Run every batch and return the epoch log.

        The log maps ``<stage>_loss`` to the mean step loss and every
        ``<stage>_<name>`` to that metric's ``compute()`` value; metrics are
        reset afterwards. An empty dataloader yields an empty log instead of
        an unbound-variable error.
        """
        total_loss,step = 0,0
        epoch_log = {}
        metrics_dict = self.steprunner.metrics_dict
        if metrics_dict is None:
            metrics_dict = {}
        n_batches = len(dataloader)
        loop = tqdm(enumerate(dataloader), total =n_batches)
        for i, batch in loop:
            features,labels = batch
            if self.stage=="train":
                loss, step_metrics = self.steprunner(features,labels)
            else:
                # No gradients are needed outside the train stage.
                with torch.no_grad():
                    loss, step_metrics = self.steprunner(features,labels)
                    
            step_log = dict({self.stage+"_loss":loss},**step_metrics)

            total_loss += loss
            step+=1
            if i!=n_batches-1:
                loop.set_postfix(**step_log)
            else:
                # Last batch: show epoch-level numbers on the progress bar.
                epoch_loss = total_loss/step
                epoch_metrics = {self.stage+"_"+name:metric_fn.compute().item() 
                                 for name,metric_fn in metrics_dict.items()}
                epoch_log = dict({self.stage+"_loss":epoch_loss},**epoch_metrics)
                loop.set_postfix(**epoch_log)

                for metric_fn in metrics_dict.values():
                    metric_fn.reset()
        return epoch_log

class KerasModel(torch.nn.Module):
    """Keras-style wrapper offering ``fit`` / ``evaluate`` / ``predict`` for a net.

    Args:
        net: the model to train.
        loss_fn: loss called as ``loss_fn(preds, labels)``.
        metrics_dict: mapping ``name -> metric module``; wrapped in an
            ``nn.ModuleDict``.
        optimizer: optimiser to use; defaults to Adam with ``lr=1e-2`` over
            this wrapper's parameters.
        lr_scheduler: optional scheduler stepped after every optimiser step.

    The net, loss, metrics and optimiser are passed through
    ``Accelerator.prepare`` at construction.
    """
    def __init__(self,net,loss_fn,metrics_dict=None,optimizer=None,lr_scheduler = None):
        super().__init__()
        self.accelerator = Accelerator()
        # name -> list with one entry per epoch; returned by fit() as a DataFrame.
        self.history = {}
        
        self.net = net
        self.loss_fn = loss_fn
        self.metrics_dict = nn.ModuleDict(metrics_dict) 
        
        self.optimizer = optimizer if optimizer is not None else torch.optim.Adam(
            self.parameters(), lr=1e-2)
        self.lr_scheduler = lr_scheduler
        
        self.net,self.loss_fn,self.metrics_dict,self.optimizer = self.accelerator.prepare(
            self.net,self.loss_fn,self.metrics_dict,self.optimizer)

    def forward(self, x):
        """Delegate to the wrapped net; raise NotImplementedError if there is none."""
        # ``is not None`` avoids treating an empty container module as missing.
        if self.net is not None:
            return self.net.forward(x)
        else:
            raise NotImplementedError


    def fit(self, train_data, val_data=None, epochs=10, ckpt_path='checkpoint.pt', 
            patience=5, monitor="val_loss", mode="min"):
        """Train with checkpointing of the best epoch and early stopping.

        Args:
            train_data: training dataloader.
            val_data: optional validation dataloader.
            epochs: maximum number of epochs.
            ckpt_path: where the best ``net.state_dict()`` is saved.
            patience: stop once ``monitor`` has not improved for this many epochs.
            monitor: history key to track (for example ``"val_loss"``).
            mode: ``"max"`` if larger is better, anything else means smaller.

        Returns:
            ``pd.DataFrame`` built from the per-epoch history. The net is
            reloaded from ``ckpt_path`` before returning.

        Raises:
            KeyError: if ``monitor`` is not among the recorded history keys
                (for example a ``val_*`` key without validation data).
        """
        
        train_data = self.accelerator.prepare(train_data)
        val_data = self.accelerator.prepare(val_data) if val_data else []

        for epoch in range(1, epochs+1):
            printlog("Epoch {0} / {1}".format(epoch, epochs))
            
            # 1, train -------------------------------------------------  
            # Metrics are deep-copied so each stage accumulates its own state.
            train_step_runner = StepRunner(net = self.net,stage="train",
                    loss_fn = self.loss_fn,metrics_dict=deepcopy(self.metrics_dict),
                    optimizer = self.optimizer, lr_scheduler = self.lr_scheduler,
                    accelerator = self.accelerator)
            train_epoch_runner = EpochRunner(train_step_runner)
            train_metrics = train_epoch_runner(train_data)
            
            for name, metric in train_metrics.items():
                self.history[name] = self.history.get(name, []) + [metric]

            # 2, validate -------------------------------------------------
            if val_data:
                val_step_runner = StepRunner(net = self.net,stage="val",
                    loss_fn = self.loss_fn,metrics_dict=deepcopy(self.metrics_dict),
                    accelerator = self.accelerator)
                val_epoch_runner = EpochRunner(val_step_runner)
                with torch.no_grad():
                    val_metrics = val_epoch_runner(val_data)
                val_metrics["epoch"] = epoch
                for name, metric in val_metrics.items():
                    self.history[name] = self.history.get(name, []) + [metric]
            
            # 3, early-stopping -------------------------------------------------
            if monitor not in self.history:
                # Same exception type as the bare lookup, with an actionable message.
                raise KeyError(
                    "monitor {!r} not found in history; available keys: {}".format(
                        monitor, sorted(self.history)))
            arr_scores = self.history[monitor]
            best_score_idx = np.argmax(arr_scores) if mode=="max" else np.argmin(arr_scores)
            # Save only when the epoch just finished is the best so far.
            if best_score_idx==len(arr_scores)-1:
                torch.save(self.net.state_dict(),ckpt_path)
                print("<<<<<< reach best {0} : {1} >>>>>>".format(monitor,
                     arr_scores[best_score_idx]),file=sys.stderr)
            if len(arr_scores)-best_score_idx>patience:
                print("<<<<<< {} without improvement in {} epoch, early stopping >>>>>>".format(
                    monitor,patience),file=sys.stderr)
                break 
                
        # Restore the best checkpoint before handing the model back.
        self.net.load_state_dict(torch.load(ckpt_path))
            
        return pd.DataFrame(self.history)

    @torch.no_grad()
    def evaluate(self, val_data):
        """Run one validation pass over ``val_data`` and return its epoch log."""
        val_data = self.accelerator.prepare(val_data)
        val_step_runner = StepRunner(net = self.net,stage="val",
                    loss_fn = self.loss_fn,metrics_dict=deepcopy(self.metrics_dict),
                    accelerator = self.accelerator)
        val_epoch_runner = EpochRunner(val_step_runner)
        val_metrics = val_epoch_runner(val_data)
        return val_metrics
        
       
    @torch.no_grad()
    def predict(self, dataloader):
        """Return the concatenated forward outputs for every batch.

        Each batch's first element (``t[0]``) is fed to the net.
        """
        dataloader = self.accelerator.prepare(dataloader)
        # Inference must not depend on whichever mode the net was last left in.
        self.net.eval()
        result = torch.cat([self.forward(t[0]) for t in dataloader])
        return result.data

In [23]:
from torchkeras.metrics import AUC

# Fresh network, logits-based binary cross-entropy, and AUC as the metric.
net = create_net()
loss_fn = nn.BCEWithLogitsLoss()

metrics_dict = dict(auc=AUC())
optimizer = torch.optim.Adam(
    net.parameters(),
    lr=0.005,
    weight_decay=0.001,
)

model = KerasModel(
    net,
    loss_fn=loss_fn,
    metrics_dict=metrics_dict,
    optimizer=optimizer,
)

In [24]:
# Train for at most 20 epochs, keeping the checkpoint with the highest
# validation AUC and stopping after 3 epochs without improvement.
dfhistory = model.fit(
    dl_train,
    dl_val,
    epochs=20,
    ckpt_path='checkpoint.pt',
    patience=3,
    monitor='val_auc',
    mode='max',
)


Epoch 1 / 20



100%|██████████| 313/313 [00:41<00:00,  7.51it/s, train_auc=0.675, train_loss=0.759]
100%|██████████| 79/79 [00:06<00:00, 12.67it/s, val_auc=0.717, val_loss=0.558]
<<<<<< reach best val_auc : 0.7167243361473083 >>>>>>



Epoch 2 / 20



100%|██████████| 313/313 [00:40<00:00,  7.72it/s, train_auc=0.736, train_loss=0.524]
100%|██████████| 79/79 [00:05<00:00, 13.64it/s, val_auc=0.738, val_loss=0.512]
<<<<<< reach best val_auc : 0.7382006645202637 >>>>>>



Epoch 3 / 20



100%|██████████| 313/313 [00:40<00:00,  7.71it/s, train_auc=0.752, train_loss=0.497]
100%|██████████| 79/79 [00:06<00:00, 12.52it/s, val_auc=0.751, val_loss=0.495]
<<<<<< reach best val_auc : 0.7512558102607727 >>>>>>



Epoch 4 / 20



100%|██████████| 313/313 [00:41<00:00,  7.61it/s, train_auc=0.761, train_loss=0.484]
100%|██████████| 79/79 [00:06<00:00, 13.16it/s, val_auc=0.759, val_loss=0.484]
<<<<<< reach best val_auc : 0.7586027383804321 >>>>>>



Epoch 5 / 20



100%|██████████| 313/313 [00:41<00:00,  7.50it/s, train_auc=0.766, train_loss=0.478]
100%|██████████| 79/79 [00:05<00:00, 13.57it/s, val_auc=0.76, val_loss=0.482] 
<<<<<< reach best val_auc : 0.7603994011878967 >>>>>>



Epoch 6 / 20



100%|██████████| 313/313 [00:41<00:00,  7.56it/s, train_auc=0.768, train_loss=0.475]
100%|██████████| 79/79 [00:05<00:00, 13.95it/s, val_auc=0.763, val_loss=0.479]
<<<<<< reach best val_auc : 0.7630659341812134 >>>>>>



Epoch 7 / 20



100%|██████████| 313/313 [00:44<00:00,  7.09it/s, train_auc=0.77, train_loss=0.474] 
100%|██████████| 79/79 [00:05<00:00, 14.37it/s, val_auc=0.765, val_loss=0.477]
<<<<<< reach best val_auc : 0.7648440599441528 >>>>>>



Epoch 8 / 20



100%|██████████| 313/313 [00:42<00:00,  7.41it/s, train_auc=0.77, train_loss=0.474] 
100%|██████████| 79/79 [00:05<00:00, 13.79it/s, val_auc=0.765, val_loss=0.478]
<<<<<< reach best val_auc : 0.7649030089378357 >>>>>>



Epoch 9 / 20



100%|██████████| 313/313 [00:41<00:00,  7.50it/s, train_auc=0.77, train_loss=0.474] 
100%|██████████| 79/79 [00:05<00:00, 13.20it/s, val_auc=0.766, val_loss=0.477]
<<<<<< reach best val_auc : 0.7660222053527832 >>>>>>



Epoch 10 / 20



100%|██████████| 313/313 [00:41<00:00,  7.46it/s, train_auc=0.77, train_loss=0.474] 
100%|██████████| 79/79 [00:06<00:00, 11.71it/s, val_auc=0.766, val_loss=0.477]



Epoch 11 / 20



100%|██████████| 313/313 [00:42<00:00,  7.38it/s, train_auc=0.77, train_loss=0.474] 
100%|██████████| 79/79 [00:05<00:00, 14.18it/s, val_auc=0.765, val_loss=0.477]



Epoch 12 / 20



100%|██████████| 313/313 [00:44<00:00,  6.98it/s, train_auc=0.77, train_loss=0.474] 
100%|██████████| 79/79 [00:06<00:00, 11.40it/s, val_auc=0.766, val_loss=0.477]
<<<<<< val_auc without improvement in 3 epoch, early stopping >>>>>>
