common file to explore the compatibility 

of composite compression framework and ptls models

In [2]:
import os

os.chdir('..')

In [3]:
import torch
import torch.nn
import torchvision.datasets
from fedot.core.pipelines.pipeline_builder import PipelineBuilder
from torch import optim, nn
from torch.utils.data import DataLoader, Dataset
from torchvision.models import resnet18
from torchvision.transforms import transforms
import numpy as np
import matplotlib.pyplot as plt
from fedcore.tools.ruler import PerformanceEvaluator

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [4]:
from tqdm import tqdm
import json
from datetime import datetime
def save_json(obj, path):
    with open(path, 'w') as file:
        json.dump(obj, file)

def load_json(path):
    with open(path, 'r') as file:
        return json.load(file)
    
import pickle

def save_pkl(obj, path):
    with open(path, 'wb') as file:
        pickle.dump(obj, file)

def load_pkl(path):
    with open(path, 'rb') as file:
        return pickle.load(file)

In [5]:
import os
import pandas as pd

# source_data = pd.read_csv('https://huggingface.co/datasets/dllllb/age-group-prediction/resolve/main/transactions_train.csv.gz?download=true', 
#                           compression='gzip',
#                           nrows=450_577)
source_data = pd.read_csv('/ptls-experiments/scenario_age_pred/notebooks/data/transactions_train.csv',
                          nrows=1_450_577)
source_data.head(2)

Unnamed: 0,client_id,trans_date,small_group,amount_rur
0,33172,6,4,71.463
1,33172,6,35,45.017


In [6]:
from ptls.preprocessing import PandasDataPreprocessor
import pickle

if not os.path.exists('preprocessor_age-group-prediction.p'):
    preprocessor = PandasDataPreprocessor(
        col_id='client_id',
        col_event_time='trans_date',
        event_time_transformation='none',
        cols_category=['small_group'],
        cols_numerical=['amount_rur'],
        return_records=True,
    )
    preprocessor.fit(source_data)
    with open('preprocessor_age-group-prediction.p', 'wb') as file:
        pickle.dump(preprocessor, file)
else:
    with open('preprocessor_age-group-prediction.p', 'rb') as file:
        preprocessor = pickle.load(file) 

In [7]:
dataset = preprocessor.transform(source_data)

In [8]:
dataset = sorted(dataset, key=lambda x: x['client_id'])
from sklearn.model_selection import train_test_split

train, test = train_test_split(dataset, test_size=0.2, random_state=42)

len(train), len(test)

(1332, 333)

In [9]:
from functools import partial
from ptls.nn import TrxEncoder, RnnSeqEncoder, GptEncoder
from ptls.nn.head import Head
from ptls.frames.coles import CoLESModule


In [10]:
def estimate_size(model):
    param_size = torch.zeros((1,), dtype=torch.float64)
    for param in model.parameters():
        param_size += param.nelement() * param.element_size()
    buffer_size = torch.zeros_like(param_size)
    for buffer in model.buffers():
        buffer_size += buffer.nelement() * buffer.element_size()

    size_all_mb = param_size / (1 << 20) + buffer_size / (1 << 20)
    print('model size: {:.3f}MB'.format(size_all_mb.item()))
    return size_all_mb.item()


# estimate_size(model)

In [15]:
def predict_ptls(model, predict_dataloader):
    def get_device(model):
        return next(iter(model.parameters()))
    device = get_device(model)
    model.eval()
    predictions = []
    with torch.no_grad():
        for batch in predict_dataloader:
            if isinstance(batch, tuple):
                X = batch[0]
            else:
                X = batch

            predictions.append(
                model(X.to(device))
            )
    return torch.cat(predictions)


In [16]:
from ptls.data_load.datasets import MemoryMapDataset
from ptls.data_load.iterable_processing import SeqLenFilter
from ptls.frames.coles import ColesDataset
from ptls.frames.coles.split_strategy import SampleSlices
from ptls.frames import PtlsDataModule

ptls_data_module = PtlsDataModule(
    train_data=ColesDataset(
        MemoryMapDataset(
            data=train,
            i_filters=[
                SeqLenFilter(min_seq_len=25),
            ],
        ),
        splitter=SampleSlices(
            split_count=5,
            cnt_min=25,
            cnt_max=200,
        ),
    ),
    train_num_workers=4,
    train_batch_size=256,
    test_data=ColesDataset(
        MemoryMapDataset(
            data=test,
            i_filters=[
                SeqLenFilter(min_seq_len=25),
            ],
        ),
        splitter=SampleSlices(
            split_count=5,
            cnt_min=25,
            cnt_max=200,
        ),
    ),
    test_num_workers=4,
    test_batch_size=256,
)

2024-09-29 15:43:34,357 - Loaded 1332 records
2024-09-29 15:43:34,360 - Loaded 333 records


In [17]:
from ptls.frames.coles.losses.contrastive_loss import ContrastiveLoss
from ptls.frames.coles.sampling_strategies.hard_negative_pair_selector import HardNegativePairSelector


from fedcore.api.utils.data import get_compression_input

# input_data = get_compression_input(model, 
#                                    ptls_data_module.train_dataloader(), 
#                                    ptls_data_module.test_dataloader(),
#                                    num_classes=1,
#                                    train_loss=partial(ContrastiveLoss, margin=0.5, sampling_strategy=HardNegativePairSelector(neg_count=5))
#                                 )



In [18]:
from fedcore.api.main import FedCore
from fedcore.api.utils.evaluation import evaluate_original_model, evaluate_optimised_model

experiment_setup = {'compression_task': 'training',
                    # 'cv_task': 'classification',
                    'model_params': dict(epochs=1,
                                         )
}
# experiment_setup = {'compression_task': 'pruning',
#                     # 'cv_task': 'classification',
#                     'model_params': dict(epochs=1,
#                                          pruning_iterations=1,
#                                          learning_rate=0.001,
#                                          importance='MagnitudeImportance',
#                                          pruner_name='magnitude_pruner',
#                                          importance_norm=1,
#                                          pruning_ratio=0.5,
#                                          finetune_params={'epochs': 1,
#                                                           'custom_loss': None,
#                                                         #   {'ptls_contrastive': CONTRASTIVE_LOSS()}
#                                                           }
#                                          )
# }
# experiment_setup = {'compression_task': 'low_rank',
#                     # 'cv_task': 'classification',
#                     'model_params': dict(epochs=1,
#                                          learning_rate=0.001,
#                                          hoyer_loss=0.2,
#                                          energy_thresholds=[0.99],
#                                          orthogonal_loss=5,
#                                          decomposing_mode='channel',
#                                          spectrum_pruning_strategy='energy',
#                                          finetune_params={'epochs': 1,
#                                                           'custom_loss': None}
#                                          )
#                     }

# experiment_setup = {'compression_task': 'composite_compression',
#                     'model_params': dict(pruning_model=dict(epochs=1,
#                                                             pruning_iterations=3,
#                                                             learning_rate=0.001,
#                                                             importance='MagnitudeImportance',
#                                                             pruner_name='magnitude_pruner',
#                                                             importance_norm=1,
#                                                             pruning_ratio=0.75,
#                                                             finetune_params={'epochs': 5,
#                                                                              'custom_loss': None}
#                                                             ),
#                                          low_rank_model=dict(epochs=5,
#                                                              learning_rate=0.001,
#                                                              hoyer_loss=0.2,
#                                                              energy_thresholds=[0.9],
#                                                              orthogonal_loss=5,
#                                                              decomposing_mode='channel',
#                                                              spectrum_pruning_strategy='energy',
#                                                              finetune_params={'epochs': 10,
#                                                                               'custom_loss': None}
#                                                              ),
#                                          training_model=dict(
#                                              epochs=1,
#                                          )                    
#                                          ),
                                         
#                     'initial_assumption': ['training_model',
#                                             'pruning_model',
#                                               'low_rank_model'
#                                               ]}


In [None]:
# DATA_DIR = '../scenario_age_pred/notebooks/data'
# def run_experiment_(DATA_DIR, loss, define_model, fedcore_setup, save_dir, n_cls=2, folds=range(5)):
#     log_path = os.path.join(save_dir, 'exp.json')

#     res = []
#     for fold_i in folds:
#         exp_res = {'fold': fold_i} 

#         df_trx_pretrain = pd.read_pickle(f'{DATA_DIR}/fold_{fold_i}/df_trx_pretrain.pickle')
#         df_seq_pretrain = pd.read_pickle(f'{DATA_DIR}/fold_{fold_i}/df_seq_pretrain.pickle')

#         with open(f'data/fold_{fold_i}/pdp.pickle', 'rb') as f:
#             pdp = pickle.load(f)
            
#         df_seq_pretrain_train, df_seq_pretrain_valid = train_test_split(
#             df_seq_pretrain, test_size=0.05, shuffle=True, random_state=42)
        
#         coles_data_module = ptls.frames.PtlsDataModule(
#         train_data=ptls.frames.coles.ColesDataset(
#             data=ptls.data_load.datasets.MemoryMapDataset(
#                 df_seq_pretrain_train.to_dict(orient='records') + 
#                 df_trx_pretrain.to_dict(orient='records')
#             ),
#             splitter=ptls.frames.coles.split_strategy.SampleSlices(
#                 split_count=5,
#                 cnt_min=25,
#                 cnt_max=200,
#             ),
#         ),
#         valid_data=ptls.frames.coles.ColesDataset(
#             data=ptls.data_load.datasets.MemoryMapDataset(
#                 df_seq_pretrain_train.to_dict(orient='records')),
#             splitter=ptls.frames.coles.split_strategy.SampleSlices(
#                 split_count=5,
#                 cnt_min=25,
#                 cnt_max=100,
#             ),
#         ),
#         train_batch_size=64,
#         train_num_workers=4,
#         valid_batch_size=650,
#         )

#         model = define_model()
#         input_data  = get_compression_input(model, 
#                                     coles_data_module.train_dataloader(), 
#                                     coles_data_module.valid_dataloader(),
#                                     num_classes=n_cls,
#                                     train_loss=loss
#                                     )
#         fedcore_compressor = FedCore(**fedcore_setup)
#         training_time_0 = datetime.now().timestamp()
#         exp_res['training_time_0'] = training_time_0
#         fedcore_compressor.fit((input_data, model), manually_done=True)
#         training_time_1 = datetime.now().timestamp()
#         exp_res['training_time_1'] = training_time_1

#         save_json(res, log_path)

#         del input_data
#         del model
#     return fedcore_compressor
    
    

In [19]:
def run_experiment(directory, ptls_data_module, loss, define_model, fedcore_setup, n_cls=2, n=3):
    log_path = os.path.join(directory, 'exp.json')
    
    res = []
    for i in tqdm(range(n)):
        exp_res = {'iter': i} 
        model = define_model()
        input_data  = get_compression_input(model, 
                                   ptls_data_module.train_dataloader(), 
                                   ptls_data_module.test_dataloader(),
                                   num_classes=n_cls,
                                   train_loss=loss
                                )

        start_size = estimate_size(model)
        exp_res['start_size'] = start_size
        
        
        fedcore_compressor = FedCore(**fedcore_setup)
        training_time_0 = datetime.now().timestamp()
        exp_res['training_time_0'] = training_time_0

        fedcore_compressor.fit((input_data, model), manually_done=True)

        training_time_1 = datetime.now().timestamp()
        exp_res['training_time_1'] = training_time_1

        end_size = estimate_size(fedcore_compressor.optimised_model)
        exp_res['end_size'] = end_size
        exp_res['orig_size'] = estimate_size(fedcore_compressor.original_model)
        save_pkl(fedcore_compressor.optimised_model, path=os.path.join(directory, f'model_{i}.pkl'))
        save_pkl(fedcore_compressor.original_model, path=os.path.join(directory, f'model_{i}_or.pkl'))
        res.append(exp_res)
        save_json(res, log_path)

        del input_data
        del model
    return fedcore_compressor

In [20]:
def define_coles():
    trx_encoder_params = dict(
    embeddings_noise=0.003,
    numeric_values={'amount_rur': 'identity'},
    embeddings={
        'trans_date': {'in': 800, 'out': 16},
        'small_group': {'in': 250, 'out': 16},
    },
)

    seq_encoder = RnnSeqEncoder(
        trx_encoder=TrxEncoder(**trx_encoder_params),
        hidden_size=256,
        type='gru',
    )

    model = CoLESModule(
        seq_encoder=seq_encoder,
        optimizer_partial=partial(torch.optim.Adam, lr=0.001),
        lr_scheduler_partial=partial(torch.optim.lr_scheduler.StepLR, step_size=30, gamma=0.9),
        head=Head(use_norm_encoder=True, 
                input_size=256,
                hidden_layers_sizes=[256, 256])
    )
    return model

In [None]:
fedcore_compressor = run_experiment('compression_experiments/composite_age', 
               ptls_data_module,
               partial(ContrastiveLoss, margin=0.5, sampling_strategy=HardNegativePairSelector(neg_count=5)),
               define_coles,
               fedcore_setup = {'compression_task': 'composite_compression',
                    'common': dict(save_each=5),
                    'model_params': dict(pruning_model=dict(epochs=2,
                                                            pruning_iterations=3,
                                                            learning_rate=0.001,
                                                            importance='MagnitudeImportance',
                                                            pruner_name='magnitude_pruner',
                                                            importance_norm=1,
                                                            pruning_ratio=0.75,
                                                            finetune_params={'epochs': 1,
                                                                             'custom_loss': None}
                                                            ),
                                         low_rank_model=dict(epochs=30,
                                                             learning_rate=0.001,
                                                             hoyer_loss=0.2,
                                                             energy_thresholds=[0.9],
                                                             orthogonal_loss=5,
                                                             decomposing_mode='channel',
                                                             spectrum_pruning_strategy='energy',
                                                             finetune_params={'epochs': 1,
                                                                              'custom_loss': None}
                                                             ),
                                         training_model=dict(
                                             epochs=10,
                                         )                    
                                         ),  
                    'initial_assumption': [
                        # 'training_model',
                                            # 'pruning_model', 
                                            'low_rank_model',
                                            #   'pruning_model'
                                              ]},
              n =1
               )

In [31]:
fedcore_compressor.original_model

CoLESModule(
  (_loss): ContrastiveLoss()
  (_seq_encoder): RnnSeqEncoder(
    (trx_encoder): TrxEncoder(
      (embeddings): ModuleDict(
        (trans_date): NoisyEmbedding(
          800, 16, padding_idx=0
          (dropout): Dropout(p=0, inplace=False)
        )
        (small_group): NoisyEmbedding(
          250, 16, padding_idx=0
          (dropout): Dropout(p=0, inplace=False)
        )
      )
      (custom_embeddings): ModuleDict(
        (amount_rur): IdentityScaler()
      )
      (custom_embedding_batch_norm): RBatchNorm(
        (bn): BatchNorm1d(1, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (seq_encoder): RnnEncoder(
      (rnn): GRU(33, 256, batch_first=True)
      (reducer): LastStepEncoder()
    )
  )
  (_validation_metric): BatchRecallTopK()
  (_head): Head(
    (model): Sequential(
      (0): Linear(in_features=256, out_features=256, bias=True)
      (1): ReLU()
      (2): Linear(in_features=256, out_features=256, bias=True)
 

In [32]:
estimate_size(fedcore_compressor.original_model), estimate_size(fedcore_compressor.optimised_model)

model size: 1.420MB
model size: 1.423MB


(1.4195785522460938, 1.4226303100585938)

# NSP

In [118]:
from ptls.data_load.datasets import MemoryMapDataset
from ptls.data_load.iterable_processing import SeqLenFilter
from ptls.frames.bert import NspDataset
from ptls.frames.coles.split_strategy import SampleSlices
from ptls.frames import PtlsDataModule

nsp_dl = PtlsDataModule(
    train_data=NspDataset(
        MemoryMapDataset(
            data=train,
            i_filters=[
                SeqLenFilter(min_seq_len=25),
            ],
        ),
        splitter=SampleSlices(
            split_count=5,
            cnt_min=25,
            cnt_max=200,
        ),
    ),
    train_num_workers=16,
    train_batch_size=256,
    test_data=NspDataset(
        MemoryMapDataset(
            data=test,
            i_filters=[
                SeqLenFilter(min_seq_len=25),
            ],
        ),
        splitter=SampleSlices(
            split_count=5,
            cnt_min=25,
            cnt_max=200,
        ),
    ),
    test_num_workers=16,
    test_batch_size=256,
)

2024-09-23 14:51:20,543 - Loaded 1332 records
2024-09-23 14:51:20,546 - Loaded 333 records


In [35]:
nn.TransformerEncoderLayer

torch.nn.modules.transformer.TransformerEncoderLayer

In [117]:
from functools import partial
from ptls.nn import TrxEncoder, RnnSeqEncoder
from ptls.frames.bert import SopNspModule

def define_nsp():
    trx_encoder_params = dict(
        embeddings_noise=0.003,
        numeric_values={'amount_rur': 'identity'},
        embeddings={
            'trans_date': {'in': 800, 'out': 16},
            'small_group': {'in': 250, 'out': 16},
        },
    )

    seq_encoder = RnnSeqEncoder(
        trx_encoder=TrxEncoder(**trx_encoder_params),
        hidden_size=32,
        type='lstm',
    )

    model = SopNspModule(
        seq_encoder=seq_encoder,
        hidden_size = 256,
        drop_p = 0.2,
        optimizer_partial=partial(torch.optim.Adam, lr=0.001),
        lr_scheduler_partial=partial(torch.optim.lr_scheduler.StepLR, step_size=30, gamma=0.9),
    )
    return model

В Batch Handler нужно подавать функцию (batch) -> (X, y), где Х, y: loss(y, model(x))

In [None]:
class DecomposedParameter(torch.nn.parameter.Parameter):
    def __init__(self, data, requires_grad=True):
        U, S, Vh = torch.linalg.svd(data, full_matrices=False)
        self.U = torch.nn.Parameter(U)
        self.S = torch.nn.Parameter(S)
        self.Vh = torch.nn.Parameter(Vh)
        
    
        



In [119]:
from ptls.loss import BCELoss

run_experiment('compression_experiments/composite_age/nsp', 
               nsp_dl,
               BCELoss,
               define_nsp,
               fedcore_setup = {'compression_task': 'composite_compression',
                    'model_params': dict(pruning_model=dict(epochs=5,
                                                            pruning_iterations=3,
                                                            learning_rate=0.001,
                                                            importance='MagnitudeImportance',
                                                            pruner_name='magnitude_pruner',
                                                            importance_norm=1,
                                                            pruning_ratio=0.75,
                                                            finetune_params={'epochs': 5,
                                                                             'custom_loss': None}
                                                            ),
                                         low_rank_model=dict(epochs=30,
                                                             learning_rate=0.001,
                                                             hoyer_loss=0.2,
                                                             energy_thresholds=[0.9],
                                                             orthogonal_loss=5,
                                                             decomposing_mode='channel',
                                                             spectrum_pruning_strategy='energy',
                                                             finetune_params={'epochs': 10,
                                                                              'custom_loss': None}
                                                             ),
                                         training_model=dict(
                                             epochs=100,
                                         )                    
                                         ),
                                         
                    'initial_assumption': ['training_model',
                                            'pruning_model', 
                                            'low_rank_model',
                                              'pruning_model'
                                              ]},
              n =1
              )

  0%|          | 0/1 [00:00<?, ?it/s]

model size: 0.101MB
2024-09-23 14:52:10,442 - Initialising experiment setup
2024-09-23 14:52:10,672 - Initialising Industrial Repository
2024-09-23 14:52:10,676 - Initialising solver
2024-09-23 14:52:10,677 - Initialising experiment setup
Forcely substituted loss to BCELoss(
  (loss): BCELoss()
)


  0%|          | 0/6 [00:12<?, ?it/s]
  0%|          | 0/1 [00:13<?, ?it/s]


AttributeError: 'tuple' object has no attribute 'to'

In [120]:
_ = next(iter(nsp_dl.train_dataloader()))

In [121]:
_

((<ptls.data_load.padded_batch.PaddedBatch at 0x7f05fc6c0f70>,
  <ptls.data_load.padded_batch.PaddedBatch at 0x7f05b4253880>),
 tensor([1, 1, 1,  ..., 0, 0, 0]))

# MLM

In [105]:
import torch
from ptls.nn import TrxEncoder, LongformerEncoder
from ptls.frames.bert import MLMPretrainModule
from ptls.nn import PBLinear, PBL2Norm, PBLayerNorm
from ptls.data_load.datasets import MemoryMapDataset
from ptls.data_load.iterable_processing import FeatureFilter
from ptls.frames.bert import MlmDataset
from ptls.frames import PtlsDataModule

mlm_dm = PtlsDataModule(
    train_data=MlmDataset(
        MemoryMapDataset(
            data=train,
        ),
        min_len=100, max_len=128
    ),
    test_data=MlmDataset(
        MemoryMapDataset(
            data=test,
            i_filters=[
                FeatureFilter(),
            ],
        ),
        min_len=200, max_len=256
    ),
    train_num_workers=16,
    train_batch_size=128,
)

2024-09-23 14:34:45,939 - Loaded 1332 records
2024-09-23 14:34:45,944 - Loaded 333 records


In [106]:
def define_mlm():
    trx_encoder_params = dict(
    embeddings_noise=0.003,
    numeric_values={'amount_rur': 'identity'},
    embeddings={
        'trans_date': {'in': 800, 'out': 16},
        'small_group': {'in': 250, 'out': 16},
    },
    )
    trx_encoder = TrxEncoder(**trx_encoder_params)

    mlm_module = MLMPretrainModule(
    trx_encoder=torch.nn.Sequential(
        trx_encoder,
        PBLinear(trx_encoder.output_size, 64),
        PBL2Norm(),
    ),
    seq_encoder=LongformerEncoder(
        input_size=64,
        num_attention_heads=1,
        intermediate_size=256,
        num_hidden_layers=2,
        attention_window=32,
        max_position_embeddings=2000,
    ),
    hidden_size=256,
    loss_temperature=20.0,
    
    total_steps=30000,

    replace_proba=0.1,
    neg_count=64,
    
    log_logits=True,
    )
    return mlm_module

In [107]:
from ptls.frames.bert.losses.query_soft_max import QuerySoftmaxLoss

run_experiment('compression_experiments/composite_age/mlm', 
               mlm_dm,
               partial(QuerySoftmaxLoss, temperature=20.0, reduce=False),
               define_mlm,
               fedcore_setup = {'compression_task': 'composite_compression',
                    'model_params': dict(pruning_model=dict(epochs=5,
                                                            pruning_iterations=3,
                                                            learning_rate=0.001,
                                                            importance='MagnitudeImportance',
                                                            pruner_name='magnitude_pruner',
                                                            importance_norm=1,
                                                            pruning_ratio=0.75,
                                                            finetune_params={'epochs': 5,
                                                                             'custom_loss': None}
                                                            ),
                                         low_rank_model=dict(epochs=30,
                                                             learning_rate=0.001,
                                                             hoyer_loss=0.2,
                                                             energy_thresholds=[0.9],
                                                             orthogonal_loss=5,
                                                             decomposing_mode='channel',
                                                             spectrum_pruning_strategy='energy',
                                                             finetune_params={'epochs': 10,
                                                                              'custom_loss': None}
                                                             ),
                                         training_model=dict(
                                             epochs=100,
                                         )                    
                                         ),
                                         
                    'initial_assumption': ['training_model',
                                            # 'pruning_model', 
                                            # 'low_rank_model',
                                            #   'pruning_model'
                                              ]},
              n =1
              )

  0%|          | 0/1 [00:00<?, ?it/s]

model size: 1.040MB
2024-09-23 14:34:50,883 - Initialising experiment setup
2024-09-23 14:34:51,093 - Initialising Industrial Repository
2024-09-23 14:34:51,095 - Initialising solver
2024-09-23 14:34:51,096 - Initialising experiment setup
Forcely substituted loss to QuerySoftmaxLoss()


  0%|          | 0/11 [00:13<?, ?it/s]
  0%|          | 0/1 [00:13<?, ?it/s]


TypeError: cannot unpack non-iterable PaddedBatch object

# GPT

In [108]:
from ptls.frames.gpt.gpt_module import GptPretrainModule
from ptls.frames.gpt.gpt_dataset import GptDataset

ptls_data_module_gpt = PtlsDataModule(
    train_data=GptDataset(
        MemoryMapDataset(
            data=train,
            i_filters=[
                SeqLenFilter(min_seq_len=25),
            ],
        ),
        splitter=SampleSlices(
            split_count=5,
            cnt_min=25,
            cnt_max=200,
        ),
    ),
    train_num_workers=4,
    train_batch_size=256,
    test_data=GptDataset(
        MemoryMapDataset(
            data=test,
            i_filters=[
                SeqLenFilter(min_seq_len=25),
            ],
        ),
        splitter=SampleSlices(
            split_count=5,
            cnt_min=25,
            cnt_max=200,
        ),
    ),
    test_num_workers=4,
    test_batch_size=256,
)

2024-09-23 14:35:37,041 - Loaded 1332 records


TypeError: __init__() missing 2 required positional arguments: 'min_len' and 'max_len'

In [None]:
def define_coles():
    trx_encoder_params = dict(
    embeddings_noise=0.003,
    numeric_values={'amount_rur': 'identity'},
    embeddings={
        'trans_date': {'in': 800, 'out': 16},
        'small_group': {'in': 250, 'out': 16},
    },
)

    seq_encoder = RnnSeqEncoder(
        trx_encoder=TrxEncoder(**trx_encoder_params),
        hidden_size=256,
        type='gru',
    )

    model = GptPretrainModule(
        seq_encoder=seq_encoder,
        optimizer_partial=partial(torch.optim.Adam, lr=0.001),
        lr_scheduler_partial=partial(torch.optim.lr_scheduler.StepLR, step_size=30, gamma=0.9),
        head=Head(use_norm_encoder=True, 
                input_size=256,
                hidden_layers_sizes=[256, 256])
    )
    return model

In [18]:
fedcore_compressor.fit((input_data, model), manually_done=True)

2024-09-23 11:14:06,070 - Initialising Industrial Repository
2024-09-23 11:14:06,365 - Initialising solver
2024-09-23 11:14:06,366 - Initialising experiment setup
Forcely substituted loss to ContrastiveLoss()


100%|██████████| 36/36 [00:26<00:00,  1.35it/s]


Epoch: 1, Average loss 1477.1728312439388


100%|██████████| 9/9 [00:04<00:00,  2.08it/s]






Forcely substituted loss to ContrastiveLoss()


100%|██████████| 36/36 [00:26<00:00,  1.37it/s]


Epoch: 1, Average loss 136.5059274037679
Forcely substituted loss to ContrastiveLoss()


100%|██████████| 36/36 [00:26<00:00,  1.34it/s]


Epoch: 1, Average loss 124.39279308584001


100%|██████████| 36/36 [00:27<00:00,  1.32it/s]


Epoch: 2, Average loss 119.3623763985104


100%|██████████| 36/36 [00:27<00:00,  1.31it/s]


Epoch: 3, Average loss 114.56469586160448


100%|██████████| 36/36 [00:27<00:00,  1.33it/s]


Epoch: 4, Average loss 110.45625359482236


100%|██████████| 36/36 [00:27<00:00,  1.31it/s]


Epoch: 5, Average loss 110.38965015941196
Params: 0.37 M => 0.37 M
MACs: 0.00 G => 0.00 G
Forcely substituted loss to ContrastiveLoss()


100%|██████████| 36/36 [00:23<00:00,  1.56it/s]


Epoch: 1, Average loss 107.02375711335077, orthogonal_loss: 9.167923, hoer_loss: 2.624759, metric_loss: 6.543130


100%|██████████| 36/36 [00:23<00:00,  1.55it/s]


Epoch: 2, Average loss 102.59809509913127, orthogonal_loss: 8.491811, hoer_loss: 2.529775, metric_loss: 5.962034


100%|██████████| 36/36 [00:23<00:00,  1.53it/s]


Epoch: 3, Average loss 99.99738940927718, orthogonal_loss: 9.540761, hoer_loss: 2.430923, metric_loss: 7.109838


100%|██████████| 36/36 [00:23<00:00,  1.54it/s]


Epoch: 4, Average loss 98.73699593544006, orthogonal_loss: 8.471926, hoer_loss: 2.330903, metric_loss: 6.141023


100%|██████████| 36/36 [00:24<00:00,  1.50it/s]

Epoch: 5, Average loss 97.28895076115926, orthogonal_loss: 7.671278, hoer_loss: 2.232582, metric_loss: 5.438696





Forcely substituted loss to ContrastiveLoss()


100%|██████████| 36/36 [00:23<00:00,  1.55it/s]


Epoch: 1, Average loss 97.20061695575714


100%|██████████| 36/36 [00:23<00:00,  1.56it/s]


Epoch: 2, Average loss 94.50033469994862


100%|██████████| 36/36 [00:24<00:00,  1.48it/s]


Epoch: 3, Average loss 94.65465924474928


100%|██████████| 36/36 [00:23<00:00,  1.55it/s]


Epoch: 4, Average loss 93.06651304827795


100%|██████████| 36/36 [00:23<00:00,  1.53it/s]


Epoch: 5, Average loss 92.19936703311072


100%|██████████| 36/36 [00:24<00:00,  1.49it/s]


Epoch: 6, Average loss 90.97706390751733


100%|██████████| 36/36 [00:23<00:00,  1.54it/s]


Epoch: 7, Average loss 92.9701257944107


100%|██████████| 36/36 [00:23<00:00,  1.51it/s]


Epoch: 8, Average loss 92.77966568205092


100%|██████████| 36/36 [00:24<00:00,  1.47it/s]


Epoch: 9, Average loss 91.38662118381924


100%|██████████| 36/36 [00:24<00:00,  1.50it/s]


Epoch: 10, Average loss 90.65867667728
Params: 0.50 M => 0.50 M
MACs: 0.00 G => 0.00 G


In [17]:
from functools import partial
from ptls.frames.coles.sampling_strategies.hard_negative_pair_selector import HardNegativePairSelector
from fedcore.losses.ptls_losses import ContrastiveLoss, VicregLoss

partial(ContrastiveLoss, margin=0.5, sampling_strategy=HardNegativePairSelector(neg_count=5))()

ContrastiveLoss()

In [18]:
quant_model = compression_pipeline.predict(input_data).predict
quant_model.save('./output')

NameError: name 'compression_pipeline' is not defined

In [None]:
evaluator = PerformanceEvaluator(model, dataset, batch_size=64)
performance = evaluator.eval()
print('after quantization')
print(performance)

In [None]:
from ptls.frames.coles.coles_module import CoLESModule
from ptls.frames.cpc.modules.cpc_module import CpcModule
from ptls.frames.bert.modules.mlm_module import MLMPretrainModule
from ptls.frames.gpt.gpt_module
from ptls.frames.tabformer.tabformer_module import 

# EVALUATION

In [None]:
dir_ = 'compression_experiments/composite_age/coles'
for m in os.listdir(dir_):
    try:
        path = os.path.join(dir_, m, 'model_0_or.pkl')
        model_ = load_pkl(path)
        evaluator = R.PerformanceEvaluator(model_, ptls_data_module.test_dataloader(), batch_size=64)
        performance = evaluator.eval()
        print(m)
        print(performance)
    except:
        continue
    

from fedcore.tools.ruler import PerformanceEvaluator


evaluator = PerformanceEvaluator(model, ptls_data_module.test_dataloader(), batch_size=64)
performance = evaluator.eval()
print('after quantization')
print(performance)