In [11]:
import os
import sys
sys.path.append("../")
from model.model_retnet.retnet_gpt import RetnetGPT
from torchscale.architecture.config import RetNetConfig,RetNetConfigDataclass
from torch import nn

def count_parameters_in_MB(model: nn.Module, trainable_only: bool = True) -> float:
    """
    Compute the memory footprint of a PyTorch model's parameters in MB.

    The byte width of every parameter is read from its own dtype via
    ``Tensor.element_size()`` rather than assumed to be 4 bytes (float32),
    so half- or double-precision models are reported correctly. Only
    ``model.parameters()`` is inspected; buffers are not included.

    Args:
    - model (nn.Module): The PyTorch model.
    - trainable_only (bool): When True (the default), only parameters with
      ``requires_grad=True`` are counted. Pass False to also include
      frozen parameters.

    Returns:
    - float: Total size of the counted parameters in MB (1MB = 2^20 bytes).
    """
    total_bytes = sum(
        p.numel() * p.element_size()
        for p in model.parameters()
        # Keep a parameter if it is trainable, or if frozen ones were requested too.
        if p.requires_grad or not trainable_only
    )

    # True division keeps the result a float even for an empty model (0.0).
    return total_bytes / (2**20)

# RetNet size presets. Every preset uses the same activation_fn, dropout,
# no_output_layer and vocab_size values; they differ in embedding width,
# number of retention heads, FFN width and number of decoder layers.
base_config = RetNetConfigDataclass(
    vocab_size=60054,
    decoder_layers=6,
    decoder_embed_dim=512,
    decoder_retention_heads=2,
    decoder_ffn_embed_dim=1024,
    activation_fn="gelu",
    dropout=0.0,
    no_output_layer=False,
)

base_medium_config = RetNetConfigDataclass(
    vocab_size=60054,
    decoder_layers=6,
    decoder_embed_dim=768,
    decoder_retention_heads=3,
    decoder_ffn_embed_dim=768 * 2,  # FFN width is twice the embedding width
    activation_fn="gelu",
    dropout=0.0,
    no_output_layer=False,
)

medium_config = RetNetConfigDataclass(
    vocab_size=60054,
    decoder_layers=10,
    decoder_embed_dim=1024,
    decoder_retention_heads=4,
    decoder_ffn_embed_dim=2048,
    activation_fn="gelu",
    dropout=0.0,
    no_output_layer=False,
)

# Same widths as medium_config, but with 16 decoder layers instead of 10.
large_config = RetNetConfigDataclass(
    vocab_size=60054,
    decoder_layers=16,
    decoder_embed_dim=1024,
    decoder_retention_heads=4,
    decoder_ffn_embed_dim=2048,
    activation_fn="gelu",
    dropout=0.0,
    no_output_layer=False,
)
# Instantiate each preset in turn and print its parameter footprint.
# The tuple order determines the order of the printed lines; once the loop
# ends, `config` and `model` stay bound to the last preset (base_medium_config).
for config in (large_config, base_config, base_medium_config):
    model = RetnetGPT(config)
    print(f"MB of parameters in model: {count_parameters_in_MB(model)}")

MB of parameters in model: 1238.3046875
MB of parameters in model: 306.80078125
MB of parameters in model: 514.201171875


In [7]:
from model.model_gpt.model_gpt2_hug_formet import GPT, GPTConfig
from transformers import GPT2Tokenizer


# GPT model for comparison; vocab_size matches the 60054 used by the RetNet presets.
config = GPTConfig(
    vocab_size=60054,
    block_size=2048,
    n_layer=12,
    n_head=12,
    n_embd=768,
    dropout=0.1,
    use_cosformer=False,
)

# Build the model and report its parameter footprint with the shared helper.
model = GPT(config)
print(f"MB of parameters in model: {count_parameters_in_MB(model)}")

number of parameters: 131.08M
MB of parameters in model: 506.0126953125
