# Roformer code demostration

> Author: Daniel Zhan

> Email: xuhui.zhan@vanderbilt.edu

## Demostration of perform rotating on token embedding matrix

In [14]:
import numpy as np

def get_rotary_matrix(dim, pos):
    """
    Calculate the rotary position embedding (RoPE) matrix for a given position.

    Args:
    - dim (int): The dimensionality of the model (should be even).
    - pos (int): The position for which to calculate the RoPE matrix.

    Returns:
    - np.array: The RoPE matrix for the given position and dimension.
    """
    # Ensure the dimension is even since we're dealing with 2D rotations
    if dim % 2 != 0:
        raise ValueError("The dimension should be an even number.")

    # Frequency of the rotation for each dimension pair
    freqs = np.arange(dim // 2) / (dim // 2)
    inv_freqs = 1 / (10000 ** freqs)
    
    # Calculate the angles for the rotation
    angles = pos * inv_freqs
    sin_angles = np.sin(angles)
    cos_angles = np.cos(angles)
    
    # Construct the rotation matrix
    rotation_matrix = np.empty((dim, dim))
    rotation_matrix[0::2, 0::2] = cos_angles
    rotation_matrix[1::2, 1::2] = cos_angles
    rotation_matrix[0::2, 1::2] = -sin_angles
    rotation_matrix[1::2, 0::2] = sin_angles
    
    return rotation_matrix

def apply_rotary_embedding(embedding_matrix, pos):
    """
    Apply the rotary position embedding to the token embedding matrix.

    Args:
    - embedding_matrix (np.array): The token embedding matrix.
    - pos (int): The position in the sequence.

    Returns:
    - np.array: The rotated token embedding matrix.
    """
    dim = embedding_matrix.shape[1]
    rope_matrix = get_rotary_matrix(dim, pos)
    
    # Apply the rotation to each token embedding
    rotated_embedding_matrix = embedding_matrix @ rope_matrix
    return rotated_embedding_matrix

# Example usage:
# Suppose we have a token embedding matrix of size (num_tokens, dim_model)
# where num_tokens is the number of tokens in the sequence and dim_model is the model dimension.
# Let's define a dummy embedding matrix and a position 'p' for which we want to calculate the rotation.

num_tokens = 4  # Just an example, typically the number of tokens in your sequence
dim_model = 6   # The dimension of the model, should be even
p = 3           # The position in the sequence for which to calculate the rotary embeddings

# Create a dummy token embedding matrix
embedding_matrix = np.random.randn(num_tokens, dim_model)

# Apply rotary embeddings
rotated_embedding_matrix = apply_rotary_embedding(embedding_matrix, p)
rotated_embedding_matrix


array([[-0.53273496,  0.24812483,  0.44309287, -0.38605851,  0.4903044 ,
        -0.32399702],
       [ 0.13329626, -2.25115923,  0.49586427,  2.19991009,  0.20024322,
         2.24619421],
       [-1.89775242,  2.88864878,  1.02277696, -3.30146684,  1.45086962,
        -3.13699432],
       [ 1.99119877,  1.39832662, -2.30075392, -0.79165729, -2.17568955,
        -1.08929558]])

## Roformer config

In [15]:
from transformers import RoFormerModel, RoFormerConfig

# Initializing a RoFormer junnyu/roformer_chinese_base style configuration
configuration = RoFormerConfig()

# Initializing a model (with random weights) from the junnyu/roformer_chinese_base style configuration
model = RoFormerModel(configuration)

# Accessing the model configuration
configuration = model.config

## RoFormerTokenizer

In [16]:
from transformers import RoFormerTokenizer

tokenizer = RoFormerTokenizer.from_pretrained("junnyu/roformer_chinese_base")
tokenizer.tokenize("今天天气非常好。")

['今', '天', '天', '气', '非常', '好', '。']

## RoFormerTokenizerFast
Construct a “fast” RoFormer tokenizer (backed by HuggingFace’s tokenizers library).

In [17]:
from transformers import RoFormerTokenizerFast

tokenizer = RoFormerTokenizerFast.from_pretrained("junnyu/roformer_chinese_base")
tokenizer.tokenize("今天天气非常好。")

['今', '天', '天', '气', '非常', '好', '。']

## RoFormerForSequenceClassification
RoFormer Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled output) e.g. for GLUE tasks.

This model is a PyTorch torch.nn.Module sub-class. Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and behavior.

**Example for single label classification**

In [18]:
import torch
from transformers import AutoTokenizer, RoFormerForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("junnyu/roformer_chinese_base")
model = RoFormerForSequenceClassification.from_pretrained("junnyu/roformer_chinese_base")

inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits

predicted_class_id = logits.argmax().item()

print("Predicted class:", model.config.id2label[predicted_class_id])

# # To train a model on `num_labels` classes, you can pass `num_labels=num_labels` to `.from_pretrained(...)`
# num_labels = len(model.config.id2label)
# model = RoFormerForSequenceClassification.from_pretrained("junnyu/roformer_chinese_base", num_labels=num_labels)

# labels = torch.tensor([1])
# loss = model(**inputs, labels=labels).loss

Some weights of RoFormerForSequenceClassification were not initialized from the model checkpoint at junnyu/roformer_chinese_base and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.weight', 'roformer.encoder.embed_positions.weight', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Predicted class: LABEL_0


**Example for multi label classification**

In [19]:
import torch
from transformers import AutoTokenizer, RoFormerForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("junnyu/roformer_chinese_base")
model = RoFormerForSequenceClassification.from_pretrained("junnyu/roformer_chinese_base", problem_type="multi_label_classification")

inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits

predicted_class_ids = torch.arange(0, logits.shape[-1])[torch.sigmoid(logits).squeeze(dim=0) > 0.5]

# To train a model on `num_labels` classes, you can pass `num_labels=num_labels` to `.from_pretrained(...)`
num_labels = len(model.config.id2label)
model = RoFormerForSequenceClassification.from_pretrained(
    "junnyu/roformer_chinese_base", num_labels=num_labels, problem_type="multi_label_classification"
)

labels = torch.sum(
    torch.nn.functional.one_hot(predicted_class_ids[None, :].clone(), num_classes=num_labels), dim=1
).to(torch.float)
loss = model(**inputs, labels=labels).loss
display(labels)

Some weights of RoFormerForSequenceClassification were not initialized from the model checkpoint at junnyu/roformer_chinese_base and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.weight', 'roformer.encoder.embed_positions.weight', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RoFormerForSequenceClassification were not initialized from the model checkpoint at junnyu/roformer_chinese_base and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.weight', 'roformer.encoder.embed_positions.weight', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tensor([[1., 1.]])

## RoFormerForTokenClassification
RoFormer Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks.

This model is a PyTorch torch.nn.Module sub-class. Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and behavior.

In [20]:
from transformers import AutoTokenizer, RoFormerForTokenClassification
import torch

tokenizer = AutoTokenizer.from_pretrained("junnyu/roformer_chinese_base")
model = RoFormerForTokenClassification.from_pretrained("junnyu/roformer_chinese_base")

inputs = tokenizer(
    "HuggingFace is a company based in Paris and New York", add_special_tokens=False, return_tensors="pt"
)

with torch.no_grad():
    logits = model(**inputs).logits

predicted_token_class_ids = logits.argmax(-1)

# Note that tokens are classified rather then input words which means that
# there might be more predicted token classes than words.
# Multiple token classes might account for the same word
predicted_tokens_classes = [model.config.id2label[t.item()] for t in predicted_token_class_ids[0]]

labels = predicted_token_class_ids
loss = model(**inputs, labels=labels).loss
display(labels)

Some weights of RoFormerForTokenClassification were not initialized from the model checkpoint at junnyu/roformer_chinese_base and are newly initialized: ['roformer.encoder.embed_positions.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])