# neftune

思路：输入经过Embedding层后，再加入一个均匀分布的噪声

<img src="./forum.png" width="600" height="200">

In [1]:
import torch 

# Toy embedding table: 20 rows (valid indices 0..19), each a 100-dimensional vector.
my_embedding=torch.nn.Embedding(num_embeddings=20, embedding_dim=100)

# Random integer indices in [0, 20) with shape (2, 5), used as the embedding input.
random_input=torch.randint(0,20,(2,5))

# Display the sampled indices.
random_input

tensor([[ 8, 19,  1, 17, 19],
        [ 4, 11, 10,  9, 13]])

In [4]:
# Baseline embeddings, computed before any forward hook is registered on my_embedding.
origin_embedding_output=my_embedding(random_input)

# Expect (2, 5, 100): one 100-dimensional vector per input index.
origin_embedding_output.shape

torch.Size([2, 5, 100])

In [5]:
def neftune_post_forward_hook(module, input, output):
    """Add NEFTune uniform noise to the output of an embedding layer.

    Meant to be registered through ``register_forward_hook``; the tensor
    returned here is used in place of the module's original output.

    Args:
        module: Module the hook is attached to. It must carry a
            ``neftune_noise_alpha`` attribute holding the noise scale.
        input: Inputs of the forward call (unused).
        output: Output of the forward call; the sizes of its dimensions 1 and 2
            determine how the noise is scaled.

    Returns:
        ``output`` itself when the module is in eval mode; otherwise ``output``
        plus noise sampled uniformly between ``-mag_norm`` and ``mag_norm``,
        where ``mag_norm = alpha / sqrt(size(1) * size(2))``.
    """
    # Noise is a training-time regulariser only; leave eval/inference outputs untouched.
    if not module.training:
        return output
    dims = torch.tensor(output.size(1) * output.size(2))
    mag_norm = module.neftune_noise_alpha / torch.sqrt(dims)
    return output + torch.zeros_like(output).uniform_(-mag_norm, mag_norm)

# The hook reads its noise scale from this attribute on the module it is attached to.
my_embedding.neftune_noise_alpha = 0.2

# Keep the returned handle so the hook can be removed again later.
handle_neftune=my_embedding.register_forward_hook(hook=neftune_post_forward_hook)



In [6]:
# Same input, now with the forward hook registered: the result includes the added noise.
neftune_embedding_output=my_embedding(random_input)
neftune_embedding_output.shape

torch.Size([2, 5, 100])

In [7]:
# Compare with the hook-free baseline; the recorded result is False because noise was added.
torch.allclose(origin_embedding_output, neftune_embedding_output)

False

In [9]:
# Noise bound for alpha = 0.2 and an output whose dimensions 1 and 2 are 5 and 100: alpha / sqrt(5 * 100).
0.2/torch.sqrt(torch.tensor(5.0*100.0))

tensor(0.0089)

In [8]:

# Recover the injected noise as the difference from the baseline, then inspect its extremes.
test_noise=neftune_embedding_output-origin_embedding_output
torch.max(test_noise),torch.min(test_noise)

(tensor(0.0089, grad_fn=<MaxBackward1>),
 tensor(-0.0089, grad_fn=<MinBackward1>))

In [10]:
# Detach the forward hook from my_embedding.
handle_neftune.remove()
del handle_neftune  # drop the hook handle

In [11]:
# Forward pass after the hook has been removed.
neftune_remove_embedding_output=my_embedding(random_input)
neftune_remove_embedding_output.shape

torch.Size([2, 5, 100])

In [12]:
# With the hook gone the output matches the baseline again; the recorded result is True.
torch.allclose(origin_embedding_output, neftune_remove_embedding_output)

True

1. 获取维度：`dims = torch.tensor(output.size(1) * output.size(2))`，即输出第 1、2 维大小的乘积（序列长度 × embedding 维度）  
2. 计算噪声幅度：`mag_norm = module.neftune_noise_alpha / torch.sqrt(dims)`  
3. 生成噪声：`noise = torch.zeros_like(output).uniform_(-mag_norm, mag_norm)`，即在 -mag_norm 到 mag_norm 之间均匀采样  
4. 将噪声加到输出上：`output = output + noise`

In [None]:
#transformers/src/transformers/trainer_utils.py
def neftune_post_forward_hook(module, input, output):
    """
    Forward hook that applies NEFTune noise to the output of a `torch.nn.Embedding` layer while training.

    Slightly adapted from the original implementation at https://github.com/neelsjain/NEFTune. Usage:
    ```python
    model = ...
    model.embed_tokens.neftune_noise_alpha = 0.1
    model.embed_tokens.register_forward_hook(neftune_post_forward_hook)
    ```
    Args:
        module (`torch.nn.Module`):
            The embedding module the hook is attached to. `module.neftune_noise_alpha` must be set to the desired
            noise alpha value before the hook runs.
        input (`torch.Tensor`):
            The input tensor to the model (not used by the hook).
        output (`torch.Tensor`):
            The output tensor of the model (i.e. the embeddings).

    Returns:
        `output` untouched when the module is not in training mode, otherwise `output` plus uniform noise.
    """
    # Outside of training the embeddings pass through unchanged.
    if not module.training:
        return output

    # The noise magnitude shrinks with the product of the sizes of dimensions 1 and 2.
    element_count = torch.tensor(output.size(1) * output.size(2))
    noise_bound = module.neftune_noise_alpha / torch.sqrt(element_count)
    noise = torch.zeros_like(output).uniform_(-noise_bound, noise_bound)
    return output + noise

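激活neftune
1. 判断类型取出embeddings  
2. `embeddings.neftune_noise_alpha = self.neftune_noise_alpha`：把alpha写到embedding模块上，供hook读取  
3. `hook_handle = embeddings.register_forward_hook(neftune_post_forward_hook)`：在embedding的前向传播上注册hook  
4. `self.neftune_hook_handle = hook_handle`：保存hook句柄，便于之后移除  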

取消neftune
1. 先检查 `self.neftune_hook_handle` 是否存在（未激活则抛出 `ValueError`），再判断类型取出embeddings  
2. `self.neftune_hook_handle.remove()`：移除hook  
3. `del embeddings.neftune_noise_alpha, unwrapped_model`：移除alpha属性



In [None]:
#transformers/src/transformers/trainer.py
def _activate_neftune(self, model):
        r"""
        Turn on NEFTune for `model` by hooking its input embedding layer.

        Code: https://github.com/neelsjain/NEFTune, paper: https://arxiv.org/abs/2310.05914

        The noise alpha is copied from `self.neftune_noise_alpha` onto the embedding module, the forward hook is
        registered on that module, and the hook handle is stored on `self.neftune_hook_handle`. Returns `model`.
        """
        base = self.accelerator.unwrap_model(model)

        # For a PEFT model the input embeddings are fetched through `base_model.model`.
        embeddings = (
            base.base_model.model.get_input_embeddings()
            if _is_peft_model(base)
            else base.get_input_embeddings()
        )
        del base

        # The hook reads the alpha value from the module it is attached to.
        embeddings.neftune_noise_alpha = self.neftune_noise_alpha
        self.neftune_hook_handle = embeddings.register_forward_hook(neftune_post_forward_hook)
        return model

    def _deactivate_neftune(self, model):
        """
        Turn NEFTune off again: remove the forward hook and delete the alpha attribute from the embedding module.

        Raises:
            ValueError: if `self.neftune_hook_handle` does not exist, i.e. `_activate_neftune` was never called.
        """
        activated = hasattr(self, "neftune_hook_handle")
        if not activated:
            raise ValueError("Neftune is not activated make sure to call `trainer._activate_neftune()` first")

        base = self.accelerator.unwrap_model(model)

        # For a PEFT model the input embeddings are fetched through `base_model.model`.
        embeddings = (
            base.base_model.model.get_input_embeddings()
            if _is_peft_model(base)
            else base.get_input_embeddings()
        )

        self.neftune_hook_handle.remove()
        del embeddings.neftune_noise_alpha
        del base