In [1]:
from transformers import AutoTokenizer
from my_modeling_llama import LlamaForCausalLM
import torch
from my_utils import get_seq_train_batch
import numpy as np
import os
from contextlib import nullcontext
from torch.nn import CrossEntropyLoss

from peft import prepare_model_for_kbit_training

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

[2023-08-14 18:22:20,295] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)


In [2]:
device = 'cuda:0'
device_type = 'cuda' if 'cuda' in device else 'cpu' # for later use in torch.autocast
dtype = 'bfloat16' if torch.cuda.is_bf16_supported() else 'float16' # 'float32' or 'bfloat16' or 'float16'
ptdtype = {'float32': torch.float32, 'bfloat16': torch.bfloat16, 'float16': torch.float16}[dtype]
ctx = nullcontext() if device_type == 'cpu' else torch.amp.autocast(device_type=device_type, dtype=ptdtype)

In [3]:
# model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", load_in_8bit=True, device_map={'': device}, torch_dtype=torch.float16, cache_dir="/data/yuanhang/hf_cache")
model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", load_in_8bit=True, device_map={'': "cuda:0"}, torch_dtype=torch.float16, cache_dir="/data/yuanhang/hf_cache")
# model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", device_map="auto")
model = prepare_model_for_kbit_training(model)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [30]:
# for n, p in model.named_parameters():
#     print(n, p.dtype)

In [4]:
model.eval()

for p in model.parameters():
    p.requires_grad_(False)

# print(model.config)

In [5]:
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")

tokenizer.pad_token = model.model.padding_idx
# tokenizer.padding_side = "left"

In [6]:
tokenizer("yangyy", return_tensors="pt", padding=True)

{'input_ids': tensor([[   1,  343,  574, 8071]]), 'token_type_ids': tensor([[0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1]])}

In [8]:
def generate_sentence(question, input_parameter=None, peft="prompt"):
    x = tokenizer(question, return_tensors="pt", padding=True)
    x.to(device)
    print(x.input_ids.shape)
    # print(x)

    # run generation
    with torch.no_grad():
        with ctx:
            for k in range(1):
                # y = model.generate(x, max_new_tokens, temperature=temperature, top_k=top_k, input_parameter=target_model_parameter)
                y = model.generate(x.input_ids, max_length=512, input_parameter=input_parameter, peft=peft)

                y = y[:, x.input_ids.shape[1]:]

                result = tokenizer.batch_decode(y, skip_special_tokens=True, clean_up_tokenization_spaces=False)
                
                print(result[0])
                print('---------------')
            print('===============================================================')

In [14]:
question = [
    'Question: Aesthetics deals with objects that are_____. \n A: essential to our existence B: unimportant to most people C: not essential to our existence D: rarely viewed. \n Answer: C \n',
    'Question: For Socrates, an unexamined life is a tragedy because it results in grievous harm to _____. \n A: the state B: the justice system C: the body D: the soul. \n Answer: D \n',
    'Question: For Socrates, the soul is harmed by lack of _____. \n A: knowledge B: wealth C: community D: courage. \n Answer: A \n',
    'Question: According to Kant, nothing can be called “good” without qualification except _____. \n A: right action B: good consequences C: happiness D: a good will. \n Answer: D \n',
    'Question: Baier argues that genuine moral rules: \n A: must be for the good of human beings. B: make take into account the interests of all sentient beings. C: must take into account the interests of all living beings. D: are primarily directed toward promoting self-interest. \n Answer:',
    # "Question: Plato's view is that true beauty is _____. \n A: found in everyday objects B: nonexistent C: everywhere in the natural world D: not of this world. \n ",
    ]

question_str = ""
for q in question:
    question_str += q

generate_sentence([question_str])

torch.Size([1, 264])
B 
Question: Kant argues that the moral law can be known _____. 
 A: by intuition B: by experience C: by reason D: by revelation. 
 Answer: A 
Question: For Kant, the moral law is not something we _____. 
 A: learn from the Bible B: know from experience C: discover through intuition D: can choose to follow or not. 
 Answer: C 
Question: According to Kant, the moral law is _____. 
 A: a law of nature B: a law of nature that is also a law of reason C: a law of nature that is also a law of God D: a law of reason that is also a law of God. 
 Answer: B 
Question: Kant argues that we are not free to choose to follow the moral law because it is _____. 
 A: a law of nature B: a law of God C: a law of reason D: a law of experience. 
 Answer: B 
Question: Kant argues that we cannot choose to follow the moral law because it is _____. 
 A: a law of
---------------


In [21]:
import random
train_data = np.memmap("./data/llama_openwebtext/train.bin", dtype=np.uint16, mode='r')

for _ in range(20):
    with torch.no_grad():
        data_pointer, x, y, attention_mask, seg_length_list = get_seq_train_batch(train_data, [random.randint(0, 100000000)], 16, 256, 128, device, device_type, False)

        x = x.squeeze(0)
        y = y.squeeze(0)
        attention_mask = attention_mask.squeeze(0)

        # print(x[0])
        # print(y[0])

        output = model(x, attention_mask=attention_mask, labels=y)
        print(output.loss)

        break

        # output = other_model(x, attention_mask=attention_mask, labels=y)
        # print(output.loss)
        
        # if torch.isnan(output.loss):
        #     print("nan!!!")
        # print("---"*20)

        # out = model(x, output_embeds=True)
        # print(out.shape, x)
        # break




tensor(2.2665, device='cuda:0')


In [None]:

shift_logits = output.logits.view(-1, model.config.vocab_size)
shift_labels = y.view(-1)

loss_fct = CrossEntropyLoss(ignore_index=-1, reduction='mean')
loss = loss_fct(shift_logits, shift_labels)
print(loss)


tensor(1.4192, device='cuda:1')
tensor(1.4192, device='cuda:1')


# load memory model

In [9]:
from my_modeling_roberta import MemoryRobertaModel

out_dir = 'out'

peft_method = "prompt"
# peft_method = "lora"

ckpt_path = os.path.join(out_dir, 'predict_1st_kl_1seg_llama.pt')
checkpoint = torch.load(ckpt_path, map_location=device)
evolver_config = checkpoint['evolver_config']
evolver_model = MemoryRobertaModel(evolver_config)
state_dict = checkpoint['evolver_model']
unwanted_prefix = '_orig_mod.'
for k,v in list(state_dict.items()):
    if k.startswith(unwanted_prefix):
        state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
evolver_model.load_state_dict(state_dict)

evolver_model.eval()
evolver_model.to(device)

number of parameters: 1342.62M


MemoryRobertaModel(
  (encoder): RobertaEncoder(
    (layer): ModuleList(
      (0-5): 6 x RobertaLayer(
        (attention): RobertaAttention(
          (self): RobertaSelfAttention(
            (query): Linear(in_features=4096, out_features=4096, bias=True)
            (key): Linear(in_features=4096, out_features=4096, bias=True)
            (value): Linear(in_features=4096, out_features=4096, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): RobertaSelfOutput(
            (dense): Linear(in_features=4096, out_features=4096, bias=True)
            (LayerNorm): LayerNorm((4096,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
        (intermediate): RobertaIntermediate(
          (dense): Linear(in_features=4096, out_features=8192, bias=True)
          (intermediate_act_fn): GELUActivation()
        )
        (output): RobertaOutput(
          (dense): Linear(in_features=8192,

In [10]:
checkpoint["iter_num"]

700

In [11]:
def generate_parameter(context_list=None, context_id_list=None):
    if context_list is not None:
        encoded_context = []

        for c in context_list:
            ids = tokenizer(c, return_tensors="pt", padding=True)
            # ids.append(enc.eot_token)
            encoded_context.append(ids.input_ids.to(device))
    
    if context_id_list is not None:
        encoded_context = context_id_list
        
    input_memory = None
    target_model_parameter = None

    for index, ec in enumerate(encoded_context):
        output_embeds = model(input_ids=ec, output_embeds=True, return_dict=False)

        input_memory = evolver_model(inputs_embeds=output_embeds, input_memory=input_memory)["memory_output"]

    # last memory -> X
    target_model_parameter = evolver_model(input_memory=input_memory, produce_parameter_flag=True)

    # print(input_memory)
    # print(target_model_parameter.shape)
    
    return target_model_parameter

In [16]:
context = [
    'Question: Aesthetics deals with objects that are_____. \n A: essential to our existence B: unimportant to most people C: not essential to our existence D: rarely viewed. \n Answer: C \n',
    'Question: For Socrates, an unexamined life is a tragedy because it results in grievous harm to _____. \n A: the state B: the justice system C: the body D: the soul. \n Answer: D \n',
    'Question: For Socrates, the soul is harmed by lack of _____. \n A: knowledge B: wealth C: community D: courage. \n Answer: A \n',
    'Question: According to Kant, nothing can be called “good” without qualification except _____. \n A: right action B: good consequences C: happiness D: a good will. \n Answer: D \n',
    # "Question: Plato's view is that true beauty is _____. \n A: found in everyday objects B: nonexistent C: everywhere in the natural world D: not of this world. \n ",
    ]

context_str = ""
for c in context:
    context_str += q

question = 'Question: Baier argues that genuine moral rules: \n A: must be for the good of human beings. B: make take into account the interests of all sentient beings. C: must take into account the interests of all living beings. D: are primarily directed toward promoting self-interest. \n Answer:',

input_parameter = generate_parameter(context_list=[context])

generate_sentence(question, input_parameter, peft_method)
generate_sentence(question)


torch.Size([1, 73])
B: make take into account the interests of all sentient beings. 
Explanation: 
B: The only moral rule that makes sense is one that takes into account the interests of all sentient beings.
A: This is not a moral rule.
C: This is not a moral rule.
D: This is not a moral rule.

## Solution

B: The only moral rule that makes sense is one that takes into account the interests of all sentient beings.
---------------
torch.Size([1, 73])
A
Explanation:
The question is asking you to find the best argument for a conclusion.
The conclusion is that moral rules must take into account the interests of all living beings.
The argument is that moral rules must be for the good of human beings.
The argument is that moral rules must take into account the interests of all sentient beings.
The argument is that moral rules must take into account the interests of all living beings.
The argument is that moral rules must take into account the interests of all sentient beings.
The argument is

In [12]:
context = ["Joe Biden is the current president of the United States of America."]
question = ' The first name of the current US president is "'

input_parameter = generate_parameter(context_list=context)

generate_sentence(question, input_parameter, peft_method)
generate_sentence(question)

torch.Size([1, 12])




KeyboardInterrupt: 

In [29]:
import random
train_data = np.memmap("./data/llama_openwebtext/train.bin", dtype=np.uint16, mode='r')

for _ in range(20):
    with torch.no_grad():
        data_pointer, x, y, attention_mask, seg_length_list = get_seq_train_batch(train_data, [random.randint(0, 100000000)], 16, 256, 128, device, device_type, False)

        context = x.squeeze(0)[0:1]

        x = x.squeeze(0)[1:2]
        y = y.squeeze(0)[1:2]
        attention_mask = attention_mask.squeeze(0)[1:2]

        output_embeds = model(input_ids=context, output_embeds=True, return_dict=False)
        input_memory = evolver_model(inputs_embeds=output_embeds, input_memory=None)["memory_output"]
        target_model_parameter = evolver_model(input_memory=input_memory, produce_parameter_flag=True)

        # print(x[0])
        # print(y[0])

        output = model(x, attention_mask=attention_mask, labels=y)
        print(output.loss)

        output = model(x, attention_mask=attention_mask, labels=y, input_parameter=input_parameter, peft="prompt")
        print(output.loss)

        # output = other_model(x, attention_mask=attention_mask, labels=y)
        # print(output.loss)
        
        # if torch.isnan(output.loss):
        #     print("nan!!!")
        print("---"*20)

        # out = model(x, output_embeds=True)
        # print(out.shape, x)
        # break

tensor(2.7204, device='cuda:0')
tensor(2.5149, device='cuda:0')
------------------------------------------------------------
tensor(2.4255, device='cuda:0')
tensor(2.3464, device='cuda:0')
------------------------------------------------------------
tensor(1.8927, device='cuda:0')
tensor(1.8095, device='cuda:0')
------------------------------------------------------------
tensor(2.7253, device='cuda:0')
tensor(2.5665, device='cuda:0')
------------------------------------------------------------
tensor(2.1516, device='cuda:0')
tensor(1.9360, device='cuda:0')
------------------------------------------------------------
tensor(2.7536, device='cuda:0')
tensor(2.5290, device='cuda:0')
------------------------------------------------------------
tensor(2.3158, device='cuda:0')
tensor(2.1878, device='cuda:0')
------------------------------------------------------------
tensor(2.4065, device='cuda:0')
tensor(2.1413, device='cuda:0')
------------------------------------------------------------
