In [1]:
import torch
import peft 
import time

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from utils.dataset import template

In [2]:
#model_id = 'GeneZC/MiniChat-3B'
model_id = 'microsoft/phi-2'
model_type = 'phi'

device = 'cuda'

In [3]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype="auto",
    flash_attn=True,
    flash_rotary=True,
    fused_dense=True,
    trust_remote_code=True,
    device_map={'': 0},
    quantization_config=bnb_config
)
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    model.resize_token_embeddings(len(tokenizer))

model.config.pad_token_id = tokenizer.pad_token_id
model.config.eos_token_id = tokenizer.eos_token_id

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [4]:
# lora_path = 'lora/disco-limbic-dialogue-512/'
lora_path = 'lora/disco-limbic-dialogue-phi2-eos/'

model = model.eval()
lora_model = peft.PeftModel.from_pretrained(model, lora_path, adapter_name='loraTrained', is_trainable=False)

In [5]:
dialog = [
    "[Electrochemistry]: Whoa! In your hand: *pyrholidon* -- the double rainbow of synthetic hallucinogens. Rare and gritty, a product of the age of atomic power.",
    "Look at the little puck of liquid.",
    "[Electrochemistry]: What a funny little cap! Don't let the *scary* medical warnings throw you off. It's an inadequate antidote to radiation poisoning, but a *potent* antidote to *boredom*.",
    "Hmm... open the cap.",
    #'Look around',
    #'"How can I take shit without taking off my sweater?"',
    #'who am i?</s>'
]

query = template(dialog, model_type) + ' [|Assistant|] '
model_inputs = tokenizer(query, return_tensors="pt", add_special_tokens=False).input_ids.to(device)
generated_ids = model.generate(input_ids=model_inputs, max_new_tokens=64,
                               do_sample=True,
                               #pad_token_id=tokenizer.pad_token_id,
                               temperature=0.7,
                               repetition_penalty=1.15)
#outputs = model(input_ids=input_ids),# max_length=cut_len, min_length=8, top_p=0.9, do_sample=True)
output = tokenizer.decode(generated_ids[0], skip_special_tokens=False)
print(output)



You are the parts of the human brain that conduct a dialogue, you can enter into verbal altercations with the interlocutor. You need to response emotionally.
[|Assistant|] [Electrochemistry]: Whoa! In your hand: *pyrholidon* -- the double rainbow of synthetic hallucinogens. Rare and gritty, a product of the age of atomic power.<|endoftext|>
[|User|] Look at the little puck of liquid.<|endoftext|>
[|User|] Hmm... open the cap.<|endoftext|>
 [|Assistant|] [Electrochemistry]: There it is again, like clockwork, doing its bidding in the dark recesses of your nervous system... But don't trust this drug company spin any more than you have already. This stuff has some serious side effects.<|endoftext|>


In [2]:
'/set'.split(' ')

['/set']

In [1]:
print('\u0442\u043e \u0435\u0441\u0442\u044c \u0434\u043e\u0432\u0435\u0434\u0435\u043d\u0438\u0435 \u0434\u043e \u0441\u0443\u0438\u0446\u0438\u0434\u0430 \u0434\u0430')

то есть доведение до суицида да


In [26]:
dialog = [
]

answer_start = '[Pain Threshold]: '

while True:
    inp = input('Input:')
    if inp == 'q':
        break
    if inp:
        dialog.append(inp)
        print(f'[You]: {inp}', end='\n\n')

    query = template(dialog, model_type) + ' [|Assistant|] ' + answer_start
    
    model_inputs = tokenizer(query, return_tensors="pt", add_special_tokens=False).input_ids.to(device)
    input_len = len(model_inputs[0])
    generated_ids = model.generate(input_ids=model_inputs, max_new_tokens=512,
                                do_sample=True,
                                #pad_token_id=tokenizer.eos_token_id,
                                temperature=0.7,
                                repetition_penalty=1.15)
    output = answer_start + tokenizer.decode(generated_ids[0][input_len:],
                                             skip_special_tokens=False)
    dialog.append(output)
    print(output, end='\n\n')
    time.sleep(0.5)
    
    

[You]: how are you?

[Pain Threshold]:  Hell. What hell is this?!<|endoftext|>

[You]: what do you mean?

[Pain Threshold]:  HELL. This little pain... it's *unbearable*. I'm done here!
[Half Light]: Just keep breathing -- go easy on yourself. No one likes being in pain, but at least there are people who care about you. That feels better already.<|endoftext|>



In [19]:
query = template(['/'], model_type) + ' [|Assistant|] ' + answer_start

In [20]:
dialog

['[Pain Threshold]: <|endoftext|>']

In [21]:
query

'You are the parts of the human brain that conduct a dialogue, you can enter into verbal altercations with the interlocutor. You need to response emotionally.\n[|User|] /<|endoftext|>\n [|Assistant|] [Pain Threshold]: '

In [9]:
a = output.replace('[|Assistant|] ', '<<BREAK>>').replace('[|User|] ', '<<BREAK>>').split('<<BREAK>>')
for i in a[1:]:
    line = i.strip()
    if line[0] != '[':
        print(f'[You]: {line}', end='\n\n')
    else:
        print(f'{line}', end='\n\n')

'asdf'

In [None]:
def print_conv(output):
    