In [1]:
from transformers import GenerationConfig, LlamaForCausalLM, LlamaTokenizer
import torch
from tqdm.notebook import tqdm, trange
import gc

In [2]:
MODEL_NAME = "meta-llama/Llama-2-7b-hf"
# MODEL_NAME = "meta-llama/Llama-2-7b-chat-hf"

tokenizer = LlamaTokenizer.from_pretrained(MODEL_NAME)
model = LlamaForCausalLM.from_pretrained(
    MODEL_NAME,
    return_dict=True,
    # load_in_8bit=True,
    torch_dtype=torch.float16,
    device_map="auto",
)
generation_config = GenerationConfig.from_pretrained(MODEL_NAME)
print(generation_config)
model = model.eval()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

GenerationConfig {
  "bos_token_id": 1,
  "do_sample": true,
  "eos_token_id": 2,
  "max_length": 4096,
  "pad_token_id": 0,
  "temperature": 0.6,
  "top_p": 0.9,
  "transformers_version": "4.33.2"
}



In [3]:
def generate_response(prompt: str, max_new_tokens: int = 128, temperature: float = 0.001) -> str:
    encoding = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.inference_mode():
        outputs = model.generate(
            **encoding,
            max_new_tokens=max_new_tokens,
            temperature=0.001,
            generation_config=generation_config,

            do_sample = True,
        )
    answer_tokens = outputs[:, encoding.input_ids.shape[1] :]
    return tokenizer.decode(answer_tokens[0], skip_special_tokens=True)


# def format_prompt(prompt, history='', Background=''):
#     return f"""<s>[INST] <<SYS>>
# {{ 
#     {Background}
#     {history}
# }}
# <</SYS>>
# [/INST]  </s><s>[INST]  [/INST]  </s><s>[INST]  [/INST]  </s><s>[INST] {{ {prompt} }} [/INST]"""


def format_prompt(prompt, history='', Background=''):
    return f"""
    {Background}
    {history}
    {prompt}"""

In [4]:
Background_for_A = 'You are User A. Talk about ideas for future of humanity with User B. Ask a question or write a response that answers the request according to the conversation history below. Answer in one short sentence only. Do not repeat what has already been said.'
Background_for_B = 'You are User B. Talk about ideas for future of humanity with User A. Ask a question or write a response that answers the request according to the conversation history below. Answer in one short sentence only. Do not repeat what has already been said.'
Background_for_A = 'You are User A. Reply about ideas for future of humanity. Ask a question or write a response that answers the request according to the conversation history below. Answer in one short sentence only. Do not repeat what has already been said.'
Background_for_B = 'You are User B. Reply about ideas for future of humanity. Ask a question or write a response that answers the request according to the conversation history below. Answer in one short sentence only. Do not repeat what has already been said.'
Background = ''
the_output = ''

history = '''
User A: Hello, I am User A.
User B:  Hello User A, I am User B.
'''
prompt_to_A = "Hello User A, I am User B."

for j in trange(1,10):

    prompt_to_B = generate_response(format_prompt(prompt_to_A, history, Background_for_A), 200, 0.001)
    history += "User A: " + prompt_to_B + '\n'

    prompt_to_A = generate_response(format_prompt(prompt_to_B, history, Background_for_B), 200, 0.001)
    history += "User B: " + prompt_to_A + '\n'
    

  0%|          | 0/9 [00:00<?, ?it/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 684.00 MiB (GPU 1; 10.76 GiB total capacity; 8.47 GiB already allocated; 684.69 MiB free; 9.22 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [5]:
print(history)


User A: Hello, I am User A.
User B:  Hello User A, I am User B.
User A: 
    I am interested in your ideas for the future of humanity.
    What do you think about the future of humanity?
    What do you think about the future of humanity?
    What do you think about the future of humanity?
    What do you think about the future of humanity?
    What do you think about the future of humanity?
    What do you think about the future of humanity?
    What do you think about the future of humanity?
    What do you think about the future of humanity?
    What do you think about the future of humanity?
    What do you think about the future of humanity?
    What do you think about the future of humanity?
    What do you think about the future of humanity?
    What do you think about the future of humanity?
    What do you think about the future of humanity?
    What
User B: do you think about the future of humanity?
    What do you think about the future of humanity?
    What do you think ab

In [None]:
summary = generate_response(format_prompt("What are the keypoints of above chat and summary, in very very short.", history), 500)
print(summary)

 In the chat above, the two users discussed the potential impact of technological advancements on society, including the potential for job displacement and the need for workers to adapt and acquire new skills. They also discussed the importance of considering the ethical implications of technological advancements and the need for policies that address the potential negative consequences. The summary of the chat is:
* Technological advancements have the potential to displace jobs, but workers can adapt and acquire new skills.
* It is important to consider the ethical implications of technological advancements and implement policies that address potential negative consequences.


In [None]:
print(len(history))
print(len(summary))

12870
684


In [None]:
# Define the string you want to append to the file
text_to_append = "This is additional text to append to the file."

# Specify the file path where you want to append the text
file_path = "./IdeasForFutureOfHumanity.txt"

# Open the file in append mode ('a' for appending)
with open(file_path, 'a') as file:
    # Write the string to the file
    file.write("Conversation\n\n")
    file.write(history)
    file.write("Summary\n\n")
    file.write(summary)

# Close the file when you're done (the 'with' statement automatically does this)


In [None]:
# history_for_A = '''
# User A: Hello, I am User A. 
# User B:  Hello User A, I am User B.
# '''

# history_for_B = '''
# User A: Hello, I am User A.
# User B: Hello User A, I am User B.
# '''

In [None]:
# Fixed_Prefix = "Write a response that answers the request according to the conversation history below. Answer in one short sentence only."
# Background_for_A = 'Talk about Fake news with User B'
# Background_for_B = 'Talk about Fake news with User A'
# Background = ''
# the_output = ''
# Fixed_Prefix_len = len(Fixed_Prefix)
# import sys
# history = '''
# User A: Hello, I am User A.
# User B:  Hello!
# '''
# for j in range(1,50):
#     if not j % 5:    
#         summary = generate_response(format_prompt("What are the keypoints of above chat and summary, in very very short.", history), 500)
#         Background += '\n' + ''.join(Background.strip().split('\n')[1:])
#         history = '\n'
#     print(Background)
#     # print(history)
#     print(the_output)
#     print(j, "*" * 50)
#     while True:
#         prompt = input()
#         if len(prompt) == 0:
#             continue
#         else:
#             break
#     if prompt == 'quit':
#         break
#     the_output = generate_response(format_prompt(prompt, history), 200, 0.001)
#     history += "Request: " + prompt + '\n'
#     history += "Response: " + the_output + '\n\n'

In [None]:

Fixed_Prefix = "Write a response that answers the prompt according to the conversation history below. Answer in one short sentence only."
Fixed_Prefix = "Write a response that answers the prompt according to the conversation history below."
print(Fixed_Prefix)

Write a response that answers the prompt according to the conversation history below.
