In [10]:
import torch
import gc
from transformers import AutoTokenizer, AutoModelForCausalLM


# unassign the model and free up GPU RAM
def freeup_VRAM():
    memory_used_before = torch.cuda.memory_reserved(0) / 1024 ** 3
    try:
        global model, tokenizer
        del model, tokenizer
    except:
        pass
    gc.collect()
    torch.cuda.empty_cache()
    memory_used_after = torch.cuda.memory_reserved(0) / 1024 ** 3
    print(f"Freed up {memory_used_before - memory_used_after:.1f} GB of VRAM.")


def generate_text(
    prompt, do_sample=True, max_new_tokens=50, temperature=0.9, top_k=50, top_p=0.95
):
    encoded_input = tokenizer(prompt, return_tensors="pt")
    output_sequences = model.generate(
        input_ids=encoded_input["input_ids"].cuda(),
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
    )
    generated_text = tokenizer.decode(output_sequences[0], skip_special_tokens=True)
    return generated_text


model_names = [
    "EleutherAI/gpt-neo-125M",
    "EleutherAI/gpt-neo-1.3B",
    "EleutherAI/gpt-neo-2.7B",
    "EleutherAI/gpt-j-6B",
]


In [12]:
model_idx = 3
load_in_8bit = True
half_precision = False

freeup_VRAM()

tokenizer = AutoTokenizer.from_pretrained(model_names[model_idx])
if load_in_8bit:
    model = AutoModelForCausalLM.from_pretrained(
        model_names[model_idx], device_map="auto", load_in_8bit=True
    )
else:
    model = AutoModelForCausalLM.from_pretrained(
        model_names[model_idx],
        device_map="auto",
        torch_dtype=torch.float16 if half_precision else "auto",
    )
print(
    f"Loaded model {model_names[model_idx]} in {'int8' if load_in_8bit else 'fp16' if half_precision else 'fp32'}"
)

model.eval()

# set pad_token_id to eos_token_id because GPT2 does not have a EOS token
model.config.pad_token_id = model.config.eos_token_id
model.generation_config.pad_token_id = model.config.eos_token_id


Freed up 2.9 GB of VRAM.
Loaded model EleutherAI/gpt-j-6B in int8


In [16]:
prompt = """Write a python code that plots the function y=sin(x)"""
# prompt = '''飞船登机口处，一群人在那儿拼命地挥手，船上的人也在欢呼，只是每个人的目光都是那么的恋恋不舍。这是第五批运向诺顿星开拓者，仙女星系的三个星球是人类移民的天堂，那里经过三百年的建设已经形成了完备高级的生活设施，而且各方面的福利，教育，以及军事防御都是太阳系标准，甚至更高，但诺顿星……还太原始了些，尤其是还有无孔不入的扎戈族，它们就像地球上的某种古董级生物蟑螂一样的顽强，而且随着人类的进化而进化。这次是诺顿星进行第二期开发计划，愿意去这种地方的多是生活不怎么样的，希望用诺顿星的两年来改善生活，年纪都是在三四十岁甚至更老。可是在人群中却有一个看来相当相当年轻的身影，看起来顶多十四五岁，男孩望着送别区，一个红鼻子老头拼命地挥舞着自己的帽子，老人的眼圈都红了，显然是哭过，是什么理由把自己这么年轻的孩子送上这样的飞船？'''
print(generate_text(prompt))


Write a python code that plots the function y=sin(x) using gnuplot, where x ranges from 0 to 2*pi

I have been trying to plot this function with gnuplot, but I have no idea how to plot x between 0 and 2*pi. Can anyone tell me how


In [9]:
prompt = """Please answer the following question:

Question: What is the capital of Canada?
Answer: Ottawa

Question: What is the currency of Switzerland?
Answer: Swiss franc

Question: Who is the first president of United States?
Answer:"""
print(generate_text(prompt, max_new_tokens=5, temperature=1, top_p=1))


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Please answer the following question:

Question: What is the capital of Canada?
Answer: Ottawa

Question: What is the currency of Switzerland?
Answer: Swiss franc

Question: Who is the first president of United States?
Answer: George Washington

Question


In [49]:
encoded_input = tokenizer("SAT", return_tensors="pt")
print(encoded_input)


{'input_ids': tensor([[  50, 1404]]), 'attention_mask': tensor([[1, 1]])}


In [13]:
print(torch.cuda.memory_summary())


|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |    6281 MB |    6281 MB |   42977 MB |   36696 MB |
|       from large pool |    6276 MB |    6276 MB |   42954 MB |   36678 MB |
|       from small pool |       5 MB |       5 MB |      23 MB |      18 MB |
|---------------------------------------------------------------------------|
| Active memory         |    6281 MB |    6281 MB |   42977 MB |   36696 MB |
|       from large pool |    6276 MB |    6276 MB |   42954 MB |   36678 MB |
|       from small pool |       5 MB |       5 MB |      23 MB |      18 MB |
|---------------------------------------------------------------

In [5]:
freeup_VRAM()


Cleared model VRAM.
