In [None]:
# Install necessary dependencies
!pip install transformers accelerate

In [None]:
from transformers import AutoTokenizer
import transformers
import torch

# Hub id of the fine-tuned Llama-2 chat checkpoint and the question to ask it.
model = "vuongvd/llama-2-7b-chat-eb"
prompt = "What is a large language model?"

# Load the tokenizer once and hand the same instance to the pipeline, so the
# pipeline does not load a second copy and `eos_token_id` below is guaranteed
# to come from the tokenizer that is actually used for generation.
tokenizer = AutoTokenizer.from_pretrained(model)
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,  # half precision to reduce memory use
    device_map="auto",          # let accelerate place the weights
)

# Sample one completion; `max_length` counts prompt tokens plus generated tokens.
sequences = pipeline(
    f'### Instruction: {prompt}',
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    max_length=200,
)
for seq in sequences:
    print(f"Result: {seq['generated_text']}")

### Log in to Hugging Face

In [None]:
!pip install huggingface_hub
from huggingface_hub import interpreter_login
interpreter_login()

### Use CPU (and GPU if available)

In [None]:
import accelerate
from transformers import AutoModelForCausalLM, AutoTokenizer

def infer_lm(model_name="meta-llama/Llama-2-7b-chat-hf", max_length=20):
    """Run an interactive text-generation loop on a causal language model.

    The tokenizer and model are loaded once; the function then repeatedly
    reads a prompt from standard input, generates a continuation and prints
    it. Typing ``exit`` (case-insensitive, surrounding whitespace ignored)
    ends the loop.

    Args:
        model_name: Hugging Face Hub id (or local path) of the model to load.
        max_length: Value forwarded to ``generate`` as ``max_length``; it
            bounds the prompt tokens plus the newly generated tokens.

    Returns:
        None. Generated text is printed to standard output.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")

    print("Enter a prompt to generate text or type 'exit' to quit.")
    while True:
        prompt = input("Prompt: ")

        if prompt.strip().lower() == "exit":
            break

        # Tokenize to input ids *and* attention mask, then move both to the
        # device the model was placed on by `device_map="auto"`; feeding CPU
        # tensors to a GPU-resident model fails or triggers warnings.
        model_inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        output_tokens = model.generate(
            **model_inputs,
            max_length=max_length,
            num_return_sequences=1,
        )
        output_text = tokenizer.decode(output_tokens[0], skip_special_tokens=True)

        print(f"Generated text: {output_text}")

# Start the interactive prompt loop; it runs until the user types 'exit'.
infer_lm()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]



Enter a prompt to generate text or type 'exit' to quit.
Prompt: Hi




Generated text: Hi, I am interested in your 2017 Honda Civic Si (
Prompt: Good




Generated text: Good news for the environment: New York City just banned single-use plastic bags
Prompt: exit


### Use GPU

In [None]:
!pip install bitsandbytes

In [None]:
import os

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "meta-llama/Llama-2-7b-chat-hf"
prompt = "Tell me about gravity"
# Do not hardcode credentials in the notebook: read the token from the
# HF_TOKEN environment variable. When it is unset this is None and the
# Hugging Face libraries fall back to the token stored by a previous login.
access_token = os.environ.get("HF_TOKEN")

# Load the weights quantized to 4 bits via bitsandbytes. The explicit
# `quantization_config` replaces the deprecated bare `load_in_4bit=True`
# argument, and `token` replaces the deprecated `use_auth_token`.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    token=access_token,
)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True, token=access_token)
# Put the inputs on whatever device the model was placed on instead of
# assuming it is always "cuda:0".
model_inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

output = model.generate(**model_inputs)

print(tokenizer.decode(output[0], skip_special_tokens=True))



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



Tell me about gravity
его Gravity (Gravity) is a 2013 American 3D space thriller film directed by James Gray and starring Sandra Bullock, George Clooney, and Ed Harris. The film follows two astronauts, Ryan Stone (Bullock) and Matt Kowalski (Clooney), who are on a mission to repair the Hubble Space Telescope. The film explores themes of survival, redemption, and the fragility of human existence in the face of the vast and unpredictable forces of gravity.

Gravity was a critical and commercial success, receiving widespread acclaim for its visual effects, suspenseful storytelling, and the performances of its leads. The film was also notable for its use of realistic physics and astronomy, which added to the overall sense of realism and immersion in the film.

The film's success can be attributed to a number of factors, including its timely release in the fall of 2013, which coincided with the 50th anniversary of the first manned mission to the moon. The film's marketing campaign also play

### No output, only a "model safetensors not found" error

In [None]:
!pip install ctransformers

Installing collected packages: ctransformers
Successfully installed ctransformers-0.2.27


In [None]:
# NOTE: this rebinds the name `AutoModelForCausalLM` to the ctransformers
# class, shadowing the transformers class of the same name imported in
# earlier cells. Re-run the earlier import before re-running those cells.
from ctransformers import AutoModelForCausalLM

# ctransformers loads GGML/GGUF weight files, not the safetensors shards of
# 'meta-llama/Llama-2-7b-chat-hf', so point it at the GGML conversion and
# declare the architecture as 'llama' (the checkpoint is Llama-2, not Mistral).
# No `model_file` is given, so the library chooses among the quantized files
# in the repo; pass `model_file=...` to select a specific one.
model = AutoModelForCausalLM.from_pretrained(
    'TheBloke/Llama-2-7B-Chat-GGML',
    model_type='llama',
    temperature=0.1,
    top_p=0.9,
)

prompt = "What is a large language model?"
out = model(f'### Instruction: {prompt}')
print(out)