<a href="https://colab.research.google.com/github/vijaysrajan/buysellconnect/blob/main/BasicGemma2bLocally.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Step 1: Install required packages with compatible versions
!pip install --upgrade transformers>=4.38.0
!pip install --upgrade torch torchvision torchaudio
!pip install --upgrade accelerate
!pip install langchain>=0.1.0
!pip install langchain-huggingface
!pip install huggingface_hub>=0.20.0


In [None]:
!pip install langchain-community

In [None]:

# Step 2: Restart runtime after installation (important!)
# Go to Runtime > Restart runtime in Colab, then run the cells below


In [None]:

# Step 3: Authenticate with Hugging Face (Gemma is a gated model)
# notebook_login() is interactive: it renders a token prompt in the cell output,
# so this cell cannot complete unattended (e.g. under "Run all").
# NOTE(review): the Hugging Face account behind the token presumably must have
# accepted the Gemma licence on the model page — confirm before sharing.
from huggingface_hub import notebook_login
notebook_login()  # This will prompt you to enter your HF token


In [None]:

# Step 4: Import required libraries
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain_huggingface import HuggingFacePipeline
from langchain.schema import HumanMessage
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

In [None]:

# Step 5: Pick the compute device — "cuda" when PyTorch can see a GPU, else "cpu"
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")


In [None]:

# Step 6: Download (or reuse the cached copy of) the tokenizer and model weights
model_id = "google/gemma-2b-it"

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_id)

print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    low_cpu_mem_usage=True,
    # GPU: half precision with automatic device placement.
    # CPU: full precision with no device map.
    **(
        {"torch_dtype": torch.float16, "device_map": "auto"}
        if device == "cuda"
        else {"torch_dtype": torch.float32, "device_map": None}
    ),
)


In [None]:

# Step 7: Create a text-generation pipeline around the already-loaded model.
# No `device` argument is passed: the model was placed in Step 6 (device_map="auto"
# on GPU, default placement on CPU) and the pipeline reuses that placement.
# The `device` variable from Step 5 is deliberately left untouched here so that
# re-running Step 6 still sees "cuda" / "cpu".
text_generation_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=1000,
    # Greedy decoding: output is deterministic, and sampling parameters such as
    # temperature have no effect (and only trigger warnings), so none are set.
    do_sample=False,
    return_full_text=False,  # return only the completion, not prompt + completion
)

In [None]:
# Step 8: Create LangChain wrapper
llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

# Step 9: Test the model with a small schema-extraction prompt.
# The prompt is a plain (non-f) triple-quoted string, so the JSON braces are literal.
# Only the opening line uses a backslash (to suppress the leading newline); each
# section header, the input sentence and every schema field sits on its own line.
print("Testing the model...")
prompt = """\
### Input:
I want to buy my 2 year old fridge. It is 350 litres and red in colour. I am moving out of my town.
### Schema:
{
  "item": "string",
  "quantity": "string",
  "colour": "string",
  "capacity": "string"
}
### Response:
"""


response = llm.invoke(prompt)
print("Response:")
print(response)

# Alternative: Direct pipeline usage (without LangChain)
print("\n" + "="*50)
print("Alternative: Direct pipeline usage")
result = text_generation_pipeline(prompt)
# The pipeline returns a list with one dict per generated sequence
print(result[0]['generated_text'])

In [None]:
# Sample marketplace utterance used to exercise the prompts below
Q1 = (
    "I want to sell my 2 year old fridge. "
    "It is 350 litres and red in colour. "
    "I am moving out of my town."
)

# Q is the question the prompt cells and the chain actually read
Q = Q1

In [None]:
# Role classification prompt (context/question style).
# The template is a plain string, NOT an f-string, so {Q} stays a real template
# variable: the question is substituted at format time, and braces inside a
# question cannot corrupt the template. .partial(Q=Q) pre-binds the current
# question as the default, so the template still formats with no arguments,
# while format_messages(Q=...) overrides it.
prompt_template1 = ChatPromptTemplate.from_template("""
<bos><start_of_turn>user
Answer the following question based on the provided context. If you cannot answer the question based on the context, say "I don't have enough information to answer that question."

Context: Is the question from a buyer or seller or a lessor or renter or is the question from someone who wants to connect. Only 5 choices possible buyer, seller, connector, lessor, renter. Please print only one of the above choices and print that. Do not print anything else.

Question: {Q}
<end_of_turn>
<start_of_turn>model
""").partial(Q=Q)
print (prompt_template1.format_messages(Q=Q))

In [None]:
# Role classification prompt (explicit category list).
# The template is a plain string, NOT an f-string, so {Q} stays a real template
# variable: the statement is substituted at format time, and braces inside a
# statement cannot corrupt the template. .partial(Q=Q) pre-binds the current
# question as the default, so the template still formats with no arguments,
# while format_messages(Q=...) overrides it.
prompt_template2 = ChatPromptTemplate.from_template("""
<bos><start_of_turn>user
Classify the following statement into exactly ONE category:
- buyer
- seller
- connector
- lessor
- renter

Statement: {Q}

Respond with ONLY the category name. No explanation, no additional text, just the single word.
<end_of_turn>
<start_of_turn>model
""").partial(Q=Q)
print (prompt_template2.format_messages(Q=Q))

In [None]:
# Attribute extraction prompt (free-form JSON).
# The template is a plain string, NOT an f-string, so {Q} stays a real template
# variable: the question is substituted at format time, and braces inside a
# question cannot corrupt the template. .partial(Q=Q) pre-binds the current
# question as the default, so the template still formats with no arguments,
# while format_messages(Q=...) overrides it.
prompt_template3 = ChatPromptTemplate.from_template("""
<bos><start_of_turn>user
Answer the following question based on the provided context in one word or phrase. If you cannot answer the question based on the context, say "I don't have enough information to answer that question." else do not print any context in the response. I am looking for a single word or phrase that best answers the question.

Context: Please extract the attributes and features like colour, age, make and model, capacity etc if available, in the question and put it in a json format. Please start with the item under consideration followed by the attributes.

Question: {Q}
<end_of_turn>
<start_of_turn>model
""").partial(Q=Q)

print (prompt_template3.format_messages(Q=Q))

In [None]:
# Schema-filling prompt: asks the model to return one JSON object describing the
# question (who is asking, the item, and its attributes).
# The template is a plain string, NOT an f-string:
#   - {Q} stays a real template variable, substituted at format time, so braces
#     inside a question cannot corrupt the template;
#   - literal JSON braces need only the template-level escape ({{ and }}).
# .partial(Q=Q) pre-binds the current question as the default, so the template
# can be formatted or invoked without supplying Q, while an explicit Q
# (format_messages(Q=...) or a chain input keyed "Q") overrides it.
# NOTE(review): the text starts with a literal <bos>; if the tokenizer also
# prepends its own BOS token the model would see it twice — confirm.
prompt_template4 = ChatPromptTemplate.from_template("""
<bos><start_of_turn>user
For the question:{Q}, please fill the following json appropriately. Please only output the json. Please fill json values only where applicable or fill null.

{{
  "question_raiser_type": "string", #buyer or seller or connector or lessor or renter
  "item": "string", #no adjectives. no descriptions. Just the item no description single word or phrase only. Only item name. If applicable else null.
  "item_category": "string",
  "quantity": "string",
  "colour": "string",
  "capacity": "string",
  "age": "string",
  "truck_type": "string",
}}
<end_of_turn>
<start_of_turn>model""").partial(Q=Q)

# Show the fully rendered prompt for inspection
print (prompt_template4.format_messages(Q=Q))

In [None]:
def post_processing(response):
  """Pick the role label mentioned in a model response.

  The response is searched case-insensitively for each known label, in the
  fixed priority order listed below; the first label found as a substring wins.

  Args:
    response: raw text returned by the model.

  Returns:
    The matching label, or "" when no label occurs. In the no-match case the
    stripped response is printed so the unexpected output can be inspected.
  """
  labels = ("buyer", "seller", "connector", "lessor", "renter", "service_provider", "service_seeker")
  haystack = response.lower()
  # Labels are already lowercase, so a direct substring test is sufficient
  found = next((label for label in labels if label in haystack), None)
  if found is None:
      print(response.strip())  # Fallback to original if no match found
      return ""
  return found

In [None]:
# Chain: raw question string -> prompt -> local Gemma pipeline -> plain string.
qa_rag_chain = (
    # Bind the incoming question to "Q", the variable name prompt_template4 is
    # formatted with. RunnablePassthrough forwards the invoke() argument as-is,
    # so the chain must be invoked with the bare string, not a wrapping dict.
    {"Q": RunnablePassthrough()}
    | prompt_template4
    | llm
    | StrOutputParser()
)

response = qa_rag_chain.invoke(Q)
print(response)
# Reduce the raw model output to a single role label ("" when none is found)
print(post_processing(response))


**Note:** After installing or upgrading the packages in Step 1, you need to restart the runtime for the new versions to take effect. Go to `Runtime > Restart runtime` in the Colab menu, then run the cells from Step 3 onwards.