## GoEx Fine Tuning

This notebook fine-tunes a GoEx model on data from the following APIs:

1. serpapi for GoogleSearch
2. news_search for news search from newsapi.org

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install --upgrade datasets==2.12.0 huggingface-hub==0.14.1


In [None]:
!pip install transformers==4.28.1
!pip install huggingface-hub==0.14.1
!pip install torch==2.0.1
!pip install tqdm==4.65.0
!pip install prompt_toolkit==3.0.38
!pip install sentencepiece==0.1.99
!pip install accelerate==0.19.0
!pip install einops==0.7.0


In [None]:
!pip show datasets

In [None]:
!huggingface-cli login



    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|
    
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token: 
Add token as git credential? (Y/n) n
Token is valid.
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [None]:
!pip install bitsandbytes peft flask

In [None]:
import torch

torch.cuda.empty_cache()
!nvidia-smi


Fri Dec 20 04:44:20 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off | 00000000:00:04.0 Off |                    0 |
| N/A   42C    P0              50W / 400W |      2MiB / 40960MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

## Generate Training Data

In [None]:
!pip install serpapi google-search-results openai langchain streamlit

In [None]:
# Import required libraries
import os
import json
from serpapi import GoogleSearch
import requests
from datasets import Dataset
from sklearn.model_selection import train_test_split

# Load API keys from local text files so they are not hardcoded in the notebook.
# Each file is read whole and surrounding whitespace/newlines are stripped.
with open('newsapi_key.txt', 'r') as f:
    NEWSAPI_KEY = f.read().strip()
with open('serpapi_key.txt', 'r') as f:
    SERPAPI_API_KEY = f.read().strip()

# Also expose the SerpAPI key through the process environment
os.environ["SERPAPI_API_KEY"] = SERPAPI_API_KEY

# Function to query SerpAPI
def query_serpapi(query):
    """Run a Google search through SerpAPI and summarise the top organic hits.

    Args:
        query: Search string sent as the ``q`` parameter.

    Returns:
        A list of at most five ``"<title>: <snippet>"`` strings. Missing
        fields fall back to ``'No Title'`` / ``'No Description'``. If anything
        raises while querying, the error is printed and ``[]`` is returned.
    """
    try:
        payload = GoogleSearch({"q": query, "api_key": SERPAPI_API_KEY}).get_dict()
        organic_hits = payload.get("organic_results", [])

        # Only the first five organic results are kept.
        return [
            f"{hit.get('title', 'No Title')}: {hit.get('snippet', 'No Description')}"
            for hit in organic_hits[:5]
        ]
    except Exception as e:
        print(f"Error querying SerpAPI: {e}")
        return []


# Function to query NewsAPI
def query_newsapi(topic):
    """Fetch up to five articles about ``topic`` from the NewsAPI ``everything`` endpoint.

    Args:
        topic: Free-text search term. It is passed via ``params`` so that
            spaces and special characters are URL-encoded by ``requests``.

    Returns:
        A list of ``"<title>: <description>"`` strings, one per article.
        Missing or null fields fall back to ``'No Title'`` / ``'No Description'``
        (same placeholders as ``query_serpapi``). On any failure (network
        error, timeout, non-2xx status, malformed JSON) the error is printed
        and ``[]`` is returned.
    """
    try:
        response = requests.get(
            "https://newsapi.org/v2/everything",
            params={"q": topic, "apiKey": NEWSAPI_KEY, "pageSize": 5},
            timeout=30,  # never hang the notebook on a stalled connection
        )
        # Surface HTTP errors (bad key, rate limit) instead of silently
        # treating the error payload as "no articles".
        response.raise_for_status()
        articles = response.json().get("articles") or []
        # Use .get() so a single article with a missing field does not
        # discard the whole batch via the except branch below.
        return [
            f"{article.get('title') or 'No Title'}: "
            f"{article.get('description') or 'No Description'}"
            for article in articles
        ]
    except Exception as e:
        print(f"Error querying NewsAPI: {e}")
        return []

# Expanded queries and topics for diversity
# Search strings sent to SerpAPI by generate_data(); one training sample per
# query that returns results.
queries = [
    "Python programming", "latest technology", "healthcare innovations",
    "best laptops", "top programming languages", "machine learning trends",
    "deep learning frameworks", "data science in 2024", "tech startups"
]

# Topics sent to NewsAPI by generate_data(); one training sample per topic
# that returns articles.
topics = [
    "AI", "climate change", "finance", "sports", "politics",
    "entertainment", "cryptocurrency", "startup funding", "global economy",
    "environmental conservation", "healthcare policies", "education reforms"
]

def generate_data():
    """Build one round of instruction/response samples from live API results.

    Iterates over the module-level ``queries`` (SerpAPI) and ``topics``
    (NewsAPI). Each lookup that returns at least one result contributes a
    dict with ``"input"`` (the instruction) and ``"output"`` (a numbered
    list of results); lookups that return nothing are skipped.

    Returns:
        list[dict]: the samples, SerpAPI entries first, then NewsAPI entries.
    """
    def _numbered(lines):
        # "1. foo\n2. bar\n..." — numbering starts at 1.
        return "\n".join(f"{pos + 1}. {line}" for pos, line in enumerate(lines))

    samples = []

    # Google search samples
    for query in queries:
        hits = query_serpapi(query)
        if not hits:
            continue
        samples.append({
            "input": f"Find Google search results for '{query}'.",
            "output": "Here are the top Google search results:\n" + _numbered(hits),
        })

    # News samples
    for topic in topics:
        found = query_newsapi(topic)
        if not found:
            continue
        samples.append({
            "input": f"Find news articles about '{topic}'.",
            "output": "Here are some recent news articles:\n" + _numbered(found),
        })

    return samples


def save_datasets(min_samples=500):
    """Collect samples, split them 80/20, and write them to ``./datasets``.

    ``generate_data()`` is called repeatedly until at least ``min_samples``
    samples have been collected or a round yields nothing new.

    Args:
        min_samples: Target dataset size before splitting (default 500,
            the value originally hard-coded here).

    Raises:
        RuntimeError: if no samples could be generated at all.

    Side effects:
        Creates ``./datasets`` if needed and writes ``train.json`` and
        ``val.json`` there; prints progress to stdout.
    """
    print("Generating data...")
    data = generate_data()
    while len(data) < min_samples:
        batch = generate_data()
        if not batch:
            # The query helpers return [] on any API failure, so an empty
            # round means the APIs are unavailable (bad key, quota, network).
            # Without this guard the loop would spin forever.
            print("No new samples were returned; stopping data generation early.")
            break
        data += batch
        print(f"Generated {len(data)} samples so far...")

    if not data:
        raise RuntimeError(
            "No training samples could be generated; check the API keys and quotas."
        )

    # NOTE(review): every round re-issues the same fixed queries/topics, so the
    # dataset is likely dominated by (near-)duplicate samples, and the random
    # split below can put copies of one sample in both train and validation.
    # Confirm whether de-duplication before splitting is wanted.
    print(f"Final dataset size: {len(data)}")
    train_data, val_data = train_test_split(data, test_size=0.2, random_state=42)

    os.makedirs("./datasets", exist_ok=True)
    with open("./datasets/train.json", "w") as train_file:
        json.dump(train_data, train_file, indent=2)
    with open("./datasets/val.json", "w") as val_file:
        json.dump(val_data, val_file, indent=2)

    print("Datasets saved successfully:")
    print(" - Train dataset: ./datasets/train.json")
    print(" - Validation dataset: ./datasets/val.json")


save_datasets()


Generating data...
Generated 42 samples so far...
Generated 63 samples so far...
Generated 84 samples so far...
Generated 105 samples so far...
Generated 126 samples so far...
Generated 147 samples so far...
Generated 168 samples so far...
Generated 189 samples so far...
Generated 210 samples so far...
Generated 231 samples so far...
Generated 252 samples so far...
Generated 273 samples so far...
Generated 294 samples so far...
Generated 315 samples so far...
Generated 336 samples so far...
Generated 357 samples so far...
Generated 378 samples so far...
Generated 399 samples so far...
Generated 420 samples so far...
Generated 441 samples so far...
Generated 462 samples so far...
Generated 483 samples so far...
Generated 504 samples so far...
Final dataset size: 504
Datasets saved successfully:
 - Train dataset: ./datasets/train.json
 - Validation dataset: ./datasets/val.json


## Training Code

In [None]:
import os
import json
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from datasets import Dataset
from flask import Flask, request, jsonify

# Constants
# Base checkpoint to fine-tune, output/log directories, and dataset locations.
# NOTE(review): the data-generation cell writes to "./datasets/..."; these
# absolute paths only match if the working directory is /content — confirm.
MODEL_NAME = "gorilla-llm/gorilla-openfunctions-v2"
OUTPUT_DIR = "/weights"
LOG_DIR = "/training"
TRAIN_DATASET_PATH = "/content/datasets/train.json"
VAL_DATASET_PATH = "/content/datasets/val.json"

# Clear GPU memory
torch.cuda.empty_cache()

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)

# Load the model with 8-bit weights, letting device_map="auto" place it.
# NOTE(review): the recorded run below reports a training loss of 0.0 at every
# logged step and the test generation is a run of "!" characters, so this
# fine-tune did not produce a usable model. Passing 8-bit-loaded weights
# straight to Trainer (no adapter layers; peft is installed but never used)
# is a likely cause — confirm before reusing these weights.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    load_in_8bit=True,
    torch_dtype=torch.float16,
)
# Match the embedding matrix size to the tokenizer's vocabulary size
model.resize_token_embeddings(len(tokenizer))

# Load dataset
def load_json_as_dataset(filepath):
    """Read a JSON file holding a list of records and wrap it in a ``Dataset``.

    Args:
        filepath: Path to a JSON file whose top-level value is a list of dicts.

    Returns:
        datasets.Dataset built with ``Dataset.from_list``.
    """
    with open(filepath, "r") as handle:
        records = json.loads(handle.read())
    return Dataset.from_list(records)

# Load the train and validation splits from their JSON files
train_dataset = load_json_as_dataset(TRAIN_DATASET_PATH)
val_dataset = load_json_as_dataset(VAL_DATASET_PATH)

# Preprocessing
def preprocess_function(samples):
    """Turn one ``{"input", "output"}`` record into fixed-length model inputs.

    Intended for non-batched ``Dataset.map``: ``samples`` is a single record.
    The instruction and response are rendered into the chat-style prompt used
    throughout this notebook, then tokenized, truncated and padded to 512 tokens.

    Args:
        samples: Mapping with string fields ``"input"`` and ``"output"``.

    Returns:
        dict with 1-D tensors ``input_ids``, ``attention_mask`` and ``labels``.
        ``labels`` equals ``input_ids`` except at padded positions, which are
        set to -100 so the loss ignores them.
    """
    # Format prompt with input and output
    combined_text = (
        f"<s><|system|>\nYour task is to respond to user queries.\n"
        f"<|user|>\n{samples['input']}\n<|assistant|>\n{samples['output']}</s>"
    )

    # Tokenize a single string; the result has a leading batch dimension of 1.
    tokenized = tokenizer(
        combined_text,
        padding="max_length",
        truncation=True,
        max_length=512,
        return_tensors="pt"
    )

    # Drop only the batch dimension (a bare squeeze() could also drop others).
    input_ids = tokenized["input_ids"].squeeze(0)
    attention_mask = tokenized["attention_mask"].squeeze(0)

    # Train on the whole sequence, but not on padding: positions where the
    # attention mask is 0 are padding and must not contribute to the loss.
    # Masking by attention_mask (not by pad token id) stays correct even if
    # the pad token is the same as the EOS token.
    labels = input_ids.clone()
    labels[attention_mask == 0] = -100

    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels,
    }

# Apply preprocessing record by record (map is not batched here, so
# preprocess_function receives one example at a time)
tokenized_train_dataset = train_dataset.map(preprocess_function)
tokenized_val_dataset = val_dataset.map(preprocess_function)

# Training arguments: one epoch at batch size 1, evaluation every 100 steps,
# a checkpoint at the end of each epoch (at most two kept), no external
# experiment tracker.
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    evaluation_strategy="steps",
    eval_steps=100,
    logging_dir=LOG_DIR,
    logging_steps=10,
    per_device_train_batch_size=1,
    learning_rate=5e-5,
    num_train_epochs=1,
    save_strategy="epoch",
    save_total_limit=2,
    weight_decay=0.01,
    report_to="none",
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_val_dataset,
    tokenizer=tokenizer,
)

# Train the model
# NOTE(review): the recorded output shows Training Loss 0.0 and an empty
# Validation Loss column — treat the resulting weights as suspect.
print("Starting training...")
trainer.train()

# Save the fine-tuned model and tokenizer side by side in OUTPUT_DIR
print("Saving the fine-tuned model...")
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)
print(f"Model saved successfully in {OUTPUT_DIR}!")

# Test function
def test_model(prompt):
    """Generate a response to ``prompt`` with the in-memory fine-tuned model.

    The prompt is wrapped in the same system/user/assistant template used to
    build the training examples.

    Args:
        prompt: The user's query text.

    Returns:
        str: the decoded sequence (prompt template plus generated
        continuation) with special tokens removed.
    """
    instruction = (
        f"<s><|system|>\nYour task is to respond to user queries.\n<|user|>\n{prompt}\n<|assistant|>\n"
    )
    # Send inputs to wherever the model was placed instead of assuming "cuda".
    inputs = tokenizer(
        instruction, return_tensors="pt", truncation=True, max_length=512
    ).to(model.device)
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        # Bound the *generated* tokens. The previous max_length=200 also
        # counted the prompt, which may be up to 512 tokens, leaving little
        # or no room for an answer.
        max_new_tokens=200,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Smoke-test the freshly trained model with a single prompt
test_prompt = "Find news articles about AI advancements."
print("Test Response:", test_model(test_prompt))

# Flask deployment
app = Flask(__name__)

@app.route('/query', methods=['POST'])
def query():
    """Handle ``POST /query``: run the model on the JSON body's ``"prompt"``.

    Expects a JSON object such as ``{"prompt": "..."}`` and responds with
    ``{"response": "<model output>"}``. A body that is missing, not valid
    JSON, or not a JSON object yields a 400 with an ``"error"`` message
    instead of an unhandled exception.
    """
    # silent=True returns None for a missing/invalid JSON body rather than raising.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object with a 'prompt' field."}), 400
    prompt = data.get("prompt", "")
    response = test_model(prompt)
    return jsonify({"response": response})

# In a notebook __name__ is "__main__", so this starts the server and blocks
# the cell until it is interrupted.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000)




tokenizer_config.json:   0%|          | 0.00/4.24k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/4.61M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/462 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/669 [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/22.5k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.97G [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.85G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of LlamaForCausalLM were not initialized from the model checkpoint at gorilla-llm/gorilla-openfunctions-v2 and are newly initialized: ['model.layers.20.self_attn.rotary_emb.inv_freq', 'model.layers.8.self_attn.rotary_emb.inv_freq', 'model.layers.23.self_attn.rotary_emb.inv_freq', 'model.layers.22.self_attn.rotary_emb.inv_freq', 'model.layers.21.self_attn.rotary_emb.inv_freq', 'model.layers.4.self_attn.rotary_emb.inv_freq', 'model.layers.16.self_attn.rotary_emb.inv_freq', 'model.layers.26.self_attn.rotary_emb.inv_freq', 'model.layers.25.self_attn.rotary_emb.inv_freq', 'model.layers.3.self_attn.rotary_emb.inv_freq', 'model.layers.5.self_attn.rotary_emb.inv_freq', 'model.layers.19.self_attn.rotary_emb.inv_freq', 'model.layers.12.self_attn.rotary_emb.inv_freq', 'model.layers.2.self_attn.rotary_emb.inv_freq', 'model.layers.24.self_attn.rotary_emb.inv_freq', 'model.layers.0.self_attn.rotary_emb.inv_freq', 'model.layers.11.self_attn.rotary_emb.inv_freq', 'model.layers.9.self_attn

generation_config.json:   0%|          | 0.00/121 [00:00<?, ?B/s]

Map:   0%|          | 0/403 [00:00<?, ? examples/s]

Map:   0%|          | 0/101 [00:00<?, ? examples/s]

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Starting training...


Step,Training Loss,Validation Loss
100,0.0,
200,0.0,
300,0.0,
400,0.0,




Saving the fine-tuned model...




Model saved successfully in /content/drive/MyDrive/Project/training/weights!




Test Response: <s><|system|>
Your task is to respond to user queries.
<|user|>
Find news articles about AI advancements.
<|assistant|>
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.28.0.12:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Define the path to the fine-tuned model and tokenizer
OUTPUT_DIR = "/weights"

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(OUTPUT_DIR, use_fast=True)

# Load the model in 8-bit precision
model = AutoModelForCausalLM.from_pretrained(
    OUTPUT_DIR,
    device_map="auto",  # Let the library decide device placement
    load_in_8bit=True,  # Load the weights in 8-bit, same flag as the training cell
    torch_dtype=torch.float16,  # Specify the data type
)

# Define the inference function
def test_model(prompt):
    """Generate a sampled response to ``prompt`` with the reloaded model.

    The prompt is wrapped in the system/user/assistant template used for the
    training examples.

    Args:
        prompt: The user's query text.

    Returns:
        str: the decoded sequence (prompt template plus generated
        continuation) with special tokens removed. Sampling is enabled, so
        repeated calls may return different text.
    """
    # Format the input prompt
    instruction = (
        f"<s><|system|>\nYour task is to respond to user queries.\n<|user|>\n{prompt}\n<|assistant|>\n"
    )

    # Tokenize the input prompt
    inputs = tokenizer(instruction, return_tensors="pt", truncation=True, max_length=512)

    # Move inputs to the device the model was placed on
    inputs = {key: val.to(model.device) for key, val in inputs.items()}

    # Generate the response
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        # Bound the generated tokens only; max_length=200 also counted the
        # prompt, which can be up to 512 tokens long.
        max_new_tokens=200,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
        num_return_sequences=1,
        # temperature/top_k/top_p only take effect when sampling is on;
        # without do_sample=True decoding is greedy and they are ignored.
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
    )

    # Decode and return the generated response
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Test the inference
test_prompt = "Find news articles about AI advancements."
response = test_model(test_prompt)
print("Test Response:", response)


You passed `quantization_config` to `from_pretrained` but the model you're loading already has a `quantization_config` attribute. The `quantization_config` attribute will be overwritten with the one you passed to `from_pretrained`.
Some weights of the model checkpoint at /content/drive/MyDrive/Project/training/weights were not used when initializing LlamaForCausalLM: ['model.layers.22.self_attn.v_proj.weight_format', 'model.layers.13.mlp.up_proj.weight_format', 'model.layers.19.mlp.gate_proj.weight_format', 'model.layers.21.self_attn.v_proj.weight_format', 'model.layers.11.self_attn.v_proj.weight_format', 'model.layers.3.self_attn.k_proj.weight_format', 'model.layers.23.mlp.up_proj.weight_format', 'model.layers.13.self_attn.q_proj.weight_format', 'model.layers.0.self_attn.v_proj.weight_format', 'model.layers.12.mlp.gate_proj.weight_format', 'model.layers.22.self_attn.q_proj.weight_format', 'model.layers.20.mlp.up_proj.weight_format', 'model.layers.6.mlp.up_proj.weight_format', 'model.l

Test Response: <s><|system|>
Your task is to respond to user queries.
<|user|>
Find news articles about AI advancements.
<|assistant|>
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!


## Hosting an ngrok app to connect with the fine-tuned GoEx model

In [None]:
!pip install pyngrok



In [None]:
!pip install -U bitsandbytes



In [None]:
import os
# Read the ngrok auth token from a local file and expose it through the
# NGROK_AUTH_TOKEN environment variable for the hosting cell.
with open('NGROK_AUTH.txt', 'r') as file:
    NGROK_AUTH_TOKEN = file.read().strip()
os.environ['NGROK_AUTH_TOKEN'] = NGROK_AUTH_TOKEN

In [None]:
# Host the fine-tuned model behind a Flask app exposed through a pyngrok tunnel
# (the required packages are installed in earlier cells)

# Import libraries
from flask import Flask, request, jsonify
from pyngrok import ngrok
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os
from google.colab import userdata


# Retrieve the ngrok auth token from the NGROK_AUTH_TOKEN environment variable.
# Alternative when the token is stored in Colab Secrets instead:
#ngrok_auth_token = userdata.get("NGROK_AUTH_TOKEN")
ngrok_auth_token = os.getenv("NGROK_AUTH_TOKEN")

if not ngrok_auth_token:
    # The token is read from the environment, not from Colab Secrets, so the
    # message points at the real source.
    raise ValueError(
        "NGROK_AUTH_TOKEN environment variable is not set; "
        "load it from NGROK_AUTH.txt before running this cell."
    )

# Authenticate ngrok
ngrok.set_auth_token(ngrok_auth_token)

# Constants
# NOTE(review): the training cell in this notebook runs a single epoch at
# batch size 1; confirm that "checkpoint-1209" comes from the intended run.
MODEL_DIR = "/weights/checkpoint-1209"

# Load the fine-tuned model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
model = AutoModelForCausalLM.from_pretrained(MODEL_DIR, device_map="auto", torch_dtype=torch.float16)

def test_model(prompt):
    """Generate a response to ``prompt`` with the hosted checkpoint.

    The prompt is wrapped in the same system/user/assistant template that the
    training examples and the other inference cells in this notebook use, so
    the served model sees inputs in the format it was fine-tuned on.

    Args:
        prompt: The user's query text.

    Returns:
        str: the decoded sequence (prompt template plus generated
        continuation) with special tokens removed.
    """
    instruction = (
        f"<s><|system|>\nYour task is to respond to user queries.\n<|user|>\n{prompt}\n<|assistant|>\n"
    )
    # Send inputs to the device the model was placed on instead of assuming "cuda".
    inputs = tokenizer(instruction, return_tensors="pt").to(model.device)
    # generate() is called with **inputs, so drop token_type_ids if the
    # tokenizer emitted them.
    if "token_type_ids" in inputs:
        inputs.pop("token_type_ids")
    # Bound the generated tokens only; max_length=200 also counted the prompt.
    outputs = model.generate(**inputs, max_new_tokens=200)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Flask app
app = Flask(__name__)

@app.route('/query', methods=['POST'])
def query():
    """Handle ``POST /query``: run the model on the JSON body's ``"prompt"``.

    Expects a JSON object such as ``{"prompt": "..."}`` and responds with
    ``{"response": "<model output>"}``. A body that is missing, not valid
    JSON, or not a JSON object yields a 400 with an ``"error"`` message
    instead of an unhandled exception.
    """
    # silent=True returns None for a missing/invalid JSON body rather than raising.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object with a 'prompt' field."}), 400
    prompt = data.get("prompt", "")
    response = test_model(prompt)
    return jsonify({"response": response})

if __name__ == "__main__":
    # Start ngrok tunnel to the local Flask port and show the public address
    public_url = ngrok.connect(5000)
    print(f"Public URL: {public_url}")

    # Run the Flask app (blocks the cell until interrupted)
    app.run(host="0.0.0.0", port=5000)


Some weights of the model checkpoint at /content/drive/MyDrive/W6998-DL/Project/training/weights/checkpoint-1209 were not used when initializing LlamaForCausalLM: ['model.layers.2.self_attn.o_proj.weight_format', 'model.layers.1.mlp.down_proj.weight_format', 'model.layers.7.self_attn.q_proj.weight_format', 'model.layers.23.mlp.down_proj.weight_format', 'model.layers.18.self_attn.o_proj.weight_format', 'model.layers.7.mlp.gate_proj.weight_format', 'model.layers.11.mlp.down_proj.weight_format', 'model.layers.18.self_attn.q_proj.weight_format', 'model.layers.25.mlp.up_proj.weight_format', 'model.layers.0.self_attn.o_proj.weight_format', 'model.layers.9.mlp.down_proj.weight_format', 'model.layers.28.self_attn.q_proj.weight_format', 'model.layers.25.self_attn.q_proj.weight_format', 'model.layers.1.self_attn.q_proj.weight_format', 'model.layers.0.self_attn.q_proj.weight_format', 'model.layers.10.mlp.up_proj.weight_format', 'model.layers.7.self_attn.o_proj.weight_format', 'model.layers.4.self

Public URL: NgrokTunnel: "https://8a27-34-138-80-49.ngrok-free.app" -> "http://localhost:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.28.0.12:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [19/Dec/2024 18:58:27] "[33mGET / HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [19/Dec/2024 18:58:28] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [19/Dec/2024 19:06:22] "POST /query HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [19/Dec/2024 19:06:48] "POST /query HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [19/Dec/2024 19:07:15] "POST /query HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [19/Dec/2024 19:07:41] "POST /query HTTP/1.1" 200 -


## Inference with fine-tuned weight files

**Sample Inferencing with ngrok app**

In [None]:
import requests

# Define the ngrok public URL for your Flask API.
# This must match the "Public URL" printed by the hosting cell; the value
# below is the one from the recorded run.
url = "https://8a27-34-138-80-49.ngrok-free.app/query"

# Queries to test the model
queries = [
    "Find Google search results for 'Python programming'.",
    "What is the latest news about artificial intelligence?",
    "Find recipes for healthy dinner options.",
    "Tell me about the top tourist attractions in Paris."
]

# Function to query the model
def query_model(prompt, timeout=300):
    """POST ``prompt`` to the hosted model endpoint and return its reply.

    Args:
        prompt: Text sent as the ``"prompt"`` field of the JSON payload.
        timeout: Seconds to wait for the server before giving up. Generation
            is slow, so the default is generous.

    Returns:
        str: the ``"response"`` field of the JSON reply (``"No response"`` if
        the field is absent), or a string starting with ``"Error:"`` that
        describes the failure. Failures are reported this way, not raised.
    """
    payload = {"prompt": prompt}
    try:
        response = requests.post(url, json=payload, timeout=timeout)
    except requests.RequestException as exc:
        # Connection refused, DNS failure, timeout, expired tunnel, ...
        return f"Error: request failed, {exc}"

    if response.status_code == 200:
        try:
            return response.json().get("response", "No response")
        except ValueError:
            # A 200 whose body is not JSON (for example an HTML page).
            return f"Error: invalid JSON in response, {response.text}"
    else:
        return f"Error: {response.status_code}, {response.text}"

# Loop through the queries and display results
for query in queries:
    print(f"Query: {query}")
    print(f"Response: {query_model(query)}")
    print("-" * 80)
