In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer

def load_model(model_path):
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype="auto",
        device_map="auto"
    )
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    return model, tokenizer



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def generate_response(model, tokenizer, prompt, system_prompt):
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt}
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=512
    )
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]

    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return response

In [2]:
import numpy as np
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.schema import Document
import json

import json

# Load JSONL data
def load_jsonl(file_path):
    data = []
    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            try:
                data.append(json.loads(line))
            except json.JSONDecodeError as e:
                print(f"Skipping invalid line: {line.strip()} - Error: {e}")
                continue  # Skip malformed lines

    return data

rag_jsonl_path = "./training_data/raw_data/comb.jsonl" 

# Example: JSONL structure
# {"question": "What is RAG?", "answer": "Retrieval-Augmented Generation..."}
jsonl_data = load_jsonl(rag_jsonl_path)


embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


# Convert data into LangChain Document format
documents = [Document(page_content=item["input"], metadata={"output": item["output"]}) for item in jsonl_data]

# Create FAISS vector store
vector_store = FAISS.from_documents(documents, embed_model)

# Save FAISS index
vector_store.save_local("faiss_knowledge_base")



Skipping invalid line:  - Error: Expecting value: line 2 column 1 (char 1)
Skipping invalid line:  - Error: Expecting value: line 2 column 1 (char 1)
Skipping invalid line:  - Error: Expecting value: line 2 column 1 (char 1)
Skipping invalid line:  - Error: Expecting value: line 2 column 1 (char 1)
Skipping invalid line:  - Error: Expecting value: line 2 column 1 (char 1)
Skipping invalid line:  - Error: Expecting value: line 2 column 1 (char 1)
Skipping invalid line:  - Error: Expecting value: line 2 column 1 (char 1)
Skipping invalid line:  - Error: Expecting value: line 2 column 1 (char 1)
Skipping invalid line:  - Error: Expecting value: line 2 column 1 (char 1)
Skipping invalid line:  - Error: Expecting value: line 2 column 1 (char 1)
Skipping invalid line:  - Error: Expecting value: line 2 column 1 (char 1)
Skipping invalid line:  - Error: Expecting value: line 2 column 1 (char 1)
Skipping invalid line:  - Error: Expecting value: line 2 column 1 (char 1)
Skipping invalid line:  -

  embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [3]:
# Load the FAISS index later
vector_store = FAISS.load_local("faiss_knowledge_base", embed_model, allow_dangerous_deserialization=True)

In [4]:
def retrieve_docs(query, top_k=3, threshold=0.5):
    docs = vector_store.similarity_search_with_score(query, top_k)

    results = ""

    for doc in docs:
        if doc[1] < threshold:
            results += f" {doc[0].metadata['output']}\n"


    return results if results else "NIL"



In [5]:
import random

def generate_response_w_RAG(model, tokenizer, prompt):

    rejection_messages = [
    "C'mon now. I'm not ChatGPT or Google.",
    "No clue, try somewhere else",
    "I have no idea, go search online.",
    "Hey, I'm not a search engine.",
    "I'm not sure, try a friend.",
    ]

    results = retrieve_docs(prompt)
    if results == "NIL":
        return random.choice(rejection_messages)
    

    rag_prompt = "The user is asking for information about Wei Hong, you are to respond as him. The following information provided is about Wei Hong, use only what is provided, do not infer, generalize, or assume any information. If no relavant information is provided, respond with 'I don't know':{}".format(results)
    messages = [
        {"role": "system", "content": rag_prompt},
        {"role": "user", "content": prompt}
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=512,
    )
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]

    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return response

def filter_response(response, context):
    """
    Ensures that the response strictly adheres to the retrieved context.
    If the response introduces unverified data, return a rejection message.
    """
    # 🔹 Token Matching: Ensure response words exist in context
    context_words = set(context.lower().split())
    response_words = set(response.lower().split())
    
    if not response_words.issubset(context_words):
        return random.choice(rejection_messages)

    # 🔹 Semantic Similarity Check
    context_embedding = embed_model.encode(context, convert_to_tensor=True)
    response_embedding = embed_model.encode(response, convert_to_tensor=True)
    similarity_score = util.pytorch_cos_sim(response_embedding, context_embedding).item()

    # If similarity is low, reject response
    if similarity_score < 0.75:  # Adjust threshold based on testing
        return random.choice(rejection_messages)

    return response

In [8]:
import json
import evaluate

In [6]:

model_path = "./trained_models/1.5b-v13"
model, tokenizer = load_model(model_path)

In [18]:
input_text = "What goals do you have in the future career wise?"
model_output = generate_response_w_RAG(model, tokenizer, input_text)
print(model_output)

My ultimate goals include creating meaningful AI-driven solutions, transitioning into a solutions architect role, and potentially starting my own tech company.


In [None]:
input_text = "tell me about the projects you have done?"
model_output = generate_response_w_RAG(model, tokenizer, input_text)
print(model_output)

Sure! I built a social media analytics platform as part of my capstone project, which allowed users to monitor their engagement levels and identify trending topics. Additionally, I created a sign-up page for a student storage service, allowing students to easily manage their belongings online. Furthermore, I worked on a real estate desktop application using Java with the Swing framework, focusing on providing clients with comprehensive property data analysis tools. Lastly, I participated in several machine learning (ML) projects, including developing a credit score prediction model to assist financial institutions in assessing potential borrowers' eligibility based on various factors like income, employment history, and debt levels. Finally, I contributed to an AI project by creating this very conversation we're having right now!


In [20]:
input_text = "Tell me about w3i solutions"
model_output = generate_response_w_RAG(model, tokenizer, input_text)
print(model_output)

W3i Solutions is a student summer storage service that was established by myself to offer affordable and accessible storage options specifically designed for students who need temporary housing during their dorm move-out periods. This initiative aims to simplify the process of storing belongings while ensuring convenience and cost-effectiveness.


In [25]:
input_text = "Who is the president of the USA?"
model_output = generate_response_w_RAG(model, tokenizer, input_text)
print(model_output)

Hey, I'm not a search engine.


In [28]:
input_text = "What are your thoughts on the future of AI?"
model_output = generate_response_w_RAG(model, tokenizer, input_text)
print(model_output)

AI will greatly impact many industries, especially those where human-to-human interaction is less crucial. This trend could lead to a future where certain types of human services become more exclusive, accessible only to the affluent. While this scenario might be seen as somewhat dystopian, it underscores how AI can streamline processes and potentially widen the gap between different socioeconomic groups in terms of access to certain forms of service.


In [None]:

def load_eval_data(file_path):
    data = []
    with open(file_path, "r") as f:
        for line in f:
            try:
                data.append(json.loads(line))
            except json.JSONDecodeError as e:
                continue

    return data

def evaluate_model(model, tokenizer, eval_path):

    eval_data = load_eval_data(eval_path)

    # Load Metrics
    bleu_metric = evaluate.load("sacrebleu")


    # Evaluate Performance
    total_count = len(eval_data)
    exact_match_count = 0
    bleu_scores = []

    test_log = {}

    for example in eval_data:
        input_text = example["input"]
        expected_output = example["output"]

        # Generate model response
        model_output = generate_response_w_RAG(model, tokenizer, input_text)

        # Exact match
        if model_output.strip().lower() == expected_output.strip().lower():
            exact_match_count += 1

        # BLEU score
        bleu_score = bleu_metric.compute(predictions=[model_output], references=[[expected_output]])["score"]
        bleu_scores.append(bleu_score)

        test_log[input_text] = {
            "expected": expected_output,
            "generated": model_output,
            "bleu_score": bleu_score
        }

    # Compute Final Metrics
    accuracy = exact_match_count / total_count
    average_bleu = sum(bleu_scores) / total_count

    print("\n📈 **Final Evaluation Results:**")
    print(f"🎯 Accuracy: {accuracy * 100:.2f}%")
    print(f"📊 Average BLEU Score: {average_bleu:.2f}")
    return test_log, accuracy, average_bleu


In [11]:
model_path_1 = "./trained_models/0.5b-v13"
model_path_2 = "./trained_models/0.5b-v8"
# model_path_3 = "./trained_models/0.5b-v6"
# base_model_path = "./base_models/Qwen2.5-0.5B-inst"
EVAL_FILE = "./training_data/raw_data/comb.jsonl"  
SYSTEM_PROMPT = "The user is asking for information about Wei Hong, you are to respond as him. Use only data that you are trained on, do not infer, generalize, or assume any information. If you do not know the answer, respond with 'I don't know'"


In [12]:
import torch
torch.cuda.empty_cache()

model_1, tokenizer_1 = load_model(model_path_1)
model_2, tokenizer_2 = load_model(model_path_2)
# model_3, tokenizer_3 = load_model(model_path_3)
# bmodel, btokenizer = load_model(base_model_path)



In [13]:

# print("v8  ---------------")
# log1, accuracy1, bleu1 = evaluate_model(model_1, tokenizer_1, EVAL_FILE)
# print("")
# print("v12  ---------------")
# log2, accuracy2, bleu2 = evaluate_model(model_2, tokenizer_2, EVAL_FILE)
# print("")
# print("v6  ---------------")
# log3, accuracy3, bleu3 = evaluate_model(model_3, tokenizer_3, EVAL_FILE)
# print("")
# print("Base model ---------------")
# logb, accuracyb, bleub = evaluate_model(bmodel, btokenizer, EVAL_FILE)

model1 = 0
model2 = 0 
for i in range (3):
    print(" iteration", i+1 )
    print("v13  ---------------")
    log1, accuracy1, bleu1 = evaluate_model(model_1, tokenizer_1, EVAL_FILE)
    print("v8  ---------------")
    log2, accuracy2, bleu2 = evaluate_model(model_2, tokenizer_2, EVAL_FILE)
    model1 += bleu1
    model2 += bleu2

print("Average Results over 3 iterations")
print("v13  ---------------")
print(f"Average BLEU Score: {model1/5:.2f}")
print("")
print("v8  ---------------")
print(f"Average BLEU Score: {model2/5:.2f}")


 iteration 1 

v13  ---------------
v8  ---------------
 iteration 2 

v13  ---------------
v8  ---------------
 iteration 3 

v13  ---------------
v8  ---------------
Average Results over 3 iterations
v13  ---------------
Average BLEU Score: 19.75

v8  ---------------
Average BLEU Score: 21.92
