In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/health-products/InfoHealthFood_20241225.xlsx


In [2]:
!pip install gradio
!pip install langchain
!pip install -U langchain-community
!pip install qdrant-client
!pip install tiktoken
!pip install peft transformers accelerate
!pip install openai

Collecting gradio
  Downloading gradio-5.9.1-py3-none-any.whl.metadata (16 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.5.2 (from gradio)
  Downloading gradio_client-1.5.2-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.28.1-py3-none-any.whl.metadata (7.1 kB)
Collecting huggingface-hub>=0.25.1 (from gradio)
  Downloading huggingface_hub-0.27.0-py3-none-any.whl.metadata (13 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.8/41.8 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 k

In [3]:
import os
from dotenv import load_dotenv
import pandas as pd
from langchain.vectorstores import Qdrant
from langchain.embeddings.openai import OpenAIEmbeddings
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load environment variables from .env file
load_dotenv('/kaggle/input/openai-env/.env')

# Set Hugging Face model
HUGGINGFACE_MODEL = "gpt2"
MODEL_NAME = "text-embedding-3-small"
COLLECTION_NAME = "health_products"

# Load data from Excel
file_path = "/kaggle/input/health-products/InfoHealthFood_20241225.xlsx"  # Update with the correct path
try:
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")
    df = pd.read_excel(file_path)
    if df.empty:
        raise ValueError("The Excel file is empty or invalid.")
    df = df[["許可證字號", "中文品名", "保健功效"]]
    df["QA"] = df["中文品名"] + " 的保健功效是: " + df["保健功效"]
    print("Data loaded successfully.")
except Exception as e:
    print(f"Error loading data: {e}")
    exit()

# Function to initialize embeddings and vectorstore
def initialize_qdrant(documents):
    try:
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("OPENAI_API_KEY environment variable not set.")

        embeddings = OpenAIEmbeddings(openai_api_key=api_key, model=MODEL_NAME)
        print("Embeddings initialized successfully.")
        qdrant = Qdrant.from_texts(
            texts=[doc["QA"] for doc in documents],
            embedding=embeddings,
            location=":memory:",  # Use in-memory storage
            collection_name=COLLECTION_NAME
        )
        print("Qdrant initialized successfully.")
        return qdrant
    except Exception as e:
        print(f"Error initializing Qdrant: {e}")
        return None

# Custom prompt generator
def custom_prompt(query: str, qdrant):
    if qdrant is None:
        return "Error: Qdrant is not initialized."
    try:
        results = qdrant.similarity_search(query, k=3)
        if not results:
            return "No relevant information found."
        context = "\n".join([x.page_content for x in results])
        return context
    except Exception as e:
        return f"Error generating prompt: {e}"

# Chatbot response function using Hugging Face GPT-2
def chatbot_response(query, qdrant, model, tokenizer):
    response = custom_prompt(query, qdrant)
    return response

# Initialize Qdrant
try:
    documents = [{"QA": row["QA"]} for _, row in df.iterrows()]
    if not documents:
        raise ValueError("No documents to initialize Qdrant.")
    qdrant = initialize_qdrant(documents)
    if qdrant is None:
        raise ValueError("Failed to initialize Qdrant.")
except Exception as e:
    print(f"Error initializing Qdrant: {e}")
    qdrant = None

if __name__ == "__main__":
    if qdrant is None:
        print("Exiting program due to Qdrant initialization failure.")
        exit()

    # Load Hugging Face GPT-2 model and tokenizer
    try:
        model = AutoModelForCausalLM.from_pretrained(HUGGINGFACE_MODEL)
        tokenizer = AutoTokenizer.from_pretrained(HUGGINGFACE_MODEL)
        print("Model and tokenizer loaded successfully.")
    except Exception as e:
        print(f"Error loading model or tokenizer: {e}")
        exit()

    while True:
        query = input("Enter a product name to check its health benefits (or type 'exit' to quit): ")
        if query.lower() == "exit":
            print("Exiting. Goodbye!")
            break
        response = chatbot_response(query, qdrant, model, tokenizer)
        print("Response:", response)


  warn("Workbook contains no default style, apply openpyxl's default")


Data loaded successfully.
Embeddings initialized successfully.
Qdrant initialized successfully.




Model and tokenizer loaded successfully.


Enter a product name to check its health benefits (or type 'exit' to quit):  肝不好要吃啥有幫助


Response: NK有益甘膠囊 的保健功效是: 護肝功能
黃金組合甘甘好膠囊 的保健功效是: 護肝
舒甘調達食品膠囊 的保健功效是: 護肝功能


Enter a product name to check its health benefits (or type 'exit' to quit):  防止心血管疾病要吃什麼呢


Response: 紅薏仁飲 的保健功效是: 調節血脂
健康3D錠狀食品 的保健功效是: 調節血脂, 調節血糖
高鈣鮮豆漿 的保健功效是: 調節血脂, 骨質保健


Enter a product name to check its health benefits (or type 'exit' to quit):  exit


Exiting. Goodbye!
