<a href="https://colab.research.google.com/github/yammochi/bahamuta/blob/main/gemini_%2Brag_%E6%9E%B6%E6%A7%8B.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 前置作業：請在 Colab 的 Secrets 中新增 GOOGLE_API_KEY，其值為 Gemini API 的金鑰

# 請到這裡申請 https://aistudio.google.com/app/apikey




## 一.工具

In [1]:
!pip install -q -U google-generativeai
!pip install faiss-cpu
!pip install sentence-transformers
!pip install gradio

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m18.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from to

## 二.創造RAG向量資料庫

請依照以下格式填入，或是直接上傳符合格式的檔案。以下只是示範，實測大概需要 50 組左右。如果要放劇情，請差不多每 6~7 段分成一組（例如 "title": "劇情1"、"title": "劇情2"……），不然單筆內容會太長。

In [None]:
%%writefile medical_knowledge.json
[
    {"title": "全名", "content": "長崎そよ"},
    {"title": "學校", "content": "月之森女子學園"},
    {"title": "名言", "content": "為什麼要演奏春日影!!!!!"}
]

Writing medical_knowledge.json


## 三.我們採用 intfloat 的 Embedding 模型（對多國語言來說非常適合），搭配 faiss 的向量檢索庫。如果對 token 數不滿意，可以參考這個評比網站：https://ihower.tw/blog/archives/12167

In [3]:
from sentence_transformers import SentenceTransformer
import faiss
import json

# Embedding model used to vectorise every knowledge entry.
# NOTE(review): the multilingual-e5 model card reportedly recommends
# "query: " / "passage: " input prefixes. Adopting them would have to be done
# here and at query time together -- TODO confirm before changing.
model = SentenceTransformer('intfloat/multilingual-e5-large')

# Knowledge base: a JSON array of {"title": ..., "content": ...} objects.
# The entries contain CJK text, so decode the file explicitly as UTF-8 instead
# of relying on the platform's default encoding.
with open('medical_knowledge.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# One embedding per entry, computed from its "content" field only.
texts = [item['content'] for item in data]
embeddings = model.encode(texts)

# Flat L2 index sized to the embedding width; row i corresponds to data[i].
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Persist the index to disk under the name 'medical_index.faiss'.
faiss.write_index(index, 'medical_index.faiss')


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/387 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/160k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/57.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/690 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/418 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/201 [00:00<?, ?B/s]

## 四.開啟gradio介面

In [None]:
import os
import json
import google.generativeai as genai
from google.colab import userdata
from sentence_transformers import SentenceTransformer
import faiss
import gradio as gr

# Configure the Gemini client with the API key read from Colab's userdata store.
genai.configure(api_key=userdata.get("GOOGLE_API_KEY"))

# Load the embedding model and the FAISS index stored on disk.
model = SentenceTransformer('intfloat/multilingual-e5-large')
index = faiss.read_index('medical_index.faiss')

# Load the knowledge entries that the index rows refer back to.
with open('medical_knowledge.json', 'r', encoding='utf-8') as knowledge_file:
    data = json.loads(knowledge_file.read())

# Retrieval helper
def retrieve(query, k=3):
    """Return the row ids of the knowledge entries nearest to ``query``.

    Args:
        query: Text to embed with the module-level ``model``.
        k: Maximum number of neighbours requested from the module-level
            ``index``.

    Returns:
        The ids from the first (only) result row of ``index.search`` with the
        ``-1`` placeholders removed, so it may contain fewer than ``k`` ids.
    """
    query_vec = model.encode([query])
    _distances, indices = index.search(query_vec, k)
    hits = indices[0]
    # FAISS pads the result with -1 when the index holds fewer than k vectors.
    # Handing -1 to the caller would make data[-1] silently select the last
    # entry, so drop the placeholders here.
    return hits[hits >= 0]

# Path of the text file holding the conversation transcript; it is read by
# load_conversation_history and overwritten by save_conversation_history.
conversation_history_file = '/content/conversation_history.txt'

# Conversation-history loader
def load_conversation_history():
    """Return the saved transcript text, or "" when the history file is missing."""
    try:
        history_file = open(conversation_history_file, 'r', encoding='utf-8')
    except FileNotFoundError:
        # Nothing has been saved yet: start from an empty transcript.
        return ""
    with history_file:
        return history_file.read()

# Conversation-history writer
def save_conversation_history(history):
    """Replace the contents of the history file with ``history``."""
    with open(conversation_history_file, mode='w', encoding='utf-8') as history_file:
        history_file.write(history)

# Question-answering entry point
def answer_question(question):
    """Answer ``question`` in character using retrieved context and saved history.

    Loads the stored transcript, retrieves the knowledge entries closest to the
    question, asks Gemini for a reply, appends the new exchange to the
    transcript, saves it, and returns the reply text.
    """
    history = load_conversation_history()

    # Join the "content" fields of the retrieved entries into one context string.
    context = " ".join(data[i]['content'] for i in retrieve(question))

    # Persona instructions, then transcript, retrieved context and the question.
    prompt = f"你是一名叫做爽世的高中生, 在樂團中是個貝斯手 個性端莊淑女。請基於以下上下文和之前的對話歷史回答問題。\n\n對話歷史：\n{history}\n\n上下文：\n{context}\n\n問題：\n{question}"

    # Named `gemini` so it is not mistaken for the module-level embedding `model`.
    gemini = genai.GenerativeModel("gemini-2.0-flash")
    answer = gemini.generate_content([prompt]).text

    # Persist the transcript extended with this exchange.
    save_conversation_history(history + f"使用者：{question}\n爽世：{answer}\n")

    return answer

# Append an entry to medical_knowledge.json and refresh the search index
def append_data(title, content):
    """Add one knowledge entry, persist it, and rebuild the vector index.

    Args:
        title: Title stored under the entry's "title" key.
        content: Text stored under the "content" key; this is what gets embedded.

    Returns:
        The confirmation message shown in the UI.

    Side effects:
        Rewrites 'medical_knowledge.json' and 'medical_index.faiss', and rebinds
        the module-level ``data`` and ``index`` so that ``retrieve`` can find
        the new entry immediately.
    """
    # Both names must be declared global: assigning to a plain `index` would
    # only create a local and leave the index used by retrieve() stale.
    global data, index

    new_entry = {"title": title, "content": content}
    with open('medical_knowledge.json', 'r+', encoding='utf-8') as f:
        try:
            entries = json.load(f)
        except json.JSONDecodeError:  # e.g. an empty file: start a new list
            entries = []
        entries.append(new_entry)
        f.seek(0)  # rewind so the dump overwrites the old content
        json.dump(entries, f, indent=2, ensure_ascii=False)
        f.truncate()  # drop leftover bytes if the new content is shorter

    # Re-embed every entry and rebuild the index from scratch.
    embeddings = model.encode([item['content'] for item in entries])
    rebuilt_index = faiss.IndexFlatL2(embeddings.shape[1])
    rebuilt_index.add(embeddings)
    faiss.write_index(rebuilt_index, "medical_index.faiss")

    # Publish the new entries and the matching index together.
    data = entries
    index = rebuilt_index

    return "資料已成功新增！"


# Build the Gradio interface: a chat area, a question box, a button that clears
# the saved history, and a row for adding new knowledge entries.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="輸入你的問題:")
    clear = gr.Button("清除歷史紀錄")

    # Inputs and button for appending an entry to the knowledge base.
    with gr.Row():
      title_input = gr.Textbox(label="標題")
      content_input = gr.Textbox(label="內容")
      append_button = gr.Button("新增資料 很慢 差不多一次60秒")

    def user(user_message, history):
        """Answer the submitted message and add the exchange to the chat history.

        Returns "" (sent to the `msg` textbox) and the updated history (sent to
        the `chatbot` component).
        """
        response = answer_question(user_message)
        history.append((user_message, response))
        return "", history

    def clear_history():
        """Empty the saved history file and return [] for the chatbot component."""
        save_conversation_history("")  # overwrite the history file with an empty string
        return []

    # Event wiring. The status textbox for append_data is created inline here.
    msg.submit(user, [msg, chatbot], [msg, chatbot])
    clear.click(clear_history, outputs=chatbot)
    append_button.click(append_data, inputs=[title_input, content_input], outputs=gr.Textbox(label="狀態"))

# debug=True keeps this cell running so that errors and logs stay visible.
demo.launch(debug=True)






Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://a890fe3ee83cee53ad.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
