In [5]:
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.llms import OpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
import os
from openai import AzureOpenAI

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [17]:
# Use environment variables instead of hard-coded secrets
api_key = os.getenv("AZURE_OPENAI_KEY")
endpoint = os.getenv("ENDPOINT_URL", "https://aifdry-0610.openai.azure.com/")
if not api_key:
    print("Warning: AZURE_OPENAI_KEY not set. Set the environment variable before running or provide credentials securely.")

client = AzureOpenAI(
    azure_endpoint=endpoint,
    api_key=api_key,
    api_version="2025-01-01-preview",
)

In [7]:
loader = WebBaseLoader(url)

In [9]:
raw_documents = loader.load()

In [10]:
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(raw_documents)

In [44]:
# Ensure documents -> texts list exists
docs_for_emb = [d.page_content for d in documents]
print(docs_for_emb)
metadatas = [d.metadata for d in documents]  # 必要なら保存

["Upcoming Data Science Courses – 365 Data Science\n\n\n\n \n          Courses\n         \n          Learning Paths  Learning Paths\n             \n                Career Paths  Career Paths\n                   See all  Career Paths See all  \n                        Data Analyst\n                      \n                        Data Scientist\n                      \n                        Business Analyst\n                      \n                        Senior data analyst\n                      \n                        Senior data scientist\n                      \n                        Tableau developer\n                      \n                        Power BI developer\n                      \n                        Data engineer\n                      \n                        Machine learning scientist\n                      \n                        AI Engineer\n                       \n                    Not sure?\n                   Take a career quiz  \n                

In [73]:
import numpy as np
resp = client.embeddings.create(
    input=docs_for_emb,
    model="text-embedding-ada-002"
)
embeddings = np.array([item.embedding for item in resp.data], dtype="float32")
print("embeddings shape:", embeddings.shape)

embeddings shape: (3, 1536)


In [56]:
import faiss
dim = embeddings.shape[1]
index = faiss.IndexFlatL2(dim)  # 単純な L2 インデックス
index.add(embeddings)
print("faiss index ntotal:", index.ntotal)

faiss index ntotal: 3


In [62]:
# 簡易検索関数（query を埋め込み化して検索、テキストとメタデータを返す）
def similarity_search(query, k=4):
    qresp = client.embeddings.create(input=[query], model="text-embedding-ada-002")
    q_emb = np.array([d.embedding for d in qresp.data], dtype="float32")
    D, I = index.search(q_emb, k)  # D: distances, I: indices
    results = []
    for idx in I[0]:
        if idx < len(docs_for_emb):
            results.append({"text": docs_for_emb[idx], "metadata": metadatas[idx]})
    return results

In [72]:
query = "このページはどんなコースについて説明していますか？"
hits = similarity_search(query, k=3)
context_text = "\n\n---\n\n".join([f"Doc[{i}]: {h['text']}" for i,h in enumerate(hits)])
print(context_text)
prompt = f"以下は参照用ドキュメントです：\n\n{context_text}\n\n質問: {query}\n\n提供された情報だけを使って簡潔に答えてください。"
print(prompt)

chat_resp = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ],
    temperature=0.0,
)
# 応答を取り出して表示
# print(chat_resp)
answer = chat_resp.choices[0].message.content
print(type(answer))
print(answer)

Doc[0]: Upcoming Data Science Courses – 365 Data Science



 
          Courses
         
          Learning Paths  Learning Paths
             
                Career Paths  Career Paths
                   See all  Career Paths See all  
                        Data Analyst
                      
                        Data Scientist
                      
                        Business Analyst
                      
                        Senior data analyst
                      
                        Senior data scientist
                      
                        Tableau developer
                      
                        Power BI developer
                      
                        Data engineer
                      
                        Machine learning scientist
                      
                        AI Engineer
                       
                    Not sure?
                   Take a career quiz  
                Bootcamp
               We 