In [1]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

In [2]:
from wikidocsloader import WikidocsLoader

In [3]:
book_id = 20
loader = WikidocsLoader(book_id)
documents = loader.load()

In [4]:
from langchain.text_splitter import CharacterTextSplitter
text_splitter = CharacterTextSplitter(chunk_size=600, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

Created a chunk of size 987, which is longer than the specified 600
Created a chunk of size 616, which is longer than the specified 600
Created a chunk of size 1487, which is longer than the specified 600
Created a chunk of size 659, which is longer than the specified 600


In [5]:
from langchain.embeddings.openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

In [6]:
search_index = FAISS.from_documents(docs, embeddings)

In [7]:
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.prompts import PromptTemplate
from langchain import OpenAI

In [8]:
template = """Given the following extracted parts of a long document and a question, create a final answer with references ("SOURCES"). 
If you don't know the answer, just say that you don't know. Don't try to make up an answer.
ALWAYS return a "SOURCES" part in your answer.
Respond in Korean.

QUESTION: {question}
=========
{summaries}
=========
FINAL ANSWER IN KOREAN:"""
PROMPT = PromptTemplate(template=template, input_variables=["summaries", "question"])

chain = load_qa_with_sources_chain(OpenAI(temperature=0), chain_type="stuff", prompt=PROMPT)

In [9]:
def ask(question):
    print(
        chain(
            {
                "input_documents": search_index.similarity_search(question, k=3),
                "question": question,
            },
            return_only_outputs=True,
        )["output_text"]
    )

In [10]:
ask('위키독스의 특징은 무엇인가요?')



위키독스의 특징은 간결함, 목차, 저자와의 교류, 고유 URL, 알림 기능, 공동 작업 기능, 백업, 변경 이력 등이 있습니다. 

SOURCES: 
https://wikidocs.net/153
https://wikidocs.net/156
https://wikidocs.net/151


In [11]:
ask('위키독스 API가 있나요?')


위키독스는 Open API를 제공하고 있습니다. 현재는 시험 기간으로 조회서비스만 운영하고 있습니다. 다음 URL에서 API를 확인하고 테스트해 볼 수 있습니다. 
SOURCES: 
https://wikidocs.net/178030
https://wikidocs.net/153
https://wikidocs.net/156


In [12]:
ask('위키독스에 광고를 게시할 수 있나요?')


위키독스에는 광고를 게시할 수 있습니다. 2017년 5월부터 저자가 등록한 광고와 위키독스의 광고가 9대 1의 비율로 번갈아가면서 표시됩니다. 또한 저자가 활성화된 애드센스 계정이 없는 경우 위키독스 포인트 광고
