In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Milvus
from langchain.document_loaders import WebBaseLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter
import os
import time

In [None]:
# Connection settings handed to the Milvus vector store (Zilliz Cloud endpoint).
# Each value can be overridden through an environment variable so secrets do
# not have to be typed into (and saved with) the notebook; the defaults are the
# literals this cell used before.
connection_args = {
    "uri": os.environ.get(
        "ZILLIZ_URI",
        "https://in03-5052868020ac71b.api.gcp-us-west1.zillizcloud.com",
    ),
    "user": os.environ.get("ZILLIZ_USER", "vaclav@pechtor.ch"),
    "token": os.environ.get("ZILLIZ_TOKEN", ""),
    "secure": True,
}

# Keep an OPENAI_API_KEY that is already exported; only fall back to the empty
# placeholder when none is set. An unconditional assignment would wipe a
# working key every time this cell runs.
os.environ.setdefault("OPENAI_API_KEY", '')

In [None]:
# Folder that is scanned for input files, and the (initially empty) list that
# collects the documents loaded from it.
directory, docs = './datasets/', []

In [None]:
# Load every regular file found directly inside `directory` into `docs`.
# `docs` is rebuilt from scratch so that re-running this cell does not append
# the same files a second time.
docs = []
# os.listdir returns entries in arbitrary order; sorting keeps the document
# order (and therefore the indices logged during ingestion) reproducible.
for filename in sorted(os.listdir(directory)):
    filepath = os.path.join(directory, filename)
    if not os.path.isfile(filepath):
        # TextLoader is given a file path; skip sub-directories.
        continue
    loader = TextLoader(filepath)
    docs.extend(loader.load())

In [None]:
# Break the loaded documents into chunks (chunk_size=1024, no overlap).
# `docs` is rebound to the chunk list; later cells read it under that name.
text_splitter = CharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=1024,
)
chunks = text_splitter.split_documents(docs)
docs = chunks

In [None]:
import traceback

embeddings = OpenAIEmbeddings()
batch_size = 10
max_retries = 1  # maximum number of attempts per batch and per single document
retry_delay_seconds = 1  # pause between two consecutive attempts


def _store_with_retry(documents, label):
    """Embed `documents` and insert them via ``Milvus.from_documents``.

    Args:
        documents: list of documents passed to ``Milvus.from_documents``.
        label: human-readable description of `documents` used in log output.

    Returns:
        The object returned by ``Milvus.from_documents`` on the first
        successful attempt, or ``None`` if all ``max_retries`` attempts raised.
    """
    for attempt in range(max_retries):
        try:
            return Milvus.from_documents(
                documents,
                embedding=embeddings,
                connection_args=connection_args,
            )
        except Exception as e:
            # Best-effort ingestion: log the failure and keep going.
            print(f'Exception occurred while processing {label}: {e}')
            print(traceback.format_exc())
            if attempt < max_retries - 1:  # no need to sleep after the last attempt
                time.sleep(retry_delay_seconds)
    return None


skipped_documents = []  # indices into `docs` that could not be stored
stored_anything = False

for i in range(0, len(docs), batch_size):
    doc_batch = docs[i:i+batch_size]
    # The final batch may hold fewer than `batch_size` documents, so derive the
    # upper index from the batch itself instead of from `batch_size`.
    last_index = i + len(doc_batch) - 1
    print(f'Processing docs from {i} to {last_index}')

    store = _store_with_retry(doc_batch, f'batch from {i} to {last_index}')
    if store is not None:
        vector_store = store
        stored_anything = True
        continue

    # The whole batch failed: retry its documents one by one so that a single
    # bad document does not prevent the others from being stored.
    print(f'Switching to single-document processing for batch from {i} to {last_index} after {max_retries} attempts.')
    for j, doc in enumerate(doc_batch):
        store = _store_with_retry([doc], f'document {i+j}')
        if store is not None:
            vector_store = store
            stored_anything = True
        else:
            print(f'Skipping document {i+j} after {max_retries} attempts.')
            skipped_documents.append(i + j)

# Summarise the outcome so failures are not buried in the per-batch log.
if skipped_documents:
    print(f'{len(skipped_documents)} document(s) could not be stored: {skipped_documents}')
if not stored_anything:
    print('No documents were stored; vector_store was not assigned by this cell.')


In [None]:
# Look up the stored chunks most similar to the question.
# `docs` is rebound to the search hits because the QA cell below reads its
# input documents from that name.
query = "Wann trifft sich der Kantonsrat nach den Wahlen?"
search_hits = vector_store.similarity_search(query)
docs = search_hits

print(search_hits)

In [None]:
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.llms import OpenAI

# Build a question-answering-with-sources chain (chain_type "map_reduce",
# intermediate steps included in the result) on an OpenAI LLM created with
# temperature=0.
llm = OpenAI(temperature=0)
chain = load_qa_with_sources_chain(
    llm,
    chain_type="map_reduce",
    return_intermediate_steps=True,
)

# Run the chain over the documents currently held in `docs`; the call is the
# last expression so its result is shown as the cell output.
query = "Wann trifft sich der Kantonsrat nach den Wahlen?"
chain_inputs = {"input_documents": docs, "question": query}
chain(chain_inputs, return_only_outputs=True)