# Notebook Showing How to Use Unstructured-Ingest to Pre-Process Local Documents

### Constants + Parameters

In [117]:
# Folder that receives the JSON files written by unstructured-ingest.
output_dir = "my-docs"
# Folder holding the raw documents to pre-process.
input_dir = "files_used"
# Endpoint of the Weaviate instance used as the vector store.
weaviate_url = "http://localhost:8080"
# Model name handed to SentenceTransformer to compute embeddings.
embedding_model_name = "all-MiniLM-L6-v2"

## Ingestion Pipeline for VectorDB (Weaviate)

### Helper Functions to Ingest Documents and Pre-Process Them

In [118]:
import subprocess
import os
from typing import List, Dict
from userpaths import get_my_documents


# def process_local(output_dir: str, num_processes: int, input_path: str = get_my_documents()):
def process_local(output_dir: str, num_processes: int, input_path: str):
    """Run the ``unstructured-ingest local`` CLI over a folder of documents.

    Args:
        output_dir: Directory passed to ``--output-dir``.
        num_processes: Value passed to ``--num-processes``.
        input_path: Directory passed to ``--input-path``; it is processed
            with ``--recursive``.

    The outcome is reported with ``print``; nothing is returned.
    """
    command = [
        "unstructured-ingest",
        "local",
        "--input-path", input_path,
        "--output-dir", output_dir,
        "--num-processes", str(num_processes),
        "--recursive",
        "--verbose",
    ]

    # Capture stderr as well as stdout: without it the error stream is None
    # and reporting a failure would itself crash with AttributeError.
    completed = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    # errors="replace" keeps the report printable even when the tool emits
    # bytes that are not valid in the default codec.
    if completed.returncode == 0:
        print('Command executed successfully. Output:')
        print(completed.stdout.decode(errors="replace"))
    else:
        print('Command failed. Error:')
        print(completed.stderr.decode(errors="replace"))

def get_result_files(folder_path) -> List[str]:
    """Collect the paths of every ``.json`` file found under ``folder_path``.

    Args:
        folder_path: Root directory to walk recursively.

    Returns:
        The matching file paths (each joined with the directory it was found
        in), sorted so that the processing order is the same on every run.
        An empty list is returned when nothing matches or the folder does
        not exist.
    """
    return sorted(
        os.path.join(root, name)
        for root, _dirs, names in os.walk(folder_path)
        for name in names
        if name.endswith('.json')
    )

### Run Desired Pipeline To Generate Clean JSON using Unstructured

In [119]:
# Pre-process everything under input_dir with two worker processes, then
# list the JSON files that were written to output_dir.
process_local(
    input_path=input_dir,
    output_dir=output_dir,
    num_processes=2,
)
files = get_result_files(folder_path=output_dir)

Command executed successfully. Output:



### Helper Functions to Setup Weaviate Schema and Client

In [120]:
import uuid
import weaviate
from weaviate.util import get_valid_uuid

def create_local_weaviate_client(db_url: str):
    """Build a ``weaviate.Client`` that talks to the instance at ``db_url``."""
    client = weaviate.Client(url=db_url)
    return client

def get_schema(vectorizer: str = "none"):
    """Return the schema dictionary describing the single ``Doc`` class.

    Args:
        vectorizer: Value stored under the class's ``"vectorizer"`` key.

    Returns:
        A dict with one entry in ``"classes"``: the ``Doc`` class with four
        ``text`` properties (``last_modified``, ``player``, ``position``,
        ``text``).
    """
    # (property name, description) for every text property of the class.
    text_fields = [
        ("last_modified", "Last modified date for the document"),
        ("player", "Player related to the document"),
        ("position", "Player Position related to the document"),
        ("text", "Text content for the document"),
    ]
    properties = [
        {"name": name, "dataType": ["text"], "description": description}
        for name, description in text_fields
    ]
    doc_class = {
        "class": "Doc",
        "description": "A generic document class",
        "vectorizer": vectorizer,
        "properties": properties,
    }
    return {"classes": [doc_class]}

def upload_schema(my_schema, weaviate):
    """Replace whatever schema the server holds with ``my_schema``.

    Args:
        my_schema: Schema dictionary to create.
        weaviate: Client object exposing a ``schema`` attribute (inside this
            function the parameter name hides the ``weaviate`` module).
    """
    schema_api = weaviate.schema
    # Start from a clean slate before creating the new schema.
    schema_api.delete_all()
    schema_api.create(my_schema)

def count_documents(client: weaviate.Client) -> Dict:
    """Run an aggregate query with a meta count on the ``Doc`` class.

    Returns:
        The raw response of the query, unmodified.
    """
    aggregate_query = client.query.aggregate("Doc").with_meta_count()
    return aggregate_query.do()

### Setup Weaviate Client and Schema

In [121]:
# Connect to Weaviate and (re)create the schema; upload_schema deletes the
# existing schema first.
client = create_local_weaviate_client(weaviate_url)
my_schema = get_schema(vectorizer="none")
upload_schema(my_schema=my_schema, weaviate=client)

### Helper Functions to Stage Unstructured Documents for Indexing

In [122]:
from unstructured.chunking.title import chunk_by_title
from unstructured.documents.elements import DataSourceMetadata
from unstructured.partition.json import partition_json
from sentence_transformers import SentenceTransformer

# Embedding model shared by every call to compute_embedding.
embedding_model = SentenceTransformer(embedding_model_name)

def compute_embedding(chunk_text: List[str]):
    """Encode ``chunk_text`` with the module-level ``embedding_model``.

    Returns whatever ``embedding_model.encode`` produces for the input.
    """
    return embedding_model.encode(chunk_text)
    

def get_chunks(elements, chunk_under_n_chars=500, chunk_new_after_n_chars=1500):
    """Chunk partitioned elements by title and embed each chunk's text.

    Args:
        elements: Elements to chunk; their metadata is modified in place.
        chunk_under_n_chars: Forwarded to ``chunk_by_title`` as
            ``combine_under_n_chars``.
        chunk_new_after_n_chars: Forwarded to ``chunk_by_title`` as
            ``new_after_n_chars``.

    Returns:
        ``(chunks, embeddings)`` where ``chunks`` is a list of
        ``{"last_modified", "text"}`` dicts and ``embeddings`` is the result
        of ``compute_embedding`` on the chunk texts, in the same order.
    """
    # Strip metadata before chunking: data_source is dropped unless it is
    # exactly a DataSourceMetadata, coordinates is dropped whenever present.
    for element in elements:
        metadata = element.metadata
        if type(metadata.data_source) is not DataSourceMetadata:
            delattr(metadata, "data_source")
        if hasattr(metadata, "coordinates"):
            delattr(metadata, "coordinates")

    titled_chunks = chunk_by_title(
        elements,
        combine_under_n_chars=chunk_under_n_chars,
        new_after_n_chars=chunk_new_after_n_chars,
    )

    # Keep only the fields that are uploaded to the vector store.
    chunks = [
        {"last_modified": piece.metadata.last_modified, "text": piece.text}
        for piece in titled_chunks
    ]

    embeddings = compute_embedding([entry['text'] for entry in chunks])
    return chunks, embeddings


def add_data_to_weaviate(files, client, chunk_under_n_chars=500, chunk_new_after_n_chars=1500):
    """Chunk, embed and batch-upload every JSON file in ``files``.

    Args:
        files: Paths of the JSON files to load with ``partition_json``.
        client: Client whose ``batch`` interface receives the objects.
        chunk_under_n_chars: Forwarded to ``get_chunks``.
        chunk_new_after_n_chars: Forwarded to ``get_chunks``.

    A file whose partitioning or chunking raises ``IndexError`` is reported
    with ``print`` and skipped. The batch is flushed once, after all files.
    """
    for filename in files:
        try:
            chunks, embeddings = get_chunks(
                partition_json(filename=filename),
                chunk_under_n_chars,
                chunk_new_after_n_chars,
            )
        except IndexError as error:
            print(error)
            continue

        print(f"Uploading {len(chunks)} chunks for {filename!s}.")
        for position, chunk in enumerate(chunks):
            # Each object gets a fresh random UUID and its own vector.
            object_id = get_valid_uuid(uuid.uuid4())
            client.batch.add_data_object(
                data_object=chunk,
                class_name="doc",
                uuid=object_id,
                vector=embeddings[position],
            )

    client.batch.flush()

### Add Chunks to Weaviate

In [123]:
# Upload every processed file, then print the aggregate result for "Doc".
add_data_to_weaviate(
    files,
    client,
    chunk_under_n_chars=500,
    chunk_new_after_n_chars=1000,
)

print(count_documents(client)["data"]["Aggregate"]["Doc"])

Uploading 6 chunks for my-docs\aidant.txt.json.
Uploading 1 chunks for my-docs\anniv.txt.json.
Uploading 1 chunks for my-docs\arret-travail.txt.json.
Uploading 2 chunks for my-docs\artt.txt.json.
Uploading 1 chunks for my-docs\cet.txt.json.
Uploading 3 chunks for my-docs\congés-payés.txt.json.
Uploading 2 chunks for my-docs\coopt.txt.json.
Uploading 1 chunks for my-docs\dev-competences.txt.json.
Uploading 2 chunks for my-docs\eae.txt.json.
Uploading 2 chunks for my-docs\egalite-pro.txt.json.
Uploading 6 chunks for my-docs\enfants.txt.json.
Uploading 14 chunks for my-docs\epargnes salariale.txt.json.
Uploading 2 chunks for my-docs\equipe-formation.txt.json.
Uploading 2 chunks for my-docs\eTemptation.txt.json.
Uploading 1 chunks for my-docs\frac.txt.json.
Uploading 1 chunks for my-docs\gan.txt.json.
Uploading 4 chunks for my-docs\handicap.txt.json.
Uploading 1 chunks for my-docs\mariage-pacs.txt.json.
Uploading 2 chunks for my-docs\mobilite-durable.txt.json.
Uploading 1 chunks for my-doc

## LangChain RAG Application

In [8]:
from langchain.llms import LlamaCpp
from langchain.vectorstores.weaviate import Weaviate
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.prompts import PromptTemplate
from deep_translator import GoogleTranslator

### Instantiate Local Llama 2 LLM

In [40]:
# Callback manager with a stdout streaming handler, passed to the LLM below.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
n_gpu_layers = 4  # The original note says 1 is enough when running on Metal.
n_batch = 1000  # Should be between 1 and n_ctx; size it to the RAM available (e.g. on an Apple Silicon chip).
# Make sure the model path is correct for your system!
llm = LlamaCpp(
    model_path="model_files/llama-2-7b-chat.Q3_K_S.gguf",
    n_ctx=2048,
    n_batch=n_batch,
    n_gpu_layers=n_gpu_layers,
    f16_kv=True,  # MUST be True, otherwise problems appear after a couple of calls
    verbose=True,  # Verbose is required to pass to the callback manager
    callback_manager=callback_manager,
)

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


### Helper Function to Run RAG Process

In [129]:
def question_answer(question: str, vectorstore: Weaviate):
    """Answer ``question`` with the LLM, using the first retrieved document as context.

    Args:
        question: Question text; it is embedded with ``compute_embedding``
            and also inserted verbatim into the prompt.
        vectorstore: Store queried with
            ``max_marginal_relevance_search_by_vector``.

    Returns:
        ``(answer, similar_docs)``: the LLM output and the full list of
        retrieved documents.
    """
    embedding = compute_embedding(question)
    similar_docs = vectorstore.max_marginal_relevance_search_by_vector(embedding)
    prompt_template = PromptTemplate.from_template(
    """\
    Given context about the subject, answer the question based on the context provided to the best of your ability.
    Context: {context}
    Question:
    {question}
    Answer:
    """
    )

    # Only the first retrieved document is injected into the prompt. Fall
    # back to an empty context when the search returns nothing, instead of
    # failing with IndexError.
    context = similar_docs[0].page_content if similar_docs else ""
    prompt = prompt_template.format(context=context, question=question)
    answer = llm(prompt)
    return answer, similar_docs


In [26]:
def question_answer_context(question: str, vectorstore: Weaviate, previous_context: str = ""):
    """Answer ``question`` with the LLM, given retrieved documents and a prior conversation.

    Args:
        question: Question text; it is embedded with ``compute_embedding``
            and also inserted verbatim into the prompt.
        vectorstore: Store queried with
            ``max_marginal_relevance_search_by_vector``.
        previous_context: Transcript of the conversation so far. Defaults to
            an empty string.

    Returns:
        ``(answer, similar_docs)``: the LLM output and the list of retrieved
        documents. The full prompt is printed before the LLM is called.
    """
    embedding = compute_embedding(question)
    similar_docs = vectorstore.max_marginal_relevance_search_by_vector(embedding)

    # Join the passages as plain text; formatting the list itself would put
    # a Python repr (brackets, quotes, escaped newlines) into the prompt.
    content = "\n\n".join(doc.page_content for doc in similar_docs)

    # previous_context is a template variable rather than being concatenated
    # into the template source, so braces in the conversation are treated as
    # literal text and cannot break formatting.
    prompt_template = PromptTemplate.from_template(
    """\
    Given context about the subject, answer the question based on the context provided to the best of your ability. 
    Context: {context}
    This question takes place after this conversation : {previous_context}
    Question:
    {question}
    Answer:
    """
    )
    prompt = prompt_template.format(
        context=content,
        previous_context=previous_context,
        question=question,
    )
    print("prompt :")
    print(prompt)
    answer = llm(prompt)
    return answer, similar_docs


### Run RAG on a Question

In [125]:
# Wrap the Weaviate client for LangChain ("Doc" class, "text" property).
client = weaviate.Client(url=weaviate_url)
vectorstore = Weaviate(client, "Doc", "text")

# The question is translated to English before retrieval and generation;
# the English answer is translated back to French for display.
question = "Qu'est-ce que les ARTT"
answer, similar_docs = question_answer(
    GoogleTranslator(source="fr", target="en").translate(question),
    vectorstore,
)

print("\n\n\n-------------------------")
print(f"QUERY: {question}")
print("\n\n\n-------------------------")
print(f"Answer: {answer}")
print("\n\n\n-------------------------")
print(f"Answer trad: {GoogleTranslator(source='en', target='fr').translate(answer)}")
print("\n\n\n-------------------------")
# List every retrieved document, numbered from 1.
for index, result in enumerate(similar_docs):
    print(f"\n\n-- RESULT {index + 1}:\n")
    print(result)

prompt :
    Given context about the subject, answer the question based on the context provided to the best of your ability.
    Context: Les ARTT ou RTT (aménagement et réduction du temps de travail) correspondent aux jours dont bénéficie un salarié au-delà du nombre de jours de congés légaux ; ils correspondent à la récupération de temps de travail. La réduction du temps de travail par l'attribution de jours ARTT vous permet de vous absenter 1 à 2 jours par mois, en privilégiant, dans la mesure où cela est compatible avec l'organisation du service, les week-end de 3 jours. Le nombre de jours RTT est proratisé en fonction du taux d’activité dechacun. De plus, un certain nombre de jours d’absences autorisées peut minorer le nombre de jours ARTT.

Les droit à jours ARTT sur une année se calculent de la manière suivante sur la base d’un salarié à plein temps et hors dispositions conventionnelles particulières : - salariés non cadres : 21 ARTT (*) - salariés cadres : 19 ARTT (*) *Hors déc

Llama.generate: prefix-match hit


 ARTTs (Article 74 of the French Labor Code) are additional days off that an employee can take in addition to their regular annual leave. The number of ARTTs an employee is entitled to depends on their job classification and seniority within the company, with non-executive employees receiving more ARTTs than executive ones. ARTTs can be taken in half-day or full-day increments, with priority given to weekends. It is also possible to take a full week off by combining ARRTTs with regular annual leave. However, an employee cannot take more than 2 ARTTs in a week, and they must be compatible with the organization of the service.





-------------------------
QUERY: Qu'est-ce que les ARTT



-------------------------
Answer:  ARTTs (Article 74 of the French Labor Code) are additional days off that an employee can take in addition to their regular annual leave. The number of ARTTs an employee is entitled to depends on their job classification and seniority within the company, with non-execu

In [52]:
# Display the text of the first retrieved document from the previous query.
similar_docs[0].page_content

"Les ARTT ou RTT (aménagement et réduction du temps de travail) correspondent aux jours dont bénéficie un salarié au-delà du nombre de jours de congés légaux ; ils correspondent à la récupération de temps de travail. La réduction du temps de travail par l'attribution de jours ARTT vous permet de vous absenter 1 à 2 jours par mois, en privilégiant, dans la mesure où cela est compatible avec l'organisation du service, les week-end de 3 jours. Le nombre de jours RTT est proratisé en fonction du taux d’activité dechacun. De plus, un certain nombre de jours d’absences autorisées peut minorer le nombre de jours ARTT. Les droit à jours ARTT sur une année se calculent de la manière suivante sur la base d’un salarié à plein temps et hors dispositions conventionnelles particulières : - salariés non cadres : 21 ARTT (*) - salariés cadres : 19 ARTT (*)"

In [28]:
# Follow-up question asked with the earlier exchange supplied as context.
client = weaviate.Client(url=weaviate_url)
vectorstore = Weaviate(client, "Doc", "text")

question = "continu"

# Previous question and the (truncated) answer that the model should continue.
contexte_question = """
QUERY: Je suis arrivé le 1er juillet, combien ais-je de ARTT ? 

Answer:  En tant que salarié plein temps, vous disposez de 21 jours de RTT (Aménagement et Réduction du Temps de Travail) pour l'année 2023. Ces journées peuvent être prises sous forme de demi-journées ou de journées entières, avec possibilité d'en prendre deux. jours par mois. Cependant, vous ne pouvez pas prendre plus de deux jours par semaine et devez vous coordonner avec votre gestionnaire pour vous assurer que votre absence ne nuise pas au service de l'organisme.
Il est important de noter que le nombre de jours de RTT peut être réduit """

answer, similar_docs = question_answer_context(
    question,
    vectorstore,
    contexte_question,
)

print("\n\n\n-------------------------")
print(f"QUERY: {question}")
print("\n\n\n-------------------------")
print(f"Answer: {answer}")
print("\n\n\n-------------------------")
print(f"Answer trad: {GoogleTranslator(source='en', target='fr').translate(answer)}")
print("\n\n\n-------------------------")
# List every retrieved document, numbered from 1.
for index, result in enumerate(similar_docs):
    print(f"\n\n-- RESULT {index + 1}:\n")
    print(result)

Llama.generate: prefix-match hit


1. Pierre Olgiati (1968-1975)
2. Guy Verdeil (1975-1984)
3. Bernard Attali (1984-1986)
4. François Heilbronner (1986-1994)
5. Jean-Jacques Bonnaud (1994-1996)
6. Didier Pfeiffer (1996-1998)
7. Claude Zaouati (depuis 2017)

The conversation is about a person who has just joined the company on July 1st, and they are asking about their ARRT (Aménagement et Réduction du Temps de Travail). They are told that as a full-time employee, they will have 21 days of RTT for the year 2023, which can be taken in half or whole days, but they must coordinate with their line manager to ensure that their absence does not negatively impact the organization. It is important to note that the number of days of RTT may be reduced.


-------------------------
QUERY: continu



-------------------------
Answer: 1. Pierre Olgiati (1968-1975)
2. Guy Verdeil (1975-1984)
3. Bernard Attali (1984-1986)
4. François Heilbronner (1986-1994)
5. Jean-Jacques Bonnaud (1994-1996)
6. Didier Pfeiffer (1996-1998)
7. Claude Zao

In [None]:
# Ask a question that has no prior conversation attached.
client = weaviate.Client(weaviate_url)
vectorstore = Weaviate(client, "Doc", "text")

question = "Qui est le plus beau ?"

# question_answer_context needs the conversation history as its third
# argument; there is none here, so an empty string is passed explicitly
# (omitting the argument raises TypeError).
answer, similar_docs = question_answer_context(question, vectorstore, "")

print("\n\n\n-------------------------")
print(f"QUERY: {question}")
print("\n\n\n-------------------------")
print(f"Answer: {answer}")
print("\n\n\n-------------------------")
print(f"Answer trad: {GoogleTranslator(source='en', target='fr').translate(answer)}")
print("\n\n\n-------------------------")
# List every retrieved document, numbered from 1.
for index, result in enumerate(similar_docs):
    print(f"\n\n-- RESULT {index+1}:\n")
    print(result)

In [130]:
import pandas as pd
from tqdm import tqdm
import time


# Load the pipe-separated evaluation file; later cells read its "Q" column.
Q = pd.read_csv(
    "test-llama-congés.csv",
    sep="|",
)

In [131]:

# Batch evaluation: translate each French question to English, run the RAG
# pipeline on the translation, and record the answer, its French
# translation, the retrieved passages and the elapsed time per question.

# Loop-invariant objects are built once instead of once per question.
fr_to_en = GoogleTranslator(source='fr', target='en')
en_to_fr = GoogleTranslator(source='en', target='fr')
client = weaviate.Client(weaviate_url)
vectorstore = Weaviate(client, "Doc", "text")

Q["Q_trad"] = ""
Q["A_raw"] = ""
Q["A_trad"] = ""
Q["result_raw"] = ""
# Numeric placeholder so the column can hold the measured durations.
Q["time"] = float("nan")
for i in tqdm(Q.index):
    start_time = time.time()
    question = Q.at[i, "Q"]
    # Translate once and reuse the result for both the log and the query.
    question_trad = fr_to_en.translate(question)
    answer, similar_docs = question_answer(question_trad, vectorstore)

    # Q.at[row, column] writes into the frame itself; chained indexing
    # (Q[column][row] = ...) may write to a temporary copy instead.
    Q.at[i, "Q_trad"] = question_trad
    Q.at[i, "A_raw"] = answer
    Q.at[i, "A_trad"] = en_to_fr.translate(answer)
    Q.at[i, "result_raw"] = "".join(
        f"\n\n-- Résultat {index+1}:\n{result.page_content}"
        for index, result in enumerate(similar_docs)
    )
    Q.at[i, "time"] = time.time() - start_time



  0%|          | 0/20 [00:00<?, ?it/s]Llama.generate: prefix-match hit


 Based on the context provided, you have the following leave entitlements:
      * Time-part workload (80% over 5 days or 80% over 4 days): [number of days]
      * Leave for family reasons (90% for 25 non-work days during summer vacation, 72% for 4 days of work and 20 non-work days during summer vacation): [number of days]
      * Senior leave (80% remunerated by using 21 days saved on the CET, 80% remunered by using 41 days saved on the CET): [number of days]
Note: The numbers provided are based on the context and may vary depending on individual circumstances.

  5%|▌         | 1/20 [00:51<16:12, 51.18s/it]Llama.generate: prefix-match hit


 Based on the information provided in the context, you have 21 ARTTs as a full-time employee and 19 ARTTs as a manager.

 10%|█         | 2/20 [01:58<18:12, 60.69s/it]Llama.generate: prefix-match hit


 * If you are a non-executive employee, you will have 21 ARRTs.
     * If you are an executive employee, you will have 19 ARRTs.

 15%|█▌        | 3/20 [02:07<10:32, 37.23s/it]Llama.generate: prefix-match hit


10 days
Explanation:
Based on the context provided, an employee has a total of 10 days off. This is because the ARTT calendar can be modified by the management team with a notice period of 10 calendar days, and additionally, employees themselves can modify their ARTT calendar with a notice period of 10 calendar days before the date they want to make changes.

 20%|██        | 4/20 [02:33<08:44, 32.81s/it]Llama.generate: prefix-match hit


 To take vacation days, you will need to follow the ARTT calendar policy. This policy states that any modifications to the calendar must be made at least 10 days in advance of the date they are to occur, unless there are exceptional circumstances. Additionally, the employees may modify their own calendars, but they must provide notice at least 10 days in advance of the change. It is important to carefully review the policy and understand the requirements before making any changes to your calendar.

 25%|██▌       | 5/20 [02:50<06:42, 26.84s/it]Llama.generate: prefix-match hit


 ARTTs (Aménagement et Réduction du Temps de Travail) are additional days off that an employee is entitled to beyond the legal number of paid leave days. They are used to reduce the number of working days, allowing employees to take 1 to 2 days off per month, with the organization's approval. The number of ARTTs is proportional to the employee's activity level.





 30%|███       | 6/20 [04:04<10:03, 43.12s/it]Llama.generate: prefix-match hit


 The CET (Comptes Épargne-Temps) is a savings account that allows employees to save money from their salaries and use it for various purposes such as vacations, or to receive different types of remuneration.
    The CET account can be used for the following:
     - Vacations: The employee can use the CET account to save money for future vacations by depositing a portion of their salary each month.
     - Remuneration: The employee can receive different types of remuneration such as bonuses, stock options, or other forms of compensation, which can be deposited into their CET account.
     - Other purposes: The employee can also use the CET account for other purposes such as paying off debts, purchasing assets, or investing in other financial instruments.
In summary, the CET is a savings account that allows employees to save money from their salaries and use it for various purposes, including vacations, remuneration, and other financial goals.

 35%|███▌      | 7/20 [04:45<09:11, 42.40s/it]Llama.generate: prefix-match hit


10 days
    Explanation:
    According to the context provided, you are allowed 10 days of absence from work without prior notice to your supervisor or HR department. This means that if you need to take more than 10 days off, you must provide at least 10 days' notice before your scheduled day off. Additionally, the ARRTT calendar can be modified by the management team with a minimum of 10 days' notice, and by employees with a minimum of 10 days' notice before their scheduled day off.

 40%|████      | 8/20 [05:15<07:40, 38.36s/it]Llama.generate: prefix-match hit


 To be able to split days, you will need to request a modification to your attendance schedule with at least 10 days' notice before the day you wish to split. This can be done through the ARRTT system or by speaking directly with your manager. You will also need to provide evidence of the reason for the split, such as a doctor's note or proof of an event that cannot be rescheduled. Once your request is approved, you will be able to split your days as needed.
    It is important to note that if you have not yet taken any days off, you may need to wait until you have taken at least one day before you can split it. Additionally, if you are within 10 days of your next scheduled workday, you will not be able to split it.
    Furthermore, if you are splitting your days for a medical reason, you may need to provide additional documentation to support your request. This could include a letter from your doctor or other medical professional explaining the nature of your illness and how it affect

 45%|████▌     | 9/20 [05:46<06:37, 36.15s/it]Llama.generate: prefix-match hit


 The concept of split days in the context of ARTT (Annualized Retention Time for Training) is as follows: When an employee takes more than 10 consecutive days off, either through vacation or sick leave, those days are considered "split days." This means that for each day taken off, two calendar days are added to the end of the ARRT period. For example, if an employee takes 15 days off consecutively, they will add 30 calendar days to the end of their ARRT period. Split days can also be modified by the management team or employees under certain circumstances before the date of modification, just like any other modification to the ARRT schedule. The purpose of split days is to allow for flexibility in scheduling and to account for unforeseen absences that may occur throughout the year.

 50%|█████     | 10/20 [06:11<05:26, 32.64s/it]Llama.generate: prefix-match hit


 Based on the context provided, you can take days off at any time within the designated time frame of 10 days before the change is to occur, unless there are exceptional circumstances. Additionally, if you are an employee, you may be able to take days off with a notice period of 10 days before the change is to occur.

 55%|█████▌    | 11/20 [06:24<03:58, 26.50s/it]Llama.generate: prefix-match hit


 Yes, there are some compulsory holidays that cannot be changed by the management or employees. These holidays are fixed and cannot be altered under any circumstances. Examples of compulsory holidays include New Year's Day (Jan 1st), Good Friday (Friday before Easter), May Day (May 1st), Independence Day (Date varies), and Christmas Day (December 25th).

 60%|██████    | 12/20 [06:39<03:04, 23.05s/it]Llama.generate: prefix-match hit


 To accumulate leave, you need to first create a leave calendar in the ARTT system. The leave calendar is a tool that allows you to manage and track your leave requests. Once you have created your leave calendar, you can start accumulating leave by adding your absences to the calendar. You will need to specify the reason for your absence and the number of days you want to take off. Once you have added your absence to the calendar, it will be automatically calculated and added to your leave balance. You can view your leave balance at any time by logging into the ARTT system and checking your leave calendar.
Explanation:
To accumulate leave in the ARTT system, you first need to create a leave calendar. This is a tool that allows you to manage and track your leave requests. Once you have created your leave calendar, you can start accumulating leave by adding your absences to the calendar. You will need to specify the reason for your absence and the number of days you want to take off. Onc

 65%|██████▌   | 13/20 [07:15<03:08, 26.97s/it]Llama.generate: prefix-match hit


 Yes, you can take days off within the framework of the ARTT calendar. However, before making any changes to the calendar, you are required to provide 10-day advance notice to your supervisor or the management team, unless there are exceptional circumstances that make it impossible for you to do so.
     Additionally, if you are an employee, you can also modify your personal days off within the framework of the ARTT calendar, but you must provide at least 10-day advance notice before making any changes.
Note: The context provided is for a hypothetical scenario, and it may not be applicable in all situations.

 70%|███████   | 14/20 [07:35<02:29, 24.84s/it]Llama.generate: prefix-match hit


 You can view your leave by logging into the company's payroll software called "eContactPaie" using your unique login credentials. Once logged in, you can access your leave balance and any pending requests for time off through the "My Leave" section. You will also receive notifications via email or SMS when a new leave request is submitted or when a leave request is approved/rejected.
Additionally, if you have been absent due to illness or accident, you must inform your manager within 3 days and provide a valid reason for your absence. You can do this by sending an email to the Centre d'Expertise RH Paie at [insert address] or by using the online form available on the company's intranet.
It is also important to note that if you are experiencing an unexpected absence due to force majeure, you must notify your manager within 48 hours of your expected return date.

 75%|███████▌  | 15/20 [08:30<02:49, 33.97s/it]Llama.generate: prefix-match hit


 Yes, you can be refused leave. As an employer on the platform eTemptation, you have the ability to refuse leave requests from your employees if they do not meet certain criteria or requirements.
For example, you may refuse leave if an employee has not provided sufficient notice or if their request conflicts with a critical business need. Additionally, you may also refuse leave if the employee has not met the required performance standards or if their absence would cause significant disruption to the workplace.
It's important to note that refusing leave can have legal implications, so it's essential to understand and comply with relevant employment laws and regulations in your jurisdiction. As an employer on eTemptation, you may want to consider consulting with HR or legal counsel before making any decisions regarding leave refusal.

 80%|████████  | 16/20 [09:35<02:53, 43.48s/it]Llama.generate: prefix-match hit


 The number of days off that an employee can take in August is subject to the company's attendance policy and the current labor law. Generally, an employee is entitled to take up to 10 days of absence in a year, but this can be reduced or increased depending on the circumstances. Additionally, if an employee is absent for more than 3 consecutive days, their job may be considered abandoned under the Labor Law, which could result in termination of employment.
    In the given context, the employee is entitled to take up to 10 days of absence in August, provided they give the required notice and follow the company's attendance policy. However, if the employee is absent for more than 3 consecutive days, their job may be considered abandoned under the Labor Law, which could result in termination of employment.

 85%|████████▌ | 17/20 [10:13<02:04, 41.57s/it]Llama.generate: prefix-match hit


 Yes, you can take a day off to take care of your sick child. As long as you have been with the company for at least 12 months in the last 5 years and you have children under the age of 7, you are eligible for a day of absence without loss of pay on the first day of school each year. Additionally, if you are unable to work due to illness on the day of the school opening, you can take an additional day off without losing pay. However, this day cannot be taken during paid holidays or vacation days.

 90%|█████████ | 18/20 [11:15<01:35, 47.82s/it]Llama.generate: prefix-match hit


 Yes, you are entitled to additional leave with children as a parent of a child with a disability. As a salaried employee in France, you may be eligible for various benefits and allowances to help manage the challenges of raising a child with special needs. These benefits can include:
* 2 days of additional leave upon diagnosis of a disability
* Adaptation of work hours to accommodate the needs of your child
* Systematic right to part-time work with certain conditions, including the possibility of having the difference in retirement contributions paid on behalf of the employee
* 8 days of additional unpaid leave per year for managing the administrative and medical requirements related to your child's disability
* A lump sum payment of 1200€ to help cover the costs of raising a child with special needs.
It is important to note that these benefits are only available to employees who have a child with a disability, and the specific eligibility criteria and benefits may vary depending on t

 95%|█████████▌| 19/20 [12:13<00:51, 51.00s/it]Llama.generate: prefix-match hit


 In the event of an unexpected absence, you must inform your manager within 48 hours, unless there are exceptional circumstances. You must also send a copy of your leave notice to the HR department at the company's headquarters in Merignac. Additionally, if you are absent due to illness or injury, you must justify your absence within three days by sending an notice of absence to the Payroll Center at the following address: GROUPAMA Centre d’Expertise RH TSA 40021 33688 MERIGNAC Cedex. You also have the option of sending the notice of illness via a form on eContactPaie. It is important to note that in the event of an accident at work or travel, you must report it by calling the emergency number 01.70.94.26.26. If you do not receive a response from your manager, you can also pass through the eContactPaie portal.

100%|██████████| 20/20 [13:11<00:00, 39.58s/it]


In [None]:

# Batch evaluation variant: the question is sent to the RAG pipeline as
# written in the file; its English translation is only recorded.

# Loop-invariant objects are built once instead of once per question.
fr_to_en = GoogleTranslator(source='fr', target='en')
en_to_fr = GoogleTranslator(source='en', target='fr')
client = weaviate.Client(weaviate_url)
vectorstore = Weaviate(client, "Doc", "text")

Q["Q_trad"] = ""
Q["A_raw"] = ""
Q["A_trad"] = ""
Q["result_raw"] = ""
Q["result_trad"] = ""
# Numeric placeholder so the column can hold the measured durations.
Q["time"] = float("nan")
for i in tqdm(Q.index):
    start_time = time.time()
    question = Q.at[i, "Q"]
    question_trad = fr_to_en.translate(question)
    # Q.at[row, column] writes into the frame itself; chained indexing
    # (Q[column][row] = ...) may write to a temporary copy instead.
    Q.at[i, "Q_trad"] = question_trad
    # NOTE(review): the untranslated question is passed here while Q_trad
    # stores the translation; presumably deliberate for this run, confirm.
    answer, similar_docs = question_answer(question, vectorstore)
    Q.at[i, "A_raw"] = answer
    Q.at[i, "A_trad"] = en_to_fr.translate(answer)
    # The retrieved passages are not recorded in this variant, so
    # "result_raw" and "result_trad" stay empty.

    Q.at[i, "time"] = time.time() - start_time



In [128]:
# Write the evaluation table to an Excel workbook, without the index column.
Q.to_excel(
    "test-llama-congés-QEN-CONTEN.xlsx",
    index=False,
)