## Mapper et importer les données

In [1]:
from elasticsearch import Elasticsearch

# Connexion à Elasticsearch
es = Elasticsearch("http://localhost:9200")



In [None]:
# Supprimer l'index "notes" s'il existe
if es.indices.exists(index="notes"):
    es.indices.delete(index="notes")

In [2]:
from elasticsearch import Elasticsearch

# Connexion à Elasticsearch
es = Elasticsearch("http://localhost:9200")

# Mapping de l'index "notes"
mapping = {
    "mappings": {
        "properties": {
            "patient_lastname": {"type": "keyword"},
            "patient_firstname": {"type": "keyword"},
            "text": {"type": "text", "analyzer": "standard"},
            "date": {"type": "date"},
            "patient_left": {"type": "boolean"},
            "emotion": {"type": "keyword"},
            "confidence": {"type": "float"}
        }
    }
}

# Création de l'index "notes" avec le mapping
es.indices.create(index="notes", body=mapping)



  es.indices.create(index="notes", body=mapping)


BadRequestError: BadRequestError(400, 'resource_already_exists_exception', 'index [notes/Hv32QN78QDy1jOdPHjh9vQ] already exists')

In [None]:
# Import des données à partir du jeu de données et de la bibliothèque Faker
# Remplacez cette partie par votre propre logique d'importation de données
from faker import Faker
import csv

fake = Faker()

with open("../data/Emotion_final.csv", "r") as file:
    reader = csv.DictReader(file)
    for row in reader:
        document = {
            "patient_lastname": fake.last_name(),
            "patient_firstname": fake.first_name(),
            "text": row["Text"],
            "date": fake.date(),
            "patient_left": fake.boolean(),
            "emotion": "Model",  # Remplacez par votre propre logique pour remplir ce champ
            "confidence": "Model"  # Remplacez par votre propre logique pour remplir ce champ
        }
        es.index(index="notes", body=document)


## Requêtes 

Recherche de la répartition des sentiments des textes pour un patient :

In [None]:
from elasticsearch import Elasticsearch
import pandas as pd

es = Elasticsearch("http://localhost:9200")

# Recherche pour un patient spécifique
patient_lastname = "Doe"
patient_firstname = "John"

query = {
    "query": {
        "bool": {
            "must": [
                {"match": {"patient_lastname": patient_lastname}},
                {"match": {"patient_firstname": patient_firstname}}
            ]
        }
    },
    "aggs": {
        "sentiment_distribution": {
            "terms": {"field": "emotion.keyword"}
        }
    }
}

result = es.search(index="notes", body=query, size=0)
aggregations = result["aggregations"]["sentiment_distribution"]["buckets"]

df_sentiment_distribution = pd.DataFrame(aggregations)
print(df_sentiment_distribution)

Élaboration d'une matrice de sentiments contradictoires :

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Liste des sentiments
sentiments = ["happy", "sad", "angry", "calm"]

contradiction_matrix = []

for sentiment in sentiments:
    query = {
        "query": {
            "bool": {
                "must": [
                    {"match": {"emotion": "happy"}},  # Remplacez "happy" par le sentiment souhaité
                    {"match": {"text": "sadness"}}  # Remplacez "sadness" par le mot à rechercher
                ]
            }
        }
    }

    result = es.search(index="notes", body=query, size=0)
    total_documents = result["hits"]["total"]["value"]

    percentage = (total_documents / len(df_sentiment_distribution)) * 100

    contradiction_matrix.append(percentage)

heatmap_data = pd.DataFrame(contradiction_matrix, index=sentiments, columns=["Percentage"])
sns.heatmap(heatmap_data, annot=True, cmap="YlGnBu")
plt.xlabel("Contradictory Word")
plt.ylabel("Sentiment")
plt.title("Contradictory Word Percentage by Sentiment")
plt.show()

Recherche du nombre de textes correspondants aux différentes étapes du deuil :

In [None]:
stages_of_grief = ["denial", "anger", "bargaining", "depression", "acceptance"]

for stage in stages_of_grief:
    # Recherche pleine
    query_full = {
        "query": {
            "match": {"text": stage}
        }
    }

    result_full = es.search(index="notes", body=query_full)
    total_documents_full = result_full["hits"]["total"]["value"]

    # Recherche fuzzy
    query_fuzzy = {
        "query": {
            "fuzzy": {"text": {"value": stage}}
        }
    }

    result_fuzzy = es.search(index="notes", body=query_fuzzy)
    total_documents_fuzzy = result_fuzzy["hits"]["total"]["value"]

    print(f"Stage: {stage}")
    print(f"Total Documents (Full): {total_documents_full}")
    print(f"Total Documents (Fuzzy): {total_documents_fuzzy}")
    print()

Recherche des textes avec différents critères :

In [None]:
query = {
    "query": {
        "bool": {
            "must": [
                {"match": {"text": "good day"}},  # Doit contenir "good day"
                {"term": {"patient_left": False}}  # Les patients sont encore en consultation
            ],
            "should": [
                {"match": {"text": "to rest"}}  # Doit contenir "to rest" si possible
            ],
            "filter": [
                {"range": {"confidence": {"gte": 0.5}}}  # Confidence supérieure ou égale à 0.5
            ]
        }
    },
    "aggs": {
        "sentiment_distribution": {
            "terms": {"field": "emotion.keyword"}
        }
    }
}

result = es.search(index="notes", body=query)
hits = result["hits"]["hits"]

df_results = pd.DataFrame(hits)
print(df_results)