In [1]:
import os
import time
import logging
import pandas as pd
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain_community.vectorstores import FAISS
from langchain_core.messages import SystemMessage, HumanMessage
from codecarbon import EmissionsTracker


In [2]:
logging.getLogger("codecarbon").setLevel(logging.ERROR)

# 1. Désactiver les logs de CodeCarbon et de TensorFlow
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # Coupe les logs TF
logging.getLogger("codecarbon").setLevel(logging.ERROR) # Coupe les logs CodeCarbon


In [3]:
# 1. Configuration des Embeddings
embeddings = OllamaEmbeddings(model="mxbai-embed-large")

# 2. Chargement de l'index
vector_db = FAISS.load_local(
    "faiss_index_pfe", 
    embeddings, 
    allow_dangerous_deserialization=True
)

In [4]:
# 3. Fonction de recherche + réponse modifiée
def ask_question(query, llm_model="mistral"):
    # Configuration du LLM
    llm = ChatOllama(model=llm_model, temperature=0,num_gpu=99,num_thread=4)

    tracker = EmissionsTracker(save_to_file=False , log_level="error")
    tracker.start()
    
    start_time = time.time() # Début du chrono
    # Recherche des documents pertinents (on récupère les objets 'Document' entiers)
    retriever = vector_db.as_retriever(search_type="mmr", search_kwargs={'k': 6, 'fetch_k': 20})
    docs = retriever.invoke(query)
    # Préparation du contexte texte
    context_chunks = []
    #print("\n=== DOCUMENTS AND CONTEXT CHUNKS ===")
    for i, doc in enumerate(docs):
        # Découpe le texte en chunks
        text = doc.page_content
        context_chunks.append(text)
    # Combine les chunks pour le LLM
    context_for_llm = "\n\n".join(context_chunks)
# Prompt système optimisée pour un rôle de Tuteur CNN
    system_prompt = (
    "ROLE: CNN Specialist Assistant - STRICT DATA RETRIEVAL ONLY.\n\n"
    
    "=== THE GOLDEN RULE ===\n"
    "You are an AI assistant specialized ONLY in Convolutional Neural Networks (CNN). "
    "Your knowledge is strictly limited to the provided documentation (RAG).\n\n"
    
    "=== STRICT BEHAVIORAL PROTOCOL ===\n"
    "If the answer is not explicitly found within the provided context, or if the question is outside the scope of Convolutional Neural Networks (CNN), you must respond EXACTLY and ONLY with: 'I don't know.' Do not provide any other text."
    
    "2. 100% GROUNDING: Do not use your own training data. You are forbidden from "
    "generating information that is not explicitly present in the provided context. "
    "If the answer is missing from the context: 'I don't know.'\n"
    
    "3. 100% CONTEXT: You must answer based ONLY on the documentation. Do not add outside "
    
    
    "4. NO FORMATTING/PREAMBLE: Do not use greetings (Hello, Hi). Start the answer "
    "directly with the relevant technical content or the refusal string.\n\n"
    
    "=== VERIFICATION ===\n"
    "Question is CNN + In Context -> Direct answer.\n"
    "Question is NOT CNN OR Not In Context -> I don't know."
    )
    messages = [
        SystemMessage(content=system_prompt),
        HumanMessage(content=(
            f"STUDY CONTEXT:\n{context_for_llm}\n\n"
            f"QUESTION DE L'ÉTUDIANT : {query}\n\n"
            "RÉPONSE DU TUTEUR :"
        ))
    ]
    
    # Génération de la réponse
    response = llm.invoke(messages)

    end_time = time.time() # Fin du chrono
    emissions = tracker.stop() # Fin du suivi carbone

    inference_time = end_time - start_time
    # On retourne la réponse ET les documents sources
    return response.content , inference_time, emissions



In [5]:
# ============================================================================
# FONCTION D'ÉVALUATION
# ============================================================================
def evaluer_tuteur(tests, llm_model="mistral"):
    
    resultats = []
    score_total = 0
    total_time = 0
    total_co2 = 0
    
    for i, test in enumerate(tests, 1):
        print(f"[{i}/{len(tests)}] Question: {test['question']}", end="\r")
        
        try:
            # Appel de la fonction
            reponse, duration, co2 = ask_question(test['question'], llm_model=llm_model)
            
            total_time += duration
            total_co2 += co2
            
            # Analyse de la réponse
            status = "FAIL"
            details = ""
            
            if test['doit_repondre']:
                mots_trouves = [m for m in test['mots_cles'] if m.lower() in reponse.lower()]
                taux_reussite = len(mots_trouves) / len(test['mots_cles'])
                if taux_reussite >= 0.5:
                    status = "PASS"
                    score_total += 1
                details = f"Mots-clés: {len(mots_trouves)}/{len(test['mots_cles'])}"
            else:
                if "i don't know" in reponse.lower():
                    status = "PASS (Refusé)"
                    score_total += 1
                else:
                    status = "HALLUCINATION"
                details = "Vérification du refus"

            # Stockage des résultats
            resultats.append({
                "Question": test['question'],
                "Status": status,
                "Temps (s)": round(duration, 2),
                "CO2 (kg)": f"{co2:.8f}"
            })
            
            # Petit repos pour éviter le Timeout d'Ollama
            time.sleep(1) 

        except Exception as e:
            print(f"\nErreur sur la question {i}: {e}")
            continue

    # --- BILAN FINAL ---
    print("\n" + "=" * 80)
    print(f"{'BILAN GLOBAL':^80}")
    print("=" * 80)
    print(f"Score Final          : {score_total}/{len(tests)} ({score_total/len(tests)*100:.1f}%)")
    print(f"Temps Total Cumulé   : {total_time:.2f} secondes")
    print(f"Moyenne par Question : {total_time/len(tests):.2f} secondes")
    print(f"Total Émissions CO2  : {total_co2:.8f} kg CO2")
    print("=" * 80)
    
    return pd.DataFrame(resultats)

In [6]:
# ============================================================================
# QUESTIONS DE TEST
# ============================================================================
tests = [
    # --- QUESTIONS THÉORIQUES (CNN) ---
    {"question": "What is a CNN?", "mots_cles": ["convolutional", "neural", "network"], "doit_repondre": True},
    {"question": "Explain pooling in CNN", "mots_cles": ["pooling", "max", "average"], "doit_repondre": True},
    {"question": "What is Max Pooling?", "mots_cles": ["max", "pooling", "spatial", "reduction"], "doit_repondre": True},
    {"question": "Define a filter in CNN", "mots_cles": ["filter", "kernel", "weights", "feature"], "doit_repondre": True},
    {"question": "What is a stride?", "mots_cles": ["stride", "step", "pixel", "movement"], "doit_repondre": True},
    {"question": "What is padding?", "mots_cles": ["padding", "zero", "border", "size"], "doit_repondre": True},
    {"question": "Explain ReLU activation", "mots_cles": ["relu", "activation", "non-linear", "negative"], "doit_repondre": True},
    {"question": "What is a feature map?", "mots_cles": ["feature", "map", "output", "layer"], "doit_repondre": True},
    {"question": "Explain the convolutional layer", "mots_cles": ["convolutional", "layer", "extraction"], "doit_repondre": True},
    {"question": "What is a fully connected layer?", "mots_cles": ["fully", "connected", "dense", "classification"], "doit_repondre": True},
    {"question": "What is Softmax?", "mots_cles": ["softmax", "probability", "output"], "doit_repondre": True},
    {"question": "What is dropout?", "mots_cles": ["dropout", "overfitting", "randomly"], "doit_repondre": True},
    {"question": "Explain backpropagation in CNN", "mots_cles": ["gradient", "backward", "weights", "update"], "doit_repondre": True},
    {"question": "What is the kernel size?", "mots_cles": ["kernel", "size", "dimension", "width"], "doit_repondre": True},
    {"question": "Difference between CNN and MLP?", "mots_cles": ["spatial", "convolution", "flatten"], "doit_repondre": True},

    # --- QUESTIONS DE CODE PYTHON ---
    {"question": "Give me a simple Python code for a CNN using Sequential API", "mots_cles": ["models.Sequential", "layers.Conv2D", "layers.MaxPooling2D"], "doit_repondre": True},
    {"question": "Provide the Python code to add a Dropout layer with a 0.5 rate", "mots_cles": ["layers.Dropout(0.5)", "training", "overfitting"], "doit_repondre": True},
    {"question": "Show me the Python code to compile a CNN with Adam optimizer", "mots_cles": ["model.compile", "optimizer='adam'", "loss='categorical_crossentropy'"], "doit_repondre": True},
    {"question": "Write a Python function to preprocess images to 128x128 for a CNN", "mots_cles": ["image_dataset_from_directory", "image_size=(128, 128)"], "doit_repondre": True},
    {"question": "How to write the code for a GlobalAveragePooling2D layer in Python?", "mots_cles": ["layers.GlobalAveragePooling2D()", "reduction", "spatial"], "doit_repondre": True},

    # --- QUESTIONS HORS-SUJET (REFUS) ---
    {"question": "What is the weather in Paris?", "phrase_refus": "I don't know.", "doit_repondre": False},
    {"question": "How to bake a chocolate cake?", "phrase_refus": "I don't know.", "doit_repondre": False},
    {"question": "Who is the president of France?", "phrase_refus": "I don't know.", "doit_repondre": False},
    {"question": "Explain quantum physics", "phrase_refus": "I don't know.", "doit_repondre": False},
    {"question": "What is the capital of Japan?", "phrase_refus": "I don't know.", "doit_repondre": False}
]

# Evaluation de Mistral

In [7]:
# Exécution
df_stats = evaluer_tuteur(tests, llm_model="mistral")
# Afficher le tableau proprement
print(df_stats.to_string(index=False))

[25/25] Question: What is the capital of Japan?e?balAveragePooling2D layer in Python?
                                  BILAN GLOBAL                                  
Score Final          : 21/25 (84.0%)
Temps Total Cumulé   : 1682.08 secondes
Moyenne par Question : 67.28 secondes
Total Émissions CO2  : 0.00544432 kg CO2
                                                           Question        Status  Temps (s)   CO2 (kg)
                                                     What is a CNN?          PASS      67.07 0.00021641
                                             Explain pooling in CNN          PASS      57.86 0.00018812
                                               What is Max Pooling?          PASS      64.67 0.00021129
                                             Define a filter in CNN          PASS      52.35 0.00017060
                                                  What is a stride?          PASS      64.91 0.00021174
                                                   Wh

# Evaluation de DeepSeek Coder

In [8]:
# Exécution
df_stats = evaluer_tuteur(tests, llm_model="deepseek-coder")
# Afficher le tableau proprement
print(df_stats.to_string(index=False))

[25/25] Question: What is the capital of Japan?e?balAveragePooling2D layer in Python?
                                  BILAN GLOBAL                                  
Score Final          : 10/25 (40.0%)
Temps Total Cumulé   : 880.94 secondes
Moyenne par Question : 35.24 secondes
Total Émissions CO2  : 0.00284304 kg CO2
                                                           Question        Status  Temps (s)   CO2 (kg)
                                                     What is a CNN?          PASS      13.98 0.00004470
                                             Explain pooling in CNN          PASS       8.77 0.00002981
                                               What is Max Pooling?          PASS       5.27 0.00001754
                                             Define a filter in CNN          FAIL       5.25 0.00001758
                                                  What is a stride?          FAIL       6.91 0.00002315
                                                   Wha