In [1]:
import json

# Charger le dataset
with open('data_modified_final.json', 'r') as f:
    data = json.load(f)

# Afficher quelques exemples pour vérifier
for question in data['questions'][:3]:
    print(question)


{'question': "Quel est l'algorithme de tri le plus rapide ?", 'options': ['Tri par insertion', 'Tri fusion', 'Tri rapide', 'Tri Ã\xa0 bulles'], 'answer': 'Tri rapide', 'difficulte': 1, 'duree_reponse': '4 minute(s)', 'marks': 1}
{'question': 'Quelle est la complexitÃ© temporelle du tri fusion ?', 'options': ['O(n log n)', 'O(n^2)', 'O(n)', 'O(log n)'], 'answer': 'O(n log n)', 'difficulte': 3, 'duree_reponse': '4 minute(s)', 'marks': 2}
{'question': "Quel est l'algorithme de recherche le plus efficace ?", 'options': ['Recherche sÃ©quentielle', 'Recherche binaire', 'Recherche exponentielle', 'Recherche linÃ©aire'], 'answer': 'Recherche binaire', 'difficulte': 1, 'duree_reponse': '5 minute(s)', 'marks': 1}


In [2]:
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import CountVectorizer

# Extraire les questions et les réponses
questions = [q['question'] for q in data['questions']]
options = [q['options'] for q in data['questions']]
answers = [q['answer'] for q in data['questions']]
difficulties = [q['difficulte'] for q in data['questions']]
durations = [q['duree_reponse'] for q in data['questions']]
marks = [q['marks'] for q in data['questions']]

# Encoder les textes
vectorizer = CountVectorizer()
X_questions = vectorizer.fit_transform(questions).toarray()

# Encoder les labels de difficulté
le_difficulty = LabelEncoder()
y_difficulties = le_difficulty.fit_transform(difficulties)

# Afficher quelques exemples encodés pour vérifier
print(X_questions[:3])
print(y_difficulties[:3])


[[0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
  1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
  0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
  1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0]]
[0 2 0]


In [3]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Définir le modèle
model = Sequential([
    Dense(128, input_dim=X_questions.shape[1], activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='linear')  # Prédiction de la difficulté
])

# Compiler le modèle
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])

# Afficher le résumé du modèle
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 128)               19456     
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dense_2 (Dense)             (None, 1)                 65        
                                                                 
Total params: 27,777
Trainable params: 27,777
Non-trainable params: 0
_________________________________________________________________


In [4]:
# Entraîner le modèle
model.fit(X_questions, y_difficulties, epochs=10, batch_size=32, validation_split=0.2)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x19858d2e410>

In [5]:
import numpy as np

def generate_exam(model, vectorizer, params):
    """
    Génère des sujets d'examen équivalents basés sur les paramètres fournis.
    """
    generated_exams = []
    total_points = params['total_points']
    total_duration = params['total_duration']
    easy_percentage = params['easy_percentage']
    medium_percentage = params['medium_percentage']
    hard_percentage = params['hard_percentage']

    # Génération de sujets
    for _ in range(4):  # Générer 4 sujets
        subject = []
        current_points = 0
        current_duration = 0
        easy_questions = []
        medium_questions = []
        hard_questions = []

        while current_points < total_points and current_duration < total_duration:
            # Sélectionner une question aléatoire
            idx = np.random.choice(len(questions))
            question = questions[idx]
            encoded_question = vectorizer.transform([question]).toarray()
            difficulty = model.predict(encoded_question)

            if difficulty == 1 and len(easy_questions) < (easy_percentage * total_points):
                easy_questions.append(question)
            elif difficulty == 2 and len(medium_questions) < (medium_percentage * total_points):
                medium_questions.append(question)
            elif difficulty == 3 and len(hard_questions) < (hard_percentage * total_points):
                hard_questions.append(question)
            
            subject.append(question)
            current_points += marks[idx]
            current_duration += int(durations[idx].split()[0])  # Convertir la durée en minutes

        generated_exams.append(subject)
    
    return generated_exams

# Paramètres de l'examen
params = {
    'total_points': 20,
    'total_duration': 45,
    'easy_percentage': 0.3,
    'medium_percentage': 0.5,
    'hard_percentage': 0.2
}

# Générer les examens
exams = generate_exam(model, vectorizer, params)
for exam in exams:
    print(exam)


["RÃ©pÃ©ter <traitement> jusqu'a <condition> est une ______?", "Quelle est la complexitÃ© temporelle de l'algorithme de Floyd-Warshall ?", "Quelle est la complexitÃ© temporelle de l'algorithme de Kruskal ?", "Quel est l'algorithme utilisÃ© pour effectuer une recherche en largeur dans un graphe ?", "Quelle est la complexitÃ© temporelle de l'algorithme de recherche linÃ©aire ?", 'Quelle est la diffÃ©rence entre un organigramme et un pseudocode?', "Comment mesure-t-on la complexitÃ© temporelle d'un algorithme ?", 'Si ....... Alors ....... Sinon ....... Fin Si vÃ©rifier ____?', "Comment mesure-t-on la complexitÃ© temporelle d'un algorithme ?"]
["Quel est l'algorithme utilisÃ© pour effectuer une recherche en largeur dans un graphe ?", "Combien de comparaisons effectuera l'algorithme de tri Ã\xa0 bulles si on l'applique Ã\xa0 un tableau de 20 Ã©lÃ©ments dÃ©jÃ\xa0 triÃ© ?", "L'arbre de Huffman attribue :", 'Quelle est la diffÃ©rence entre un organigramme et un pseudocode?', "Quel est l'algori