In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import LabelEncoder
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense
from keras.callbacks import EarlyStopping

def evaluate_models(data):
    """Train an LSTM text classifier on a CSV file and report test metrics.

    The CSV must contain a 'text' column (raw text) and a 'category' column
    (class labels). 80% of the rows are used for training (a further 20% of
    those is held out by Keras for early stopping) and 20% for testing.

    Args:
        data: Path, URL or buffer accepted by ``pandas.read_csv``.

    Returns:
        dict: ``{'accuracy', 'precision', 'recall', 'f1'}`` computed on the
        test split; precision/recall/F1 are weighted averages over classes.
    """
    df = pd.read_csv(data)

    # Missing texts would make the Tokenizer fail on float NaN values.
    X = df['text'].fillna('').astype(str)
    y = df['category']

    # Encode labels on the full column so a class that only occurs in the
    # test split does not make `transform` raise on an unseen label.
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)

    # Split into training and test sets.
    X_train, X_test, y_train_encoded, y_test_encoded = train_test_split(
        X, y_encoded, test_size=0.2, random_state=42)

    # Tokenize and pad the sequences (vocabulary learned on training texts only).
    max_words = 1000
    tokenizer = Tokenizer(num_words=max_words, oov_token='<OOV>')
    tokenizer.fit_on_texts(X_train)
    X_train_sequences = tokenizer.texts_to_sequences(X_train)
    X_test_sequences = tokenizer.texts_to_sequences(X_test)
    max_sequence_length = 100  # Adjust the maximum sequence length as needed
    X_train_padded = pad_sequences(X_train_sequences, maxlen=max_sequence_length, padding='post', truncating='post')
    X_test_padded = pad_sequences(X_test_sequences, maxlen=max_sequence_length, padding='post', truncating='post')

    # Build the LSTM model.
    embedding_dim = 100
    model = Sequential()
    model.add(Embedding(input_dim=max_words, output_dim=embedding_dim, input_length=max_sequence_length))
    model.add(LSTM(100))  # Adjust the number of LSTM units as needed
    model.add(Dense(len(label_encoder.classes_), activation='softmax'))  # One output per class

    # Sparse loss: targets are integer class ids, not one-hot vectors.
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Train with EarlyStopping to limit overfitting.
    early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
    model.fit(X_train_padded, y_train_encoded, epochs=20, batch_size=64, validation_split=0.2, callbacks=[early_stopping])

    # Evaluate the model.
    test_loss, test_accuracy = model.evaluate(X_test_padded, y_test_encoded)
    print(f"Test Accuracy: {test_accuracy}")

    # Predicted class id = index of the highest softmax probability.
    predicted_classes = model.predict(X_test_padded).argmax(axis=1)

    # Compare like with like: both sides are integer-encoded class ids.
    accuracy = accuracy_score(y_test_encoded, predicted_classes)
    precision = precision_score(y_test_encoded, predicted_classes, average='weighted', zero_division=0)
    recall = recall_score(y_test_encoded, predicted_classes, average='weighted', zero_division=0)
    f1 = f1_score(y_test_encoded, predicted_classes, average='weighted', zero_division=0)

    # Print a summary of the metrics.
    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1 Score: {f1}")

    return {'accuracy': accuracy, 'precision': precision, 'recall': recall, 'f1': f1}

# Example usage of the function (the CSV path is a placeholder to replace with a real file)
evaluate_models('votre_dataframe.csv')


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense
from keras.callbacks import EarlyStopping
from nlpaug.util.file.download import DownloadUtil
import nlpaug.augmenter.word as naw

def _train_lstm_and_score(X_train_padded, y_train_encoded, X_test_padded, y_test_encoded,
                          max_words, max_sequence_length, num_classes):
    """Train a fresh LSTM classifier and return its accuracy on the test set."""
    embedding_dim = 100
    model = Sequential()
    model.add(Embedding(input_dim=max_words, output_dim=embedding_dim, input_length=max_sequence_length))
    model.add(LSTM(100))  # Adjust the number of LSTM units as needed
    model.add(Dense(num_classes, activation='softmax'))  # One output per class

    # Sparse loss: targets are integer class ids, not one-hot vectors.
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Train with EarlyStopping to limit overfitting.
    early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
    model.fit(X_train_padded, y_train_encoded, epochs=20, batch_size=64, validation_split=0.2, callbacks=[early_stopping])

    _, test_accuracy = model.evaluate(X_test_padded, y_test_encoded)
    return test_accuracy


def data_augmentation(data, accuracy_before_augmentation=None):
    """Compare LSTM test accuracy with and without synonym augmentation.

    The CSV must contain a 'text' column and a 'category' column. Training
    texts are rewritten with WordNet synonym replacement (nlpaug) and a model
    trained on them is compared against a baseline.

    Args:
        data: Path, URL or buffer accepted by ``pandas.read_csv``.
        accuracy_before_augmentation: Baseline test accuracy to compare with.
            When ``None`` (default) a baseline model is trained on the
            original, un-augmented training texts to obtain it.

    Returns:
        dict: ``{'accuracy_before_augmentation', 'accuracy_after_augmentation'}``.
    """
    # Local import: LabelEncoder is not imported alongside this function.
    from sklearn.preprocessing import LabelEncoder

    df = pd.read_csv(data)

    # Missing texts would make the augmenter/Tokenizer fail on float NaN values.
    X = df['text'].fillna('').astype(str)
    y = df['category']

    # Encode labels on the full column so no test label is unseen.
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)
    num_classes = len(label_encoder.classes_)

    # Split into training and test sets.
    X_train, X_test, y_train_encoded, y_test_encoded = train_test_split(
        X, y_encoded, test_size=0.2, random_state=42)

    # Data augmentation with NLPAug.
    # NOTE(review): as in the original code, the augmented texts replace the
    # original training texts rather than being appended to them.
    aug = naw.SynonymAug(aug_src='wordnet')
    augmented_X_train = []
    for text in X_train:
        augmented = aug.augment(text)
        # Recent nlpaug releases return a list of strings; Keras' Tokenizer
        # would treat such a list as pre-tokenized input, so unwrap it.
        if isinstance(augmented, list):
            augmented = augmented[0] if augmented else text
        augmented_X_train.append(augmented)

    # Tokenize and pad the sequences (vocabulary learned on original training texts).
    max_words = 1000
    tokenizer = Tokenizer(num_words=max_words, oov_token='<OOV>')
    tokenizer.fit_on_texts(X_train)
    max_sequence_length = 100  # Adjust the maximum sequence length as needed

    def to_padded(texts):
        sequences = tokenizer.texts_to_sequences(texts)
        return pad_sequences(sequences, maxlen=max_sequence_length, padding='post', truncating='post')

    X_test_padded = to_padded(X_test)

    if accuracy_before_augmentation is None:
        # No baseline supplied: train one on the original training texts.
        accuracy_before_augmentation = _train_lstm_and_score(
            to_padded(X_train), y_train_encoded, X_test_padded, y_test_encoded,
            max_words, max_sequence_length, num_classes)

    test_accuracy = _train_lstm_and_score(
        to_padded(augmented_X_train), y_train_encoded, X_test_padded, y_test_encoded,
        max_words, max_sequence_length, num_classes)
    print(f"Test Accuracy: {test_accuracy}")

    # Print a before/after comparison of the performance.
    print("Comparaison avant et après l'augmentation de données :")
    print(f"Accuracy avant augmentation : {accuracy_before_augmentation}")
    print(f"Accuracy après augmentation : {test_accuracy}")

    return {
        'accuracy_before_augmentation': accuracy_before_augmentation,
        'accuracy_after_augmentation': test_accuracy,
    }

# Example usage of the function (the CSV path is a placeholder to replace with a real file)
data_augmentation('votre_dataframe.csv')


In [None]:
import os

import cv2
import numpy as np
from keras import optimizers
from keras.applications.vgg16 import VGG16, decode_predictions, preprocess_input
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.models import Sequential, Model
from keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split

def load_data(data_dir, target_size=None, extensions=('.jpg', '.jpeg')):
    """Load images from a directory laid out as one sub-folder per class.

    Args:
        data_dir: Directory whose immediate sub-directories are class names.
        target_size: Optional ``(width, height)`` passed to ``cv2.resize``.
            When ``None`` (default) images keep their native size, in which
            case they must all share the same shape.
        extensions: File name suffixes to load, matched case-insensitively.

    Returns:
        tuple: ``(images, labels)`` as NumPy arrays. ``images`` holds the
        arrays returned by ``cv2.imread``; ``labels`` holds the sub-folder
        name of each image. Both are empty when nothing is found.

    Raises:
        ValueError: If the loaded images do not all have the same shape.
    """
    suffixes = tuple(ext.lower() for ext in extensions)
    images = []
    labels = []

    # os.listdir order is arbitrary; sorting keeps the dataset order (and thus
    # any seeded train/test split) reproducible across machines.
    for class_folder in sorted(os.listdir(data_dir)):
        class_folder_path = os.path.join(data_dir, class_folder)
        if not os.path.isdir(class_folder_path):
            continue

        for image_file in sorted(os.listdir(class_folder_path)):
            if not image_file.lower().endswith(suffixes):
                continue

            image = cv2.imread(os.path.join(class_folder_path, image_file))
            if image is None:
                # cv2.imread signals an unreadable file with None; skip it.
                continue

            if target_size is not None:
                image = cv2.resize(image, tuple(target_size))
            images.append(image)
            labels.append(class_folder)

    # A ragged stack cannot become a regular array; fail with a clear message
    # instead of an object array or an opaque NumPy error.
    shapes = {image.shape for image in images}
    if len(shapes) > 1:
        raise ValueError(
            f"Images under {data_dir!r} have different shapes {sorted(shapes)}; "
            "pass target_size=(width, height) to resize them."
        )

    return np.array(images), np.array(labels)

def preprocess_data(images, labels, test_size=0.2, random_state=42):
    """Scale pixel values to [0, 1] and split into train and test sets.

    Args:
        images: Array of images with pixel values in the 0-255 range.
        labels: Array of labels, one per image.
        test_size: Fraction (or count) of samples placed in the test set.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        tuple: ``(train_images, test_images, train_labels, test_labels)``.
    """
    # Cast to float32 before dividing: uint8 / 255.0 would yield float64,
    # doubling the memory footprint of the image array.
    images = np.asarray(images, dtype=np.float32) / np.float32(255.0)
    train_images, test_images, train_labels, test_labels = train_test_split(
        images, labels, test_size=test_size, random_state=random_state)

    return train_images, test_images, train_labels, test_labels

def create_custom_model(input_shape, num_classes):
    """Build a small CNN: two 3x3 conv layers, one max-pool, three dense layers.

    Args:
        input_shape: Shape of one input image, forwarded to the first Conv2D.
        num_classes: Number of units in the final softmax layer.

    Returns:
        An uncompiled Keras ``Sequential`` model.
    """
    # NOTE(review): only one 2x2 pooling precedes Flatten, so the first
    # Dense(4096) has (H/2)*(W/2)*64*4096 weights; for large inputs this is
    # very memory-hungry. Confirm this is intended.
    layer_stack = [
        Conv2D(64, (3, 3), input_shape=input_shape, padding='same', activation='relu'),
        Conv2D(64, (3, 3), padding='same', activation='relu'),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
        Flatten(),
        Dense(4096, activation='relu'),
        Dense(4096, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ]
    return Sequential(layer_stack)

def load_vgg16_model(num_classes):
    """Build a VGG16 backbone with a new dense classification head.

    The backbone is created with ImageNet weights, without its original top
    layers, for 224x224x3 inputs. The head is Flatten, two Dense(4096, relu)
    layers and a softmax layer.

    Args:
        num_classes: Number of units in the final softmax layer.

    Returns:
        An uncompiled Keras ``Model`` mapping the backbone input to the
        class probabilities.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

    # Classification head stacked on the convolutional features.
    features = Flatten()(backbone.output)
    for _ in range(2):
        features = Dense(4096, activation='relu')(features)
    class_probabilities = Dense(num_classes, activation='softmax')(features)

    return Model(inputs=backbone.input, outputs=class_probabilities)

def fine_tune_total(model):
    """Mark every layer of ``model`` as trainable (full fine-tuning).

    Args:
        model: Object exposing a ``layers`` iterable whose items have a
            ``trainable`` attribute (e.g. a Keras model). Modified in place.
    """
    for net_layer in model.layers:
        net_layer.trainable = True

def feature_extraction(model):
    """Freeze every layer of ``model`` by setting ``trainable`` to False.

    Args:
        model: Object exposing a ``layers`` iterable whose items have a
            ``trainable`` attribute (e.g. a Keras model). Modified in place.
    """
    for net_layer in model.layers:
        net_layer.trainable = False

def fine_tune_partial(model, num_layers_to_freeze):
    """Freeze the first ``num_layers_to_freeze`` layers and train the rest.

    The remaining layers are explicitly marked trainable, so the result does
    not depend on how the model was configured before the call (for example
    if every layer had been frozen earlier).

    Args:
        model: Object exposing a ``layers`` sequence whose items have a
            ``trainable`` attribute (e.g. a Keras model). Modified in place.
        num_layers_to_freeze: Slice bound; ``model.layers[:n]`` is frozen and
            ``model.layers[n:]`` is made trainable.
    """
    for layer in model.layers[:num_layers_to_freeze]:
        layer.trainable = False
    for layer in model.layers[num_layers_to_freeze:]:
        layer.trainable = True

def train_model(model, train_images, train_labels, epochs=10, batch_size=32):
    """Compile ``model`` and fit it on the training data.

    The model is compiled with the 'adam' optimizer, the
    'sparse_categorical_crossentropy' loss and the 'accuracy' metric.

    Args:
        model: Model exposing ``compile`` and ``fit``.
        train_images: Training inputs passed to ``fit``.
        train_labels: Training targets passed to ``fit``.
        epochs: Number of training epochs.
        batch_size: Mini-batch size.

    Returns:
        The same ``model`` object, after fitting.
    """
    compile_options = {
        'optimizer': 'adam',
        'loss': 'sparse_categorical_crossentropy',
        'metrics': ['accuracy'],
    }
    model.compile(**compile_options)

    model.fit(train_images, train_labels, batch_size=batch_size, epochs=epochs, verbose=2)
    return model

def evaluate_model(model, test_images, test_labels):
    """Evaluate ``model`` on the test data and print its accuracy as a percentage.

    Args:
        model: Model whose ``evaluate`` returns a ``(loss, accuracy)`` pair.
        test_images: Test inputs passed to ``evaluate``.
        test_labels: Test targets passed to ``evaluate``.
    """
    _, accuracy = model.evaluate(test_images, test_labels, verbose=2)
    print(f"Test accuracy: {accuracy * 100:.2f}%")

def predict(model, image_path, class_names=None, top=3):
    """Classify one image file and return the highest-scoring classes.

    Args:
        model: Model whose ``predict`` returns a ``(1, num_classes)`` array
            of class scores.
        image_path: Path of the image; it is loaded at 224x224.
        class_names: Optional sequence mapping class index to a label, for
            models trained on custom classes.
        top: Number of classes to return.

    Returns:
        list: ``(class_id, class_name, score)`` tuples, best first. For a
        1000-class output without ``class_names`` these are the ImageNet
        entries from ``decode_predictions``; otherwise ``class_id`` is the
        integer class index and ``class_name`` comes from ``class_names``
        (or is the index as a string when no names are given).
    """
    img = load_img(image_path, target_size=(224, 224))
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)  # add the batch dimension
    # NOTE(review): this applies the VGG16 preprocessing, whereas
    # preprocess_data in this file scales training images by 1/255. Confirm
    # that inference and training preprocessing are meant to differ.
    img = preprocess_input(img)

    prediction = np.asarray(model.predict(img))

    # decode_predictions only accepts (samples, 1000) ImageNet outputs and
    # raises for any other class count, e.g. a custom classification head.
    if class_names is None and prediction.ndim == 2 and prediction.shape[1] == 1000:
        return decode_predictions(prediction, top=top)[0]

    scores = prediction[0]
    best_indices = np.argsort(scores)[::-1][:top]
    return [
        (
            int(index),
            str(class_names[index]) if class_names is not None else str(index),
            float(scores[index]),
        )
        for index in best_indices
    ]

def main():
    """Train and evaluate the custom CNN and the VGG16 transfer-learning model.

    Images are loaded from ``data_dir`` (one sub-folder per class), resized
    to 224x224, split into train/test sets, and both models are trained and
    evaluated on the same split.

    Raises:
        ValueError: If no image could be loaded from ``data_dir``.
    """
    data_dir = 'path_to_data_directory'
    images, labels = load_data(data_dir)
    if len(images) == 0:
        raise ValueError(f"No images found under {data_dir!r}")

    # The VGG16 model below is built for 224x224x3 inputs, so bring every
    # image to that size before training either model.
    images = np.stack([cv2.resize(image, (224, 224)) for image in images])

    # sparse_categorical_crossentropy needs integer class ids, but the labels
    # are folder names; map each name to its index in the sorted class list.
    class_names, label_ids = np.unique(labels, return_inverse=True)
    num_classes = len(class_names)

    train_images, test_images, train_labels, test_labels = preprocess_data(images, label_ids)

    # Custom Model
    custom_model = create_custom_model(train_images.shape[1:], num_classes)
    fine_tune_total(custom_model)
    custom_model = train_model(custom_model, train_images, train_labels)
    evaluate_model(custom_model, test_images, test_labels)

    # VGG-16 Transfer Learning
    vgg16_model = load_vgg16_model(num_classes)
    fine_tune_partial(vgg16_model, 5)
    vgg16_model = train_model(vgg16_model, train_images, train_labels)
    evaluate_model(vgg16_model, test_images, test_labels)

# Run the pipeline only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
