Interface avec le CNN

In [1]:
import cv2
import numpy as np
import tkinter as tk
from tkinter import ttk, Label, Button, Frame, messagebox
from threading import Thread
from tensorflow.keras.models import load_model
import pyttsx3
import time
from PIL import Image, ImageTk

class SignLanguageApp:
    """Tkinter front end for webcam sign-letter recognition with a Keras CNN.

    The window has a welcome page and a detection page.  While detection is
    running, a background thread reads webcam frames, classifies a fixed
    region of interest with ``self.model`` and appends a letter to
    ``self.detected_text`` once the same class has been predicted for longer
    than ``HOLD_SECONDS``.  The text can optionally be spoken with pyttsx3.
    """

    # Region of the webcam frame that is cropped and classified: x, y, w, h.
    ROI = (100, 100, 200, 200)
    # Size (width, height) the ROI is resized to before prediction.
    MODEL_INPUT_SIZE = (64, 64)
    # Seconds a prediction must persist before its letter is appended.
    HOLD_SECONDS = 1

    def __init__(self):
        """Create the main window, load the model and show the welcome page."""
        self.root = tk.Tk()
        self.root.title("Reconnaissance de Langue des Signes - CNN")
        self.root.geometry("800x600")
        # Closing the window through the window manager must go through the
        # same shutdown path as the "Quitter" button so capture is stopped.
        self.root.protocol("WM_DELETE_WINDOW", self.quit_app)

        self.running = False
        self.detected_text = ""
        self.voice_active = False
        self.last_prediction = ""
        self.last_prediction_time = 0
        self.detection_frame = None   # built lazily by show_detection_page
        self._video_thread = None     # capture thread, if one was started

        self.target_classes = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ") + ["nothing"]

        self.init_model()
        self.init_speech_engine()
        self.create_welcome_page()

    def init_model(self):
        """Load the CNN from ``asl_model_final.keras``.

        Raises:
            Exception: whatever ``load_model`` raised, re-raised after the
                error dialog is shown and the window destroyed, so that
                ``__init__`` does not continue building widgets on a
                destroyed root.
        """
        try:
            self.model = load_model("asl_model_final.keras")
        except Exception:
            messagebox.showerror("Erreur", "Impossible de charger le modèle CNN")
            self.root.destroy()
            raise

    def init_speech_engine(self):
        """Create the pyttsx3 engine; voice, rate and volume stay at defaults."""
        from threading import Lock

        self.engine = pyttsx3.init()
        # One engine is shared by the UI thread and the per-letter speech
        # threads; the lock keeps their say()/runAndWait() pairs from
        # overlapping.
        self._speech_lock = Lock()

    def create_welcome_page(self):
        """Build and display the welcome page (title, image, two buttons)."""
        self.welcome_frame = Frame(self.root)
        self.welcome_frame.pack(expand=True, fill='both')

        style = ttk.Style()
        style.configure('Custom.TButton',
                        font=('Helvetica', 12),
                        padding=10)

        title = Label(self.welcome_frame,
                      text="Reconnaissance de Langue des Signes\navec Deep Learning",
                      font=("Helvetica", 24, "bold"),
                      pady=20)
        title.pack()

        try:
            img = Image.open('Acceuil.png')
            img = img.resize((300, 300))
            photo = ImageTk.PhotoImage(img)
            img_label = Label(self.welcome_frame, image=photo)
            # Keep a reference on the widget so the image is not collected.
            img_label.image = photo
            img_label.pack(pady=20)
        except Exception:
            # Best effort: any failure to load the picture falls back to a
            # plain grey placeholder instead of aborting start-up.
            placeholder = Frame(self.welcome_frame,
                                width=300,
                                height=300,
                                bg='lightgray')
            placeholder.pack(pady=20)
            Label(placeholder,
                  text="ASL Recognition",
                  font=("Helvetica", 14)).place(relx=0.5,
                                                rely=0.5,
                                                anchor='center')

        button_frame = Frame(self.welcome_frame)
        button_frame.pack(pady=20)

        ttk.Button(button_frame,
                   text="Commencer la reconnaissance",
                   style='Custom.TButton',
                   command=self.show_detection_page).pack(pady=10)

        ttk.Button(button_frame,
                   text="Quitter",
                   style='Custom.TButton',
                   command=self.quit_app).pack(pady=10)

    def delete_last_character(self):
        """Remove the last character of the detected text, if there is one."""
        if self.detected_text:
            self.detected_text = self.detected_text[:-1]
            self.update_detected_text()

    def clear_text(self):
        """Reset the detected text to the empty string."""
        self.detected_text = ""
        self.update_detected_text()

    def toggle_voice(self):
        """Flip per-letter speech on or off and show the new state."""
        self.voice_active = not self.voice_active
        status = "activée" if self.voice_active else "désactivée"
        self.status_label.config(text=f"Voix {status}")

    def create_detection_page(self):
        """Build (without packing) the detection page and its controls."""
        self.detection_frame = Frame(self.root)

        self.text_frame = Frame(self.detection_frame, relief='groove', bd=2)
        self.text_frame.pack(pady=10, padx=10, fill='x')

        self.detected_label = Label(self.text_frame,
                                    text="Texte détecté :",
                                    font=("Helvetica", 16),
                                    wraplength=700)
        self.detected_label.pack(pady=10)

        control_frame = Frame(self.detection_frame)
        control_frame.pack(pady=10)

        # Row 1: capture and voice controls.
        button_frame1 = Frame(control_frame)
        button_frame1.pack(pady=5)

        ttk.Button(button_frame1,
                   text="Démarrer",
                   command=self.start_detection).pack(side='left', padx=5)

        ttk.Button(button_frame1,
                   text="Arrêter",
                   command=self.stop_detection).pack(side='left', padx=5)

        ttk.Button(button_frame1,
                   text="Voix On/Off",
                   command=self.toggle_voice).pack(side='left', padx=5)

        # Row 2: text editing.
        button_frame2 = Frame(control_frame)
        button_frame2.pack(pady=5)

        ttk.Button(button_frame2,
                   text="Ajouter Espace",
                   command=self.add_space).pack(side='left', padx=5)

        ttk.Button(button_frame2,
                   text="Supprimer lettre",
                   command=self.delete_last_character).pack(side='left', padx=5)

        ttk.Button(button_frame2,
                   text="Effacer tout",
                   command=self.clear_text).pack(side='left', padx=5)

        # Row 3: speech and navigation.
        button_frame3 = Frame(control_frame)
        button_frame3.pack(pady=5)

        ttk.Button(button_frame3,
                   text="Générer la voix",
                   command=self.generate_voice).pack(side='left', padx=5)

        ttk.Button(button_frame3,
                   text="Retour",
                   command=self.show_welcome_page).pack(side='left', padx=5)

        self.status_label = Label(self.detection_frame,
                                  text="En attente...",
                                  font=("Helvetica", 12))
        self.status_label.pack(pady=10)

    def show_detection_page(self):
        """Switch from the welcome page to the detection page."""
        self.welcome_frame.pack_forget()
        # Build the page only once; rebuilding it on every visit would leave
        # the previous frame and its widgets alive but hidden.
        if self.detection_frame is None:
            self.create_detection_page()
        # The text survives page switches, so make the label reflect it.
        self.update_detected_text()
        self.detection_frame.pack(expand=True, fill='both')

    def show_welcome_page(self):
        """Stop detection if needed and go back to the welcome page."""
        if self.running:
            self.stop_detection()
        self.detection_frame.pack_forget()
        self.welcome_frame.pack(expand=True, fill='both')

    def start_detection(self):
        """Start the capture thread unless one is already active."""
        worker = self._video_thread
        if self.running or (worker is not None and worker.is_alive()):
            # A second thread would open the same camera a second time.
            return
        self.running = True
        self.status_label.config(text="Détection en cours...")
        # Daemon thread: it must never keep the process alive after the
        # window has been closed.
        self._video_thread = Thread(target=self.video_loop, daemon=True)
        self._video_thread.start()

    def stop_detection(self):
        """Ask the capture loop to stop and update the status line."""
        self.running = False
        self.status_label.config(text="Détection arrêtée")

    def add_space(self):
        """Append a space to the detected text."""
        self.detected_text += " "
        self.update_detected_text()

    def generate_voice(self):
        """Speak the whole detected text; does nothing if it is blank."""
        if self.detected_text.strip():
            self._say(self.detected_text)

    def speak(self, text):
        """Log and speak ``text``; used as the target of speech threads."""
        print(f"Speaking: {text}")
        self._say(text)

    def _say(self, text):
        """Queue ``text`` on the engine and block until it has been spoken."""
        with self._speech_lock:
            self.engine.say(text)
            self.engine.runAndWait()

    def update_detected_text(self):
        """Refresh the label that displays the detected text."""
        self.detected_label.config(text=f"Texte détecté : {self.detected_text}")

    def _post_to_ui(self, callback, *args):
        """Schedule ``callback(*args)`` on the Tk event loop.

        Used by the capture thread so that widgets and the detected text are
        only modified from the thread running ``mainloop``.
        """
        try:
            self.root.after(0, callback, *args)
        except (RuntimeError, tk.TclError):
            # The window is already gone; there is nothing left to update.
            pass

    def _commit_letter(self, letter):
        """Append ``letter`` to the text and speak it if voice is enabled."""
        self.detected_text += letter
        self.update_detected_text()
        if self.voice_active:
            Thread(target=self.speak, args=(letter,), daemon=True).start()

    def _mark_stopped(self):
        """Show the 'stopped' status after the capture loop has ended."""
        self.status_label.config(text="Détection arrêtée")

    def _classify_roi(self, roi):
        """Return the class name predicted for ``roi``, or ``None``.

        ``None`` is returned for an empty crop and when the model's arg-max
        index has no entry in ``self.target_classes``.
        """
        if roi.size == 0:
            return None
        resized = cv2.resize(roi, self.MODEL_INPUT_SIZE)
        batch = np.expand_dims(resized / 255.0, axis=0)
        predictions = self.model.predict(batch, verbose=0)
        index = int(np.argmax(predictions))
        if index >= len(self.target_classes):
            return None
        return self.target_classes[index]

    def _should_commit(self, label, now):
        """Update the hold timer and tell whether ``label`` must be appended.

        A letter is committed when it equals the previous prediction and more
        than ``HOLD_SECONDS`` have passed since the timer was last reset.
        ``"nothing"`` never commits and leaves the timer untouched.
        """
        if label == "nothing":
            return False
        if label != self.last_prediction:
            self.last_prediction = label
            self.last_prediction_time = now
            return False
        if now - self.last_prediction_time > self.HOLD_SECONDS:
            self.last_prediction_time = now
            return True
        return False

    def video_loop(self):
        """Capture loop run in the background thread started by start_detection.

        Reads frames from camera 0 until ``self.running`` is cleared, a frame
        cannot be read, or ``q`` is pressed in the preview window.
        """
        cap = cv2.VideoCapture(0)
        x, y, w, h = self.ROI

        try:
            while self.running:
                ret, frame = cap.read()
                if not ret:
                    break

                # Classify before drawing so the overlay is not part of the
                # pixels given to the model.
                label = self._classify_roi(frame[y:y + h, x:x + w])

                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                if label is not None:
                    cv2.putText(frame, label, (x, y - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)
                    if self._should_commit(label, time.time()):
                        self._post_to_ui(self._commit_letter, label)

                cv2.imshow("Reconnaissance des signes", frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # Always free the camera and the preview window, and record that
            # capture has ended even when the loop stopped by itself.
            cap.release()
            cv2.destroyAllWindows()
            self.running = False
            self._post_to_ui(self._mark_stopped)

    def quit_app(self):
        """Stop detection if it is running and close the application."""
        if self.running:
            self.stop_detection()
        self.root.destroy()

    def run(self):
        """Enter the Tk main loop; returns when the window is destroyed."""
        self.root.mainloop()

if __name__ == "__main__":
    # Build the CNN recognition window and block in its event loop.
    SignLanguageApp().run()

2025-01-21 16:43:13.794817: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-21 16:43:13.798778: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-21 16:43:13.811531: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1737474193.832487  718476 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1737474193.838237  718476 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-21 16:43:13.857280: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU ins

Interface avec le RandomForest

In [1]:
import cv2
import mediapipe as mp
import numpy as np
import tkinter as tk
from tkinter import ttk, Label, Button, Frame
from threading import Thread
from joblib import load
import pyttsx3
import time
from PIL import Image, ImageTk

class SignLanguageApp:
    """Tkinter front end for webcam sign-letter detection with a RandomForest.

    While detection is running, a background thread reads webcam frames,
    extracts MediaPipe hand landmarks from a fixed region of interest and
    classifies them with ``self.rf``.  A letter is appended to
    ``self.detected_text`` once it has been predicted for ``HOLD_SECONDS``.
    """

    # Region of the webcam frame searched for a hand: x, y, w, h.
    ROI = (40, 20, 175, 175)
    # Seconds a letter must persist before it is appended.
    HOLD_SECONDS = 1
    # Predictions at or below this probability are ignored.
    MIN_CONFIDENCE = 0.5

    def __init__(self):
        """Create the main window, load the models and show the welcome page."""
        self.root = tk.Tk()
        self.root.title("Détection de Langue des Signes")
        self.root.geometry("800x600")
        # Closing the window through the window manager must go through the
        # same shutdown path as the "Quitter" button so capture is stopped.
        self.root.protocol("WM_DELETE_WINDOW", self.quit_app)

        self.running = False
        self.detected_text = ""
        self.current_letter = None
        self.current_letter_start_time = None
        self.current_frame = None
        self.detection_frame = None   # built lazily by show_detection_page
        self._video_thread = None     # capture thread, if one was started

        self.init_mediapipe()
        self.init_speech_engine()
        self.init_model()
        self.create_welcome_page()

    def init_mediapipe(self):
        """Create the MediaPipe Hands detector (video mode, one hand)."""
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=1,
            min_detection_confidence=0.5
        )

    def init_speech_engine(self):
        """Create the pyttsx3 engine; voice, rate and volume stay at defaults."""
        self.engine = pyttsx3.init()

    def init_model(self):
        """Load the classifier from ``random.joblib``.

        Raises:
            Exception: whatever ``load`` raised, re-raised after an error
                dialog is shown and the window destroyed.
        """
        try:
            # NOTE: joblib.load unpickles the file, so it must only be used
            # on model files from a trusted source.
            self.rf = load('random.joblib')
        except Exception:
            from tkinter import messagebox

            messagebox.showerror("Erreur",
                                 "Impossible de charger le modèle RandomForest")
            self.root.destroy()
            raise

    def create_welcome_page(self):
        """Build and display the welcome page (title, image, two buttons)."""
        self.welcome_frame = Frame(self.root)
        self.welcome_frame.pack(expand=True, fill='both')

        style = ttk.Style()
        style.configure('Custom.TButton',
                        font=('Helvetica', 12),
                        padding=10)

        title = Label(self.welcome_frame,
                      text="Bienvenue dans l'application\nde détection de langue des signes",
                      font=("Helvetica", 24, "bold"),
                      pady=20)
        title.pack()

        try:
            img = Image.open('Acceuil.png')
            img = img.resize((300, 300))
            photo = ImageTk.PhotoImage(img)
            img_label = Label(self.welcome_frame, image=photo)
            # Keep a reference on the widget so the image is not collected.
            img_label.image = photo
            img_label.pack(pady=20)
        except Exception:
            # Best effort: a missing or unreadable picture must not prevent
            # the application from starting.
            placeholder = Frame(self.welcome_frame,
                                width=300,
                                height=300,
                                bg='lightgray')
            placeholder.pack(pady=20)

        button_frame = Frame(self.welcome_frame)
        button_frame.pack(pady=20)

        ttk.Button(button_frame,
                   text="Commencer la détection",
                   style='Custom.TButton',
                   command=self.show_detection_page).pack(pady=10)

        ttk.Button(button_frame,
                   text="Quitter",
                   style='Custom.TButton',
                   command=self.quit_app).pack(pady=10)

    def delete_last_character(self):
        """Remove the last character of the detected text, if there is one."""
        if self.detected_text:
            self.detected_text = self.detected_text[:-1]
            self.update_detected_text()

    def clear_text(self):
        """Reset the detected text to the empty string."""
        self.detected_text = ""
        self.update_detected_text()

    def create_detection_page(self):
        """Build (without packing) the detection page and its controls."""
        self.detection_frame = Frame(self.root)
        self.text_frame = Frame(self.detection_frame, relief='groove', bd=2)
        self.text_frame.pack(pady=10, padx=10, fill='x')

        self.detected_label = Label(self.text_frame,
                                    text="Texte détecté :",
                                    font=("Helvetica", 16),
                                    wraplength=700)
        self.detected_label.pack(pady=10)

        control_frame = Frame(self.detection_frame)
        control_frame.pack(pady=10)

        # Row 1: capture controls.
        button_frame1 = Frame(control_frame)
        button_frame1.pack(pady=5)

        ttk.Button(button_frame1,
                   text="Démarrer",
                   command=self.start_detection).pack(side='left', padx=5)

        ttk.Button(button_frame1,
                   text="Arrêter",
                   command=self.stop_detection).pack(side='left', padx=5)

        # Row 2: text editing.
        button_frame2 = Frame(control_frame)
        button_frame2.pack(pady=5)

        ttk.Button(button_frame2,
                   text="Ajouter Espace",
                   command=self.add_space).pack(side='left', padx=5)

        ttk.Button(button_frame2,
                   text="Supprimer lettre",
                   command=self.delete_last_character).pack(side='left', padx=5)

        ttk.Button(button_frame2,
                   text="Effacer tout",
                   command=self.clear_text).pack(side='left', padx=5)

        # Row 3: speech and navigation.
        button_frame3 = Frame(control_frame)
        button_frame3.pack(pady=5)

        ttk.Button(button_frame3,
                   text="Générer la voix",
                   command=self.generate_voice).pack(side='left', padx=5)

        ttk.Button(button_frame3,
                   text="Retour",
                   command=self.show_welcome_page).pack(side='left', padx=5)

        self.status_label = Label(self.detection_frame,
                                  text="En attente...",
                                  font=("Helvetica", 12))
        self.status_label.pack(pady=10)

    def show_detection_page(self):
        """Switch from the welcome page to the detection page."""
        self.welcome_frame.pack_forget()
        # Build the page only once; rebuilding it on every visit would leave
        # the previous frame and its widgets alive but hidden.
        if self.detection_frame is None:
            self.create_detection_page()
        # The text survives page switches, so make the label reflect it.
        self.update_detected_text()
        self.detection_frame.pack(expand=True, fill='both')

    def show_welcome_page(self):
        """Stop detection if needed and go back to the welcome page."""
        if self.running:
            self.stop_detection()
        self.detection_frame.pack_forget()
        self.welcome_frame.pack(expand=True, fill='both')

    def start_detection(self):
        """Start the capture thread unless one is already active."""
        worker = self._video_thread
        if self.running or (worker is not None and worker.is_alive()):
            # A second thread would open the same camera a second time.
            return
        self.running = True
        self.status_label.config(text="Détection en cours...")
        # Daemon thread: it must never keep the process alive after the
        # window has been closed.
        self._video_thread = Thread(target=self.video_loop, daemon=True)
        self._video_thread.start()

    def stop_detection(self):
        """Ask the capture loop to stop and update the status line."""
        self.running = False
        self.status_label.config(text="Détection arrêtée")

    def add_space(self):
        """Append a space to the detected text."""
        self.detected_text += " "
        self.update_detected_text()

    def generate_voice(self):
        """Speak the whole detected text; does nothing if it is blank."""
        if self.detected_text.strip():
            self.engine.say(self.detected_text)
            self.engine.runAndWait()

    def update_detected_text(self):
        """Refresh the label that displays the detected text."""
        self.detected_label.config(text=f"Texte détecté : {self.detected_text}")

    def _post_to_ui(self, callback, *args):
        """Schedule ``callback(*args)`` on the Tk event loop.

        Used by the capture thread so that widgets and the detected text are
        only modified from the thread running ``mainloop``.
        """
        try:
            self.root.after(0, callback, *args)
        except (RuntimeError, tk.TclError):
            # The window is already gone; there is nothing left to update.
            pass

    def _commit_letter(self, letter):
        """Append ``letter`` to the detected text and refresh the label."""
        self.detected_text += letter
        self.update_detected_text()

    def _mark_stopped(self):
        """Show the 'stopped' status after the capture loop has ended."""
        self.status_label.config(text="Détection arrêtée")

    def _letter_for_index(self, index):
        """Translate a ``predict_proba`` column index into a letter.

        The column is first mapped to its label through ``self.rf.classes_``
        when the model exposes it, so a model trained on a subset of labels
        is still decoded correctly.  Non-numeric string labels are returned
        unchanged; numeric labels ``k`` become ``chr(65 + k)``.
        """
        classes = getattr(self.rf, "classes_", None)
        label = index if classes is None else classes[index]
        if isinstance(label, str) and not label.isdigit():
            return label
        return chr(65 + int(label))

    def _detect_letters(self, roi):
        """Return the letters confidently predicted for the hands in ``roi``.

        ``roi`` is a BGR crop of the camera frame.  The result is empty when
        the crop is empty, no hand is found, or no prediction exceeds
        ``MIN_CONFIDENCE``.
        """
        if roi.size == 0:
            return []
        results = self.hands.process(cv2.cvtColor(roi, cv2.COLOR_BGR2RGB))
        if not results.multi_hand_landmarks:
            return []

        letters = []
        for hand_landmarks in results.multi_hand_landmarks:
            points = [(lm.x, lm.y, lm.z) for lm in hand_landmarks.landmark]
            features = np.array(points).flatten()
            probabilities = self.rf.predict_proba([features])[0]
            best = int(np.argmax(probabilities))
            if probabilities[best] > self.MIN_CONFIDENCE:
                letters.append(self._letter_for_index(best))
        return letters

    def _should_commit(self, letter, now):
        """Update the hold timer and tell whether ``letter`` must be appended.

        A new letter starts the timer.  When the same letter is seen again at
        least ``HOLD_SECONDS`` later it is committed and the timer is
        cleared, so the letter has to be held again to be repeated.
        """
        if letter != self.current_letter:
            self.current_letter = letter
            self.current_letter_start_time = now
            return False
        if now - self.current_letter_start_time >= self.HOLD_SECONDS:
            self.current_letter = None
            self.current_letter_start_time = None
            return True
        return False

    def video_loop(self):
        """Capture loop run in the background thread started by start_detection.

        Reads frames from camera 0 until ``self.running`` is cleared, a frame
        cannot be read, or ``q`` is pressed in the preview window.
        """
        cap = cv2.VideoCapture(0)
        x, y, w, h = self.ROI

        try:
            while self.running:
                ret, frame = cap.read()
                if not ret:
                    break

                # Detect before drawing: the ROI is a view into the frame, so
                # a rectangle drawn first would be part of the analysed image.
                letters = self._detect_letters(frame[y:y + h, x:x + w])

                cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)

                for letter in letters:
                    if self._should_commit(letter, time.time()):
                        self._post_to_ui(self._commit_letter, letter)

                cv2.imshow('Detection de signes', frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # Always free the camera and the preview window, and record that
            # capture has ended even when the loop stopped by itself.
            cap.release()
            cv2.destroyAllWindows()
            self.running = False
            self._post_to_ui(self._mark_stopped)

    def quit_app(self):
        """Stop detection if it is running and close the application."""
        if self.running:
            self.stop_detection()
        self.root.destroy()

    def run(self):
        """Enter the Tk main loop; returns when the window is destroyed."""
        self.root.mainloop()

if __name__ == "__main__":
    # Build the RandomForest detection window and block in its event loop.
    SignLanguageApp().run()

2025-01-21 16:38:24.660417: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-21 16:38:24.665564: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-21 16:38:24.684025: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1737473904.707410  714658 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1737473904.713278  714658 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-21 16:38:24.734445: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU ins