# In [None]:  (Jupyter notebook cell marker left over from export)

import cv2
import time
import datetime
import speech_recognition as sr
from transformers import BartTokenizer, BartForConditionalGeneration
import face_recognition
import numpy as np
import threading
import pandas as pd
import os
import nltk
from rouge import Rouge
from nltk.translate.bleu_score import corpus_bleu

# Load the pre-trained BART summarization tokenizer and model once, at module
# level, so summarize_text() can reuse them on every call.
BART_CHECKPOINT = 'facebook/bart-large-cnn'
tokenizer = BartTokenizer.from_pretrained(BART_CHECKPOINT)
model = BartForConditionalGeneration.from_pretrained(BART_CHECKPOINT)

# Define paths to images and corresponding names
image_paths = {
    "C:/project/photos/Goutham.jpg": "GOUTHAM",
    "C:/project/photos/sumanth.jpg": "SUMANTH",
    "C:/project/photos/vamsi.jpg": "VAMSI"
}

# Load enrolled faces and corresponding names from the system.
# Keys are the raw bytes of each face encoding (NumPy arrays are not hashable);
# recognize_faces() turns them back into float64 arrays with np.frombuffer.
enrolled_faces = {}
for path, name in image_paths.items():
    image = face_recognition.load_image_file(path)
    encodings = face_recognition.face_encodings(image)
    if not encodings:
        # Indexing [0] on an empty result would abort the whole script with a
        # bare IndexError; skip the photo and say which one is unusable.
        print(f"No face found in {path}; skipping enrollment of {name}.")
        continue
    # Only the first face is enrolled -- each photo is assumed to show one person.
    enrolled_faces[encodings[0].tobytes()] = name

# Function to recognize faces in an image
def recognize_faces(image, tolerance=0.6):
    """Identify enrolled people in a BGR frame.

    Each face found in the frame is compared against every encoding in the
    module-level ``enrolled_faces`` mapping and labelled with the *closest*
    enrolled person, provided that distance is below ``tolerance``.

    Args:
        image: Frame in OpenCV's BGR channel order (converted to RGB here).
        tolerance: Maximum Euclidean distance between encodings for a face to
            count as a match. Defaults to 0.6, the value previously hard-coded.

    Returns:
        dict mapping each matched face location (the ``(top, right, bottom,
        left)`` tuple from ``face_recognition.face_locations``) to the matched
        name. Faces with no enrolled match are omitted.
    """
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    face_locations = face_recognition.face_locations(rgb_image)
    face_encodings = face_recognition.face_encodings(rgb_image, face_locations)

    detected_faces = {}
    if not enrolled_faces or not face_encodings:
        return detected_faces

    # Decode the enrolled encodings once per frame instead of once per face.
    names = list(enrolled_faces.values())
    known_encodings = np.array(
        [np.frombuffer(raw, dtype=np.float64) for raw in enrolled_faces]
    )

    for face_encoding, face_location in zip(face_encodings, face_locations):
        distances = np.linalg.norm(known_encodings - face_encoding, axis=1)
        # Take the nearest enrolled face rather than the first one under the
        # threshold, so the result does not depend on enrollment order.
        best = int(np.argmin(distances))
        if distances[best] < tolerance:
            detected_faces[face_location] = names[best]

    return detected_faces

# Function to store presentation details in an Excel file and open it
def store_presentation_details_in_excel(details, path='presentation_details.xlsx'):
    """Append one presentation record to an Excel workbook and open it.

    Args:
        details: Mapping with the keys 'Presenter', 'Duration', 'Date' and
            'Summary' (only keys matching the sheet's columns are stored).
        path: Workbook to append to; created if it does not exist yet.
            Defaults to 'presentation_details.xlsx' in the working directory.

    A PermissionError while writing or opening the workbook (for example when
    it is still open in Excel) is reported on stdout instead of being raised.
    """
    # Reuse the existing workbook if there is one, otherwise start a fresh table.
    try:
        df = pd.read_excel(path)
    except FileNotFoundError:
        df = pd.DataFrame(columns=['Presenter', 'Duration', 'Date', 'Summary'])

    # Append new presentation details to the DataFrame
    df = pd.concat([df, pd.DataFrame([details], columns=df.columns)], ignore_index=True)

    try:
        df.to_excel(path, index=False)
        if hasattr(os, 'startfile'):
            # os.startfile exists only on Windows; calling it elsewhere would
            # raise AttributeError after the file was already saved.
            os.startfile(path)
        else:
            print(f"Presentation details saved to {path}")
    except PermissionError:
        print("Permission denied. Make sure the file is closed and you have write permission")

def calculate_wer(reference_text, hypothesis_text):
    """Compute the word error rate of ``hypothesis_text`` against ``reference_text``.

    Both texts are lower-cased and split with ``nltk.word_tokenize``; the rate
    is the token-level edit distance divided by the number of reference tokens.

    Args:
        reference_text: The text treated as ground truth.
        hypothesis_text: The text being scored.

    Returns:
        float: The error rate. With an empty reference the ratio is undefined,
        so this returns 0.0 when the hypothesis is empty too and ``inf``
        otherwise, instead of raising ZeroDivisionError.
    """
    reference_tokens = nltk.word_tokenize(reference_text.lower())
    hypothesis_tokens = nltk.word_tokenize(hypothesis_text.lower())

    if not reference_tokens:
        return 0.0 if not hypothesis_tokens else float('inf')

    # NOTE: transpositions=True counts a swap of two adjacent words as a single
    # edit, which is more lenient than a plain Levenshtein-based WER.
    edit_distance = nltk.edit_distance(reference_tokens, hypothesis_tokens, transpositions=True)

    return edit_distance / len(reference_tokens)
    
def calculate_accuracy(original_text, summary_text):
    """Score a summary against its source text with ROUGE.

    Args:
        original_text: The full text, used as the ROUGE reference.
        summary_text: The generated summary, used as the ROUGE hypothesis.

    Returns:
        The value returned by ``Rouge().get_scores(summary_text, original_text)``.
    """
    # The ROUGE call below does not use NLTK, but this function has always made
    # sure the 'punkt' data is present and calculate_wer() tokenizes with NLTK
    # afterwards, so keep that side effect. Only download when the data is
    # actually missing, rather than contacting the server on every call.
    try:
        nltk.data.find('tokenizers/punkt')
    except LookupError:
        nltk.download('punkt')

    rouge = Rouge()
    return rouge.get_scores(summary_text, original_text)
# Function to record speech and summarize text
def record_and_summarize_speech(name):
    """Record one utterance, transcribe and summarize it, and log the result.

    The transcript comes from ``recognize_google`` and the summary from
    ``summarize_text``. ROUGE scores and word error rate of the summary against
    the transcript are printed, and a record is passed to
    ``store_presentation_details_in_excel``.

    Args:
        name: Presenter name, used in the console output and the stored record.

    Returns:
        None. Returns early, after printing a message, when no speech starts
        within the timeout, the audio cannot be understood, or the recognition
        request fails.
    """
    recognizer = sr.Recognizer()

    print(f"Recording speech for {name}...")

    with sr.Microphone() as source:
        recognizer.adjust_for_ambient_noise(source)
        try:
            # timeout limits how long to wait for speech to *start*; it does
            # not cap the recording length (that is phrase_time_limit).
            audio = recognizer.listen(source, timeout=60)
        except sr.WaitTimeoutError:
            print("No speech detected within the timeout period.")
            return

    # Only the recognition call can raise these errors, so keep the try narrow.
    try:
        text = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        print("Could not understand audio")
        return
    except sr.RequestError as e:
        print("Could not request results; {0}".format(e))
        return

    print("Speech Recognition Result:", text)
    summary = summarize_text(text)
    print("Summary:", summary)

    rouge_scores = calculate_accuracy(text, summary)
    wer = calculate_wer(text, summary)
    print("ROUGE Scores:", rouge_scores)
    print("WORD ERROR RATE:", wer)

    # Report the real length of the captured audio instead of a fixed
    # "60 seconds": bytes / (samples per second * bytes per sample).
    duration_seconds = len(audio.frame_data) / (audio.sample_rate * audio.sample_width)

    presentation_details = {
        "Presenter": name,
        "Duration": f"{duration_seconds:.1f} seconds",
        "Date": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "Summary": summary
    }

    # Store presentation details in an Excel file and open it
    store_presentation_details_in_excel(presentation_details)

# Function to summarize text
def summarize_text(text):
    """Return a summary of ``text`` generated by the module-level BART model.

    The input is tokenized with truncation at 1024 tokens, decoded with a
    4-beam search, and the first generated sequence is returned as a string
    with special tokens stripped.
    """
    encoded = tokenizer(
        [text],
        max_length=1024,
        return_tensors='pt',
        truncation=True,
    )
    generation_options = {
        'num_beams': 4,
        'min_length': 30,
        'max_length': 200,
        'early_stopping': True,
        'length_penalty': 2.0,
        'no_repeat_ngram_size': 3,
    }
    generated_ids = model.generate(encoded['input_ids'], **generation_options)
    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)

# Function to handle recording
def start_recording():
    """Run the webcam loop: recognize faces and record each recognized presenter.

    Frames are read from camera 0. For every recognized face the presenter's
    speech is recorded and summarized (a blocking call), then the face is
    outlined and labelled on the frame before it is displayed.

    The loop ends when a frame cannot be read, when no enrolled face has been
    seen for 60 seconds, or when 'q' is pressed in the preview window. The
    camera and the preview window are released even if an error occurs.
    """
    cap = cv2.VideoCapture(0)
    # Time of the most recent face sighting (or of start-up, before any).
    last_face_time = time.time()
    try:
        if not cap.isOpened():
            print("Could not open the camera.")

        while cap.isOpened():
            ret, frame = cap.read()

            if not ret:
                print("Failed to capture frame.")
                break

            detected_faces = recognize_faces(frame)

            if detected_faces:
                for face_location, name in detected_faces.items():
                    record_and_summarize_speech(name)

                    top, right, bottom, left = face_location
                    cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)
                    cv2.putText(frame, name, (left, bottom + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 1)
                # Restart the idle timer after the blocking recordings, so the
                # time spent recording is not counted as "no face detected".
                last_face_time = time.time()
            elif time.time() - last_face_time > 60:
                print("No face detected for 60 seconds. Stopping recording.")
                break

            cv2.imshow('Face Recognition', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                print("Recording stopped.")
                break
    finally:
        # Always free the camera and close the preview window.
        cap.release()
        cv2.destroyAllWindows()

# Prompt the user until they choose to start recording
def prompt_to_start_recording():
    """Ask repeatedly until the user enters 's' (any case), then start recording."""
    while input("Press 'S' to start recording: ").lower() != 's':
        print("Invalid option. Please press 'S' to start recording.")
    start_recording()

# Entry point: runs as soon as this cell/script executes. Asks the user to
# confirm with 'S' and then starts the webcam recording loop.
prompt_to_start_recording()
