In [1]:
#Split Recognized Speech Into Chunks Cell

#Step 2.2: split audio file/ recognized text (with offsetDifference (in ms) between 2 consecutive words indicates new chunk)
def splitRecognizedText(wordList, offsetDifference):   #Input: Recognized text word list (with all attributes) | Output: array of arrays, each chunk = 1 array (of words list with their attributes)
    chunks=[]
    currentChunk=[]

    for i in range(len(wordList)):
        currentWord= wordList[i]

        if i==0:currentChunk.append(currentWord)
        else:

            currentWordOffset= wordList[i]["Offset"]
            previousWordOffset= wordList[i-1]["Offset"]

            if currentWordOffset> (previousWordOffset + offsetDifference):  #indicates new chunk (2.5s difference in offset)
                chunks.append(currentChunk)
                currentChunk=[]
                currentChunk.append(currentWord)

            else: currentChunk.append(currentWord)                #current word is still in the same chunk


            if (i== len(wordList)-1): chunks.append(currentChunk)  #if this was the last word append the current chunk to chunks

    return chunks


# Step 2.3: Convert chunks (arrays of word dict(s)) to strings 
def convertChunksToStrings(chunks): #Inputs: list of chunks (array of arrays) (each chunks includes the word objects with attributes)  | Output: list of the actuals strings of each chunk (first word capitalized & with a period at the end of chunk to denote end of chunk)
        
    def convert1ChunkToString(wordlist):       #Input: list of word objects (with attributes) | Output: string of words put together   
        string=""
        for i in range(len(wordlist)):
            if i==0:
                string+= wordlist[i]['Word'].capitalize()    #capitilizing first word (first letter)
            else:
                string+= " " + wordlist[i]['Word'] 
        
        string+=". "  #add a period to denote end of chunk
        return(string)
    
    
    chunksStringsList=[]
    
    for i in range(len(chunks)):
        string= convert1ChunkToString(chunks[i])
        startTime= chunks[i][0]['Offset']
        obj= {'String': string , 'StartTime': startTime}
        chunksStringsList.append(obj)
    return chunksStringsList
   

In [2]:
#Extract Text From PDF
def pdfToText(path):  #Input: PDF file path | Output: list of strings (1 slot per page (text) ) (Slide 1 contents in slot 1 & so on)
    import fitz  # this is pymupdf (module used to extract text from pdf)       #perfect way to read text from a pdf (& formatting maintained, include line breaks ('\n is in string of each slot, but doesn't appear when printing)

    with fitz.open(path) as doc:
        pages=[0]                             # reserving slot 0 for the no similarity score slide (by adding a 0 as first element so what's appended will start from slot 1 (putting contents of slide 1 in slot 1 for easier representation) in this array
        # print(len(doc))                     # gets length of pdf file
        for i in range(len(doc)):        
            text = doc[i].get_text()          
            pages.append(text)
    return(pages)


def matchSlides(totalSlides, timeArray, wordList):
    slides=[]
    
    for i in range(totalSlides +1): #Create SLides List |  +1, so slide 0 is empty
        obj={"Slide": i, "Words":[], 'ChunksStrings':[]}
        slides.append(obj)
    

    for i in range(len(timeArray)):
        startTime= timeArray[i]['Time']

        if i==len(timeArray)-1:                 #Last Entry in timeArray
            positive_infinity = float('inf')
            endTime= positive_infinity
        else:
            endTime= timeArray[i+1]['Time']


        slideNo= timeArray[i]['Slide']

        #currentChunk=[]
        #currentChunk=[]     # current chunk is a list of word objects (dict(s))

        for y in wordList:
            if y['Offset']> endTime:
                break
            if y['Offset']>= startTime and y['Offset']< endTime:
                slides[slideNo]['Words'].append(y)
    
    for i in range(totalSlides +1): #Create SLides List |  +1, so slide 0 is empty
        currentSlide= slides[i]
        chunks = splitRecognizedText(currentSlide['Words'], 5000)
        chunksStrings= convertChunksToStrings( chunks )
        currentSlide['ChunksStrings']= chunksStrings
        

    return(slides)

In [3]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from operator import itemgetter
from itertools import groupby

def sentenceSimilarity(referenceSent, sentences, modelPath): #Input: slideNo (int, slide number), referenceSent (string, slideText comparing to), sentences (list of strings, list of sentences), modelPath (string: path of model either local path or sentence-transformers path) | Output: array of similarity scores (of sentences compared to reference sentence)  

    model = SentenceTransformer(modelPath)  
    # encode method takes array of strings always
    embeddings = model.encode([referenceSent])   
    embeddings2 = model.encode(sentences)
    
    #Cosine similarity first parameter (embedings of reference sentence) must by array of array
    x= cosine_similarity( [embeddings[0]] , embeddings2 )  #Let's calculate cosine similarity for source sentence 
    similarityScores= x[0]                                 #x[0] = array of similarity score of every sentence compared to ref sentence (ref sentence not included)
    
    return similarityScores

def integrateDeepLeaning(slides): #Input: slides ->> list of dict(s) -> {"Slide": i, "Words":[],   'ChunksStrings':[ {'String': "" , 'StartTime': 0}] }
    
    slidesText= pdfToText('templates/static/lec.pdf')   # array of strings 1 slot per slide
    #NOTE: slides & slidesText have slide 0 empty reserved
    
    
    #(Special Cases) (To be done later): 1st (title) slide 
    
    #1) (Implemented in Level 1) don't compare the spoken chunksStrings assigned to that slide to the text of the other slides by -> checking the original slide 
    #in slideText array if it's boolean variable is true (that this slide is a 1st slide/picture slide) then don't append these chunks/strings to array 1 & 2
    #which make up the sentences array together. And append to output array right away (OutputArray -> [  {"Slide": i, 'ChunksStrings':[ {'String': "" , 'StartTime': 0}] } ]
    #Sentences array contains sentences that are compored to every slides' text to find a better match than the original slide it was assigned to)
    
    #2) (Implemented in Level 2) don't use this slide's text as reference sentence by -> in level 2 for loop we're iterating through the slidesText array 
    #(text of every slide one by one & using its text as reference sentence and comparing it to the sentence array), so in every iteration check if this slide 
    #is 1st slide/picture slide (obtained by the boolean variable in slidesText array) if true (that is slide is a 1st slide/picture slide) then skip this iteration/slide
    
    #------------------------------------------------------------------------------------------------------------
    #Level 0
    #Construct OutputArray -> [  {"Slide": i, 'ChunksStrings':[ {'String': "" , 'StartTime': 0}] } ] (length: no. of slides + 1 (slot 0 reserved for slide 0) just like slides & slidesText array
    
    #Level 1
    #1)Array 1: Counstruct Array of dict(s) for all chunksStrings across all slides: {String:"", StartTime:"",  Original SLide: 0, Scores: [ {Slide: 0, Score:0} ] }   
    #2)Array 2/ Sentence Array: Construct Arrray of strings for all chunksStrings across all slides (extract it from array 1 (the array of dict(s))

    #Level 2
    #Reference Sentence: slides text (1 slide at a time) #this way we're going to load model no. of slides times
    #Sentneces Array is the array 2 (the array of strings)
    #record down similarity score of every string compared to every slide in array1
    
    #Level 3
    #then push strings to the OutputArray -> [  {"Slide": i, 'ChunksStrings':[ {'String': "" , 'StartTime': 0}] } ] (append from array 1)
    #then order by start time & return OutputArray
    
    #-----------------------------------------------------------------------------------------------------------
    #Level 0 (constructing OutputArray)
    
    OutputArray=[]
    for i in range(len(slides)):
        obj= {'Slide': i, 'ChunksStrings':[]}
        OutputArray.append(obj)

    #-----------------------------------------------------------------------------------------------------------
    
    #Level 1 (constructing array 1 & 2 & deciding which chunks/string per slide is going to be appended to compare to other slides & find a better match)
    array1=[]
    array2=[]
    
    if len(slides)>1:
    
        for i in range(1,len(slides)): # x= {"Slide": i, "Words":[],   'ChunksStrings':[ {'String': "" , 'StartTime': 0}] }
            x= slides[i]
            ChunksStrings= x['ChunksStrings']

            #(Special Case) if slide is 1st slide/ title slide do this: (append to output array right away

            if i==1:                                    
                OutputArray[i]['ChunksStrings']= ChunksStrings

            else:

                for y in ChunksStrings:    #y = {'String': "" , 'StartTime': 0}
                    obj= y 
                    obj['OriginalSlide'] = x['Slide']
                    obj['Scores']= []


                    array1.append(obj)            
                    array2.append(obj['String'])

    #-----------------------------------------------------------------------------------------------------------
    #Level 2 (comparing every slides' text, using every slides's text as reference sentence 1 at a time)
    #Array 2 is sentences array / 1 slide text at a time is ref sentence (put
    
    model= 'models/Pyjay-multilingual-snli-v2-500k'
   
    if len(slidesText)>2 and len(array2)>0:
        
        for i in range(2,len(slidesText)):   #start doing this from slide 2 (& dont do it for slide 1)             

            #(Special Case) if current slide in loop is not 1st slide 
            #{
            referenceSentence= slidesText[i]
            scores= sentenceSimilarity(referenceSentence, array2, model)  #scores of every sentence/chunkString matched to currrent slide

            for j in range(len(scores)):  # j is the index of current chunkString in the big sentences array
                obj= {'Slide': i, 'Score': scores[j]}
                array1[j]['Scores'].append(obj)
            #}  
    #-----------------------------------------------------------------------------------------------------------
    
    
    #Level 3 
    #Array 1 output sample
    #[
    # {'String': 'ما my هي إيه aim in life. ', 'StartTime': 4820.0, 'OriginalSlide': 1, 
    #  'Scores': [{'Slide': 1, 'Score': 0.045683645}, {'Slide': 2, 'Score': 0.06524171}, {'Slide': 3, 'Score': 0.37230912}, {'Slide': 4, 'Score': 0.047170453}, {'Slide': 5, 'Score': 0.12556762}, 
    #             {'Slide': 6, 'Score': 0.10630279}, {'Slide': 7, 'Score': 0.20325503}, {'Slide': 8, 'Score': 0.05992089}, {'Slide': 9, 'Score': 0.09721908}, {'Slide': 10, 'Score': -0.000944698}]} 
    #  ,   
    # {'String': 'Going to the river. ', 'StartTime': 19260.0, 'OriginalSlide': 3, 
    # 'Scores': [{'Slide': 1, 'Score': 0.060692}, {'Slide': 2, 'Score': 0.17372939}, {'Slide': 3, 'Score': 0.041466802}, {'Slide': 4, 'Score': 0.22299403}, {'Slide': 5, 'Score': 0.048308853}, 
    #             {'Slide': 6, 'Score': 0.13261351}, {'Slide': 7, 'Score': -0.008525603}, {'Slide': 8, 'Score': 0.009413469}, {'Slide': 9, 'Score': 0.037716635}, {'Slide': 10, 'Score': 0.043555353}]}
    
    #]
    
    
    #a) Append to output array (to original slide or slide with max score) 
    for x in array1:
        
        obj= {'String':  x['String'] , 'StartTime': x['StartTime'] }
        
        originalSlideNo= x['OriginalSlide']
        scores= x['Scores']
        
        maxScore=0
        maxScoreSlideNo=0
        
        for y in scores:
            score= y['Score']
            slideNo= y['Slide']
            if score> maxScore:
                maxScore= score
                maxScoreSlideNo= slideNo
        
        #got max score & max score slide No
        
        if maxScore==0:   #append to original slide if max similarity score is 0
            OutputArray[originalSlideNo]['ChunksStrings'].append(obj)
        
        else:  #append to slide with max similarity score 
            OutputArray[maxScoreSlideNo]['ChunksStrings'].append(obj)
                  
    #b) Ordering chunksStrings by start time
    
    for x in OutputArray:
        chunksStrings= x['ChunksStrings']
        
        from operator import itemgetter
        newlist = sorted(chunksStrings, key=itemgetter('StartTime')) 
        x['ChunksStrings'] = newlist
            
    print()
    print("Array1")
    display(array1)
    print()
    
    
    return OutputArray
   

In [4]:
#Speech to Text Cell  (New Speech To Text)

import azure.cognitiveservices.speech as speechsdk
import json
import time
import math

#Common
def display(list1):                         ### Input: array of Words (dict)  | Output: prints every slot in seperate line
    for x in list1:
        print(x)
    print()

def stringifyWordList(wordlist):                     ### Input: array of Words (dict) | Output: string (readable form of word array)  
    string=""
    for i in range(len(wordlist)):
        string+= wordlist[i]['Word'] + " "

        #Line Breaks on Each Language Switch {
        #if i<len(wordlist)-1 :
        #    if wordlist[i]['Language']!= wordlist[i+1]['Language']:
        #        string += '\n'
        #}

    #return ("Output >>> \n" +string)       # you need to print something english first, in case the first word is arabic becuase if is python will print from right to left (the whole output, becuase it detected arabic printout)
    string += "\n"
    return(string)
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

#Continous Speech To Text Recognition
def speech_recognize_continuous_from_file(filePath):         ### Input: audio file path  | Output: dict {English (Word Array), Arabic (Word Array), Combined Ordered (Word Array), Overlap Filtered (Word Array), Overlap Filteration Debugging (String Array) }
                                                                                                      # word array -> word is a dict with all attributes

    #Helper Methods
    
    def getMinOffset(words):               #keeps getting the current minumum offset of the word list
        minIndex=0
        minOffset= words[0]['Offset']
        for i in range(len(words)):
            if words[i]['Offset']<minOffset:
                minIndex=i
                minOffset= words[i]['Offset']
        return minIndex

    
    def ordering(words):                  #orders list by offset (gets current min offset & pops it from the old list & adds it to the new list)
        newWords=[]
        for i in range(len(words)):
            minIndex= getMinOffset(words)
            obj= words.pop(minIndex)
            newWords.append(obj)
        return newWords
    
    
        
    def overlapFilteration(words, overlapPercentage1, confidenceDifference1):      #words is list of orders words dict(s) 
        newWords=[]      
        overlapDebug=[]
        
        for i in range(len(words)):
            chosenWords=[]
            
            currentWord= words[i]
            currentWordOffset= currentWord['Offset']
            currentWordConfidence= currentWord['Confidence']
            currentWordDuration= currentWord['Duration']


            if i== 0:                          #if the currrent word is the first word just append it to the new list
                newWords.append(currentWord)
                

            else:                                        #comparing the current word with the word after it
                prevWord= newWords.pop()
                prevWordOffset= prevWord['Offset']
                prevWordConfidence= prevWord['Confidence']
                prevWordDuration= prevWord['Duration']

                overlap= prevWordOffset + prevWordDuration - currentWordOffset
                overlapPercentage= (overlap / currentWordDuration) * 100               ##overlap percentage of currentword duration
                confidenceDiff= abs(currentWordConfidence-prevWordConfidence)
                
                if overlapPercentage >= overlapPercentage1 and confidenceDiff > confidenceDifference1:        ###   Main If condition 1

                    if prevWordConfidence > currentWordConfidence :
                        newWords.append(prevWord)
                        chosenWords.append(prevWord['Word'])

                    else:
                        newWords.append(currentWord)
                        chosenWords.append(currentWord['Word'])


                else:
                    newWords.append(prevWord)
                    chosenWords.append(prevWord['Word'])
                    
                    newWords.append(currentWord)
                    chosenWords.append(currentWord['Word'])

                    
                debug= "Overlap Comparison -> Prev Word: " + str(prevWord['Word']) +" | Current Word: " + str(currentWord['Word']) + " | Confidence Difference: " + "{:.2f}".format(confidenceDiff) +  " | Overlap Percentage: " + str(overlapPercentage) + " | Chosen Word(s): " + str(chosenWords) +  "\n"  
                overlapDebug.append(debug)
                
    
        return {"Words": newWords , "Debug": overlapDebug}

    
    #------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
    words=[]
    
    def recognizeLanguage(language):   # appends the recognized words (according to language given) to 'words' 
        
        wordsList=[]
        
        speech_config = speechsdk.SpeechConfig(subscription="0915a529a519455daa5e8dfce0a921df", region="southafricanorth")
        speech_config.speech_recognition_language=language                                                                     #Language Here
        speech_config.set_service_property("wordLevelConfidence","true", speechsdk.ServicePropertyChannel.UriQueryParameter)
        speech_config.request_word_level_timestamps()      #Gets offset & duration of each word                                             
        speech_config.set_profanity(speechsdk.ProfanityOption.Removed)  #Removes profane words
        speech_config.set_service_property("format","detailed", speechsdk.ServicePropertyChannel.UriQueryParameter)

        audio_config = speechsdk.audio.AudioConfig(filename=filePath)                                                         #path of file here
        speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)


        #Stop Callback
        done = False
        def stop_cb(evt):
            """callback that signals to stop continuous recognition upon receiving an event `evt`"""
            #print('CLOSING on {}'.format(evt))
            nonlocal done
            done = True

        #Handle Recognized Speech Envent
        def handleResult(evt):          #handles result of every small output
            r= evt.result
            output= json.loads(r.json)['NBest'][0]            # the display text of the result is always associated with ['NBest'][0]       
            
            currentText= r.text 
            currentWords=output['Words']
            currentWords2=[]
            
            for x in currentWords:                           #Simplfying word array at the smallest level (each small output alone, simmplfying before adding to big array)
                wordLength= len(x['Word'])
                
                if wordLength>1:                              #Eliminating words with length 1 (alphabets on their own) ('و' is always appended to next word)
                    x['Offset']= x['Offset']* 10**-4
                    x['Duration']= x['Duration']* 10**-4
                    x['Confidence']= x['Confidence']* 10**2
                    x['Language']= language
                    currentWords2.append(x)
              
                ##Here you can update the x['Word'] by the word in currentText (r.text) (display text) to add punctuation (but there are some cases like apostrophe you need to take care of)
           
     
            def confidenceThreshold(percentage, currentWords):           # Input: confidence threshold percentage & list of words (dict(s))  | Output: filtered list of words       
                newCurrentWords=[]
                for x in currentWords:
                    if x['Confidence'] > percentage:
                        newCurrentWords.append(x)
                return newCurrentWords
           
            currentWords2= confidenceThreshold(5, currentWords2)            #Confidence Threshold Filteration at the simplest level (current 5%)
            wordsList.extend(currentWords2)
            
            #print()
            #print(r.text)          #Display Text of Result (with punctuation)
            #print(currentWords)   #List of each word in the display text with a confidence rate
            
        

        #Recognized Speech
        speech_recognizer.recognized.connect(lambda evt: handleResult(evt) )   # only occurs if speech is recognized (r.reason == speechsdk.ResultReason.RecognizedSpeech) (so you don't need to handle the speech not recognized condition

        # Stopping Conditions (of continous speech recognition-> either session stopped or canceled events
        speech_recognizer.session_stopped.connect(stop_cb)
        speech_recognizer.canceled.connect(stop_cb)

        # Start continuous speech recognition
        speech_recognizer.start_continuous_recognition()
        
       
        #Delay between code
        while not done:
            time.sleep(.5)
        
        # Stop continuous speech recognition & Return output
        speech_recognizer.stop_continuous_recognition()
        
        return wordsList 
        
    
    
    
    #English
    english = recognizeLanguage("en-US")
    
    #Arabic
    arabic = recognizeLanguage("ar-EG")
    
    #Recognizing English Only
    #arabic=[]
    
    words= english + arabic 
    
    #--------------------------------------------
    words = ordering(words)
    overlapFilter = overlapFilteration(words, 40, 5)  # 40%  is the overlap filteration percentage, 5% is the confidence difference # if there's if 40% overlap between 2 words & confidence difference is greater than 5%, overlap filertation will occur & the higher confidence word will be chosen
    
    newWords  = overlapFilter['Words'] 
    newWordsDebug  = overlapFilter['Debug'] 
    
    return {'English': english , 'Arabic': arabic , 'Ordered': words , 'Overlap Filtered': newWords , 'Overlap Debug': newWordsDebug}

In [5]:
#Speech to Text Cell 2 (Continous Speech Recognition, with continous language detection #not natural needs to pause for 1 sec when code switching
#Link: https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-identification?tabs=once&pivots=programming-language-python

import azure.cognitiveservices.speech as speechsdk
import json
import time
import math


def speech_recognize_continuous_from_file2(filePath): 
    #Stop Callback
    def stop_cb(evt):
        """callback that signals to stop continuous recognition upon receiving an event `evt`"""
        #print('CLOSING on {}'.format(evt))
        nonlocal done
        done = True

    #Handle Recognized Speech Envent


    
    endpoint_string = "wss://{}.stt.speech.microsoft.com/speech/universal/v2".format("southafricanorth")

    speech_config = speechsdk.SpeechConfig(subscription="0915a529a519455daa5e8dfce0a921df", endpoint=endpoint_string)
    speech_config.set_property(property_id=speechsdk.PropertyId.SpeechServiceConnection_ContinuousLanguageIdPriority, value='Latency')
    speech_config.set_service_property("wordLevelConfidence","true", speechsdk.ServicePropertyChannel.UriQueryParameter)
    speech_config.request_word_level_timestamps()      #Gets offset & duration of each word                                             
    speech_config.set_profanity(speechsdk.ProfanityOption.Removed)  #Removes profane words
    speech_config.set_service_property("format","detailed", speechsdk.ServicePropertyChannel.UriQueryParameter)

    audio_config = speechsdk.audio.AudioConfig(filename=filePath)                                                         #path of file here
    
    auto_detect_source_language_config = speechsdk.languageconfig.AutoDetectSourceLanguageConfig(languages=["en-US", "ar-EG"])
    
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, auto_detect_source_language_config=auto_detect_source_language_config, audio_config=audio_config)


    #Stop Callback
    done = False

    stringList=[]
    
    def handleResult(evt):          #handles result of every small output
        r= evt.result
        #print(r.text)
        #output= json.loads(r.json)['NBest'][0]            # the display text of the result is always associated with ['NBest'][0]       
        stringList.append(r.text)
        
    

    #Recognized Speech
    speech_recognizer.recognized.connect(lambda evt: handleResult(evt) )  # only occurs if speech is recognized (r.reason == speechsdk.ResultReason.RecognizedSpeech) (so you don't need to handle the speech not recognized condition

    # Stopping Conditions (of continous speech recognition-> either session stopped or canceled events
    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)

    # Start continuous speech recognition
    speech_recognizer.start_continuous_recognition()


    #Delay between code
    while not done:
        time.sleep(.5)

    # Stop continuous speech recognition & Return output
    speech_recognizer.stop_continuous_recognition()

    string=""
    for x in stringList:
        string+= x + " "
        
    print(string)    
    return string



In [6]:
#Match Notes to pdf file (with iteration)

from PyPDF2 import PdfFileWriter, PdfFileReader
import io
from reportlab.pdfgen import canvas
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont 

import arabic_reshaper
from bidi.algorithm import get_display



def getPageSize(path, pageNo):
    pdf = PdfFileReader(open(path, 'rb'))
    page_1 = pdf.getPage(pageNo)
    if page_1.get('/Rotate', 0) in [90, 270]:
        return page_1['/MediaBox'][2], page_1['/MediaBox'][3]
    else:
        return page_1['/MediaBox'][3], page_1['/MediaBox'][2]
    
    
def outputPDF(path, strings):          #Input: path -> pdf file path, strings -> list of strings (first entry is the notes of 1st slide directly no slide 0) | Output: new pdf file with notes added (destination.pdf)
    
    originalPDF = PdfFileReader(open(path, "rb"))
    totalpages = originalPDF.numPages

    output = PdfFileWriter()

    
    #inside loop
    for j in range(totalpages): #iterate over each slide
                
        slideNotes= strings[j] #1 string       #notes of corrosponding slide
        
        #split slide notes on 100 charchters (on spaces)
        
        
        stringsList=[]
        counter=0
        currentString=""

        for i in range(len(slideNotes)): 
            currentCharacter = slideNotes[i]

            if counter<120:
                currentString+= currentCharacter
                counter+=1
            else:
                if currentCharacter!= " ":
                   currentString+= currentCharacter
                   counter+=1
                else:                                  #stopiing condition 1
                    currentString+= currentCharacter
                    stringsList.append(currentString)
                    counter=0
                    currentString= ""

            if i==len(slideNotes)-1:   #stoppiing condition 2
                stringsList.append(currentString)
                    
            
        #now string of slide is split into 100 character strings in strings list array
                       
        h,w=getPageSize(path,j)
       
                
        #Create New Page
        addedHeight= 45 + (15* len(stringsList) )
        packet = io.BytesIO()
        can = canvas.Canvas(packet, pagesize=  (w, h + addedHeight)   )           # page size= (w,h)
        
        #can.setStrokeColorRGB(0,0,1)
        can.rect(65,5,w-130,addedHeight-10, fill=0)
        
        currentHeight= addedHeight-20
        can.setFillColorRGB(0,0,1)
        
       
        pdfmetrics.registerFont(TTFont('arial', 'assets/arial.ttf'))
        can.setFont('arial', 12)
        can.drawString(75, currentHeight, "NOTES")
        currentHeight-=25
        
        can.setFillColorRGB(0,0,0)
        for x in stringsList:
            x= "I      " + x
            ar = arabic_reshaper.reshape(x)
            ar = get_display(ar)
            can.drawString(75, currentHeight,  ar)
            currentHeight-=15
            
        can.save()
        packet.seek(0)
        new_pdf = PdfFileReader(packet)

        page = new_pdf.getPage(0)
        page.mergeScaledTranslatedPage( originalPDF.getPage(j) , 1 , 0 , addedHeight)    #merge old page with new page (so the page would have the size of new page)

        output.addPage(page)
   
    
    #out of loop
    outputStream = open("destination.pdf", "wb")
    output.write(outputStream)
    outputStream.close()


    
def convertToStringPerSlide(list1): # Input -> [  {"Slide": i, 'ChunksStrings':[ {'String': "" , 'StartTime': 0}] } ] (length: no. of slides + 1 (slot 0 reserved for slide 0) 
    strings=[]
    for x in list1:
        chunks= x['ChunksStrings']
        currentString=""
        for y in chunks:
            currentString1= y['String']
            currentString+= currentString1 + " "
        strings.append(currentString)
   
    return strings
            


In [None]:
#Website

from flask import Flask, render_template, request, redirect, flash, session, url_for, send_from_directory
from werkzeug.utils import secure_filename
import os
import json



app = Flask(__name__)
app._static_folder = "templates/static"


@app.route('/', methods = ['GET', 'POST'])
def first():
    if request.method == 'POST':
        f = request.files['file']
        f.save(secure_filename("lec.pdf"))
        os.replace("lec.pdf", "templates/static/lec.pdf")  #moving uploaded pdf to correct directory that can be accible by html
        return render_template("index.html",data=1)
    else:
        return render_template('upload.html')



@app.route("/uploadAudio", methods=['POST', 'GET'])
def index():
    if request.method == "POST":
        f = request.files['audio_data']
        f.save(secure_filename('assets/audioFile.wav'))
        print('file uploaded successfully')
        
        #------------------------------------
        #Resampling audio file to 44.1khz && saving file as <assets/audioFile.wav>
        file='assets/audioFile.wav'
        import librosa    
        import soundfile
        t, sr = librosa.load(file, sr= 44100)
        soundfile.write('assets/audioFile.wav', t, sr)
        #-------------------------------------
        return 0
    


timeArray=[]
totalSlides=0


#Retrieving data from js 
@app.route("/uploadTimeArray", methods=['POST', 'GET'])
def test():

    response = request.data.decode("utf-8") #decode to convert bytes to strings
    response = json.loads(response)     #json parsing (reverting json.stringfy)
    
    global timeArray              #to update global variable & not make local variable 
    global totalSlides
    
    totalSlides= response.pop(0);  # extracting totalslides at index 0 of response
    timeArray= response;
   
  
@app.route('/result')
def result():    
    
    file='assets/audioFile.wav'
    global timeArray
    global totalSlides
    
    print()
    display(timeArray)
    
    #------------------------------------------------------------------------
    #Speech Recognition 1
    output= speech_recognize_continuous_from_file(file)
    wordListOrdered= output['Ordered']
    wordList= output['Overlap Filtered']
    
    
    #Speech Recognition 2
    #wordList2= speech_recognize_continuous_from_file2(file) #prints output in console
    #------------------------------------------------------------------------
    
    
    notes = matchSlides(totalSlides, timeArray, wordList) 
    #for x in notes:          #Display Notes
    #    print("Slide: " +  str(x['Slide'] ) ) 
    #    display(x['ChunksStrings'])

    
   
    #print("WordList")
    #display(wordList)
    #print()
    
    #print("WordList Before Overlap")
    #display(wordListOrdered)
    
    notes2= integrateDeepLeaning(notes)  # output-> [  {"Slide": i, 'ChunksStrings':[ {'String': "" , 'StartTime': 0}] } ] (length: no. of slides + 1 (slot 0 reserved for slide 0) 
    
    str1= convertToStringPerSlide(notes2)
    str1.pop(0) #remove slote 0 (reserved for slide 0 , slide not in pdf)
    outputPDF("templates/static/lec.pdf", str1)
    print(str1)
    
    #change notes 2 to each slide having 1 string of notes (array of strings with the order of the slides, no slot reserved for slide 0, which doesn't exist)
    
    #print()
    #print("Integrate Deep Learning")
    #display(notes2)
  
    #return render_template('output.html', data=notes2)
    workingdir = os.path.abspath(os.getcwd())
    return send_from_directory(workingdir, 'destination.pdf')


#test method to show pdf in browser through flask
@app.route('/result2')
def result2(): 
    workingdir = os.path.abspath(os.getcwd())
    
    return send_from_directory(workingdir, 'destination.pdf')
    
if __name__ == '__main__':
   app.run(debug = False)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [01/Jul/2022 23:19:18] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [01/Jul/2022 23:19:24] "POST / HTTP/1.1" 200 -
127.0.0.1 - - [01/Jul/2022 23:19:38] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [01/Jul/2022 23:19:47] "POST / HTTP/1.1" 200 -


file uploaded successfully


[2022-07-01 23:21:01,726] ERROR in app: Exception on /uploadAudio [POST]
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\flask\app.py", line 2447, in wsgi_app
    response = self.full_dispatch_request()
  File "C:\ProgramData\Anaconda3\lib\site-packages\flask\app.py", line 1953, in full_dispatch_request
    return self.finalize_request(rv)
  File "C:\ProgramData\Anaconda3\lib\site-packages\flask\app.py", line 1968, in finalize_request
    response = self.make_response(rv)
  File "C:\ProgramData\Anaconda3\lib\site-packages\flask\app.py", line 2127, in make_response
    raise TypeError(
TypeError: The view function did not return a valid response. The return type must be a string, dict, tuple, Response instance, or WSGI callable, but it was a int.
127.0.0.1 - - [01/Jul/2022 23:21:01] "POST /uploadAudio HTTP/1.1" 500 -
[2022-07-01 23:21:01,734] ERROR in app: Exception on /uploadTimeArray [POST]
Traceback (most recent call last):
  File "C:\ProgramData


{'Time': 0, 'Slide': 1}
{'Time': 10494, 'Slide': 2}
{'Time': 35293, 'Slide': 3}
{'Time': 50602, 'Slide': 4}


Array1
{'String': 'ده the في first law of newton بيقول إن يبقى body is at rest or يتحرك بسرعة ثابتة إن in straight line هتفضل or هتفضل تتحرك إن in straight line at constant speed unless it is acted upon by force. ', 'StartTime': 14350.0, 'OriginalSlide': 2, 'Scores': [{'Slide': 2, 'Score': 0.81207037}, {'Slide': 3, 'Score': 0.40498498}, {'Slide': 4, 'Score': 0.40445456}]}
{'String': 'ده the second law of newton إذا quantitative description of the changes that can produce on the motion of her body. ', 'StartTime': 38860.0, 'OriginalSlide': 3, 'Scores': [{'Slide': 2, 'Score': 0.4088407}, {'Slide': 3, 'Score': 0.8290877}, {'Slide': 4, 'Score': 0.5953171}]}
{'String': 'The third law of newton states that when two bodies interact they apply forces to one another ده طبعا are equal in magnitude and opposite in direction. ', 'StartTime': 53350.0, 'OriginalSlide': 4, 'Scores': [{'Slide

127.0.0.1 - - [01/Jul/2022 23:22:30] "GET /result HTTP/1.1" 200 -


["Today's our first physics lecture and we're going to talk about the three newton laws وإزاي هما بيأثروا علينا.  ", 'ده the في first law of newton بيقول إن يبقى body is at rest or يتحرك بسرعة ثابتة إن in straight line هتفضل or هتفضل تتحرك إن in straight line at constant speed unless it is acted upon by force.  ', 'ده the second law of newton إذا quantitative description of the changes that can produce on the motion of her body.  ', 'The third law of newton states that when two bodies interact they apply forces to one another ده طبعا are equal in magnitude and opposite in direction.  ']
