In [1]:
import os
import azure.cognitiveservices.speech as speechsdk
import time

In [18]:
from moviepy.editor import VideoFileClip

# Define the input video file and output audio file
mp4_file = "Ses 03.mp4"
wav_file = "audio_cot.wav"

# Open the video inside a context manager so the underlying reader is released
# even when the export below fails part-way through.
with VideoFileClip(mp4_file) as video_clip:
    # Extract the audio from the video clip
    audio_clip = video_clip.audio

    # A clip without an audio track has `audio` set to None; fail with a clear
    # message instead of an AttributeError on the write call.
    if audio_clip is None:
        raise ValueError(f"{mp4_file!r} has no audio track to extract")

    try:
        # Write the audio to a separate file
        audio_clip.write_audiofile(wav_file)
    finally:
        # Always release the audio reader, whether or not the export succeeded.
        audio_clip.close()

print("Audio extraction successful!")

MoviePy - Writing audio in audio_cot.wav


                                                                        

MoviePy - Done.
Audio extraction successful!




In [20]:
# This cell requires environment variables named "AZURE_SUBSCRIPTION_KEY" and
# "AZURE_SERVICE_REGION".
from dotenv import load_dotenv

# Read a local .env file (if there is one) before looking the variables up, so
# the cell also works on a fresh kernel. Variables that are already set in the
# environment are not overridden by default.
load_dotenv()

azure_settings = {
    name: os.environ.get(name)
    for name in ("AZURE_SUBSCRIPTION_KEY", "AZURE_SERVICE_REGION")
}

# Fail early, naming the missing variable(s), rather than handing None to the SDK.
missing_settings = [name for name, value in azure_settings.items() if not value]
if missing_settings:
    raise EnvironmentError(
        "Missing required environment variable(s): " + ", ".join(missing_settings)
    )

speech_config = speechsdk.SpeechConfig(
    subscription=azure_settings["AZURE_SUBSCRIPTION_KEY"],
    region=azure_settings["AZURE_SERVICE_REGION"],
)
# Recognize from the audio file on disk rather than from a microphone.
audio_config = speechsdk.AudioConfig(filename="audio_cot.wav")
speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)

In [21]:
def stop_cb(evt):
    """Stop continuous recognition when a session ends or is canceled.

    Intended to be connected to the recognizer's `session_stopped` and
    `canceled` signals.

    Args:
        evt: Event object delivered by the signal; it is not inspected here.
    """
    # Stop once per event. The handler must not call itself: there is no base
    # case, so re-entering it would recurse until RecursionError.
    speech_recognizer.stop_continuous_recognition()

In [22]:
# Final recognized phrases, in the order they were delivered.
all_results = []


def handle_final_result(evt):
    """Store the text of a finalized recognition result in `all_results`.

    Args:
        evt: Event object whose `result.text` holds the recognized phrase.
    """
    recognized_text = evt.result.text
    # Results with no recognized speech carry an empty text; skipping them keeps
    # stray double spaces out of the space-joined transcript.
    if recognized_text:
        all_results.append(recognized_text)
# Every finalized phrase is handed to the collector as it arrives.
speech_recognizer.recognized.connect(handle_final_result)

# A session can end normally or by cancellation; both are routed to the same
# stop handler.
for end_signal in (speech_recognizer.session_stopped, speech_recognizer.canceled):
    end_signal.connect(stop_cb)

In [23]:
# Start continuous recognition on the recognizer configured above.
# NOTE(review): per the Speech SDK docs this call appears to return right away
# while recognition keeps running in the background — confirm. If so, wait for
# the `session_stopped`/`canceled` signal before a later cell reads `all_results`,
# otherwise a Run All pass may join an empty or partial list.
speech_recognizer.start_continuous_recognition()

In [24]:
# Stitch the collected phrases into one string, separated by single spaces.
transcription = str.join(" ", all_results)

In [25]:
# Persist the transcript. The encoding is pinned to UTF-8 so non-ASCII
# characters are written the same way on every platform instead of depending
# on the locale's default encoding.
with open("transcription.txt", "w", encoding="utf-8") as fp:
    fp.write(transcription)

In [26]:
from dotenv import load_dotenv
from pathlib import Path
# Load settings via python-dotenv (presumably from a local .env file — confirm).
# NOTE(review): the speech-config cell (In [20]) already reads os.environ before
# this point; if those values are expected to come from .env, this call has to
# run before that cell for a fresh-kernel Run All to work.
load_dotenv()

True

In [27]:
from langchain_openai import ChatOpenAI

# Chat model shared by the generation cells that follow.
model = ChatOpenAI(model="gpt-4o-mini")

In [28]:
from langchain_core.messages import HumanMessage, SystemMessage

# The instruction and the transcript-bearing request are kept as named pieces
# and then assembled into the message list sent to the model.
preread_instruction = "For a given transcription from a video to be published online, give a very short pre-read for the viewers on what to expect from the video"
preread_request = f"TRANSCRIPTION: {transcription}\nPREREAD:"

messages = [
    SystemMessage(content=preread_instruction),
    HumanMessage(content=preread_request),
]

# Single call to the chat model; the reply is inspected in the next cell.
response = model.invoke(messages)

In [29]:
# Show the text of the most recent model response (the pre-read request, when
# cells are run in order); as the cell's last expression it is echoed as output.
response.content

'In this video, we explore the concept of "chain of thought" prompting in language models, focusing on its importance for multi-step reasoning tasks. You\'ll learn how traditional prompting methods can fall short when complex reasoning is required, and how the chain of thought technique can help induce logical thinking in language models. We\'ll discuss the method\'s implementation, including examples that illustrate how to structure prompts effectively to improve the accuracy of model responses. Join us as we unpack this influential prompting technique and its applications in enhancing language model performance!'

In [32]:
# Ask the model for a summary of the transcript.
messages = [
    SystemMessage(content="For a given transcription from the video published online, give a summary of the video for the viewers"),
    # "\n" puts the "SUMMARY:" cue on its own line, matching the pre-read prompt.
    # The previous "\S" is not a valid escape and left a literal backslash in the prompt.
    HumanMessage(content=f"TRANSCRIPTION: {transcription}\nSUMMARY:"),
]

response = model.invoke(messages)

In [33]:
# Show the text of the most recent model response (the summary request, when
# cells are run in order); as the cell's last expression it is echoed as output.
response.content

'In this video, the speaker discusses the concept of "chain of thought prompting" as a technique to enhance the performance of language models, particularly in tasks that require multi-step reasoning. They explain that while traditional few-shot prompting may work for straightforward queries, it often falls short when complex reasoning is needed. The key idea is to encourage models to articulate their thought processes step-by-step rather than jumping directly to answers.\n\nThe speaker highlights the importance of structuring prompts in a way that illustrates the reasoning process, thereby enabling the model to generate more accurate responses. By providing detailed examples that break down the problem into intermediate steps, the model can learn to mimic this logical thinking in its future outputs.\n\nThe video emphasizes that this approach not only improves the accuracy of answers but also enhances interpretability, allowing users to understand the reasoning behind the model\'s conc

In [34]:
# Ask the model for recall-type multiple choice questions on the transcript.
messages = [
    SystemMessage(content="You are an expert teacher in AI. For a given transcription from a video, generate 10 multiple choice questions that cover the topics discussed in this session. The questions are to be of recall-from-content type"),
    # "\n" puts the "QUESTIONS:" cue on its own line, matching the pre-read prompt.
    # The previous "\Q" is not a valid escape and left a literal backslash in the prompt.
    HumanMessage(content=f"TRANSCRIPTION: {transcription}\nQUESTIONS:"),
]

response = model.invoke(messages)

In [35]:
# Print rather than echo the response text so its line breaks are rendered
# as separate lines instead of appearing as "\n" in a string repr.
print(response.content)

1. What is the main problem with few-shot prompting when it comes to multi-step reasoning?
   - A) It generates too many answers
   - B) It does not effectively guide the model through the necessary reasoning steps
   - C) It requires too much data
   - D) It is not applicable to language models
   
2. What is the key idea behind the chain of thought prompting method?
   - A) To provide straightforward answers without reasoning
   - B) To induce logical, step-by-step thinking in the model
   - C) To simplify the input questions
   - D) To minimize the number of examples used in prompting

3. How does a language model like LLM generate tokens?
   - A) By implementing logical reasoning
   - B) Based on a series of previous tokens and their probabilities
   - C) Through external computational tools
   - D) By guessing the answers

4. What does the implementation of chain of thought prompting aim to achieve?
   - A) Generate random outputs
   - B) Enable language models to generate a serie