In [None]:
!pip install SpeechRecognition
!pip install spacy



In [None]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
from heapq import nlargest

# Audio to Text



In [None]:
import speech_recognition as sr

def audio_to_text(audio_file_path):
    # Initialize the recognizer
    recognizer = sr.Recognizer()

    # Load the audio file
    with sr.AudioFile(audio_file_path) as source:
        # Adjust for ambient noise
        recognizer.adjust_for_ambient_noise(source)

        # Record the audio
        audio = recognizer.record(source)

    try:
        # Recognize speech using Google Web Speech API
        text = recognizer.recognize_google(audio)
        return text
    except sr.UnknownValueError:
        print("Google Web Speech API could not understand audio")
    except sr.RequestError as e:
        print(f"Could not request results from Google Web Speech API; {e}")

# Specify the path to your audio file
audio_file_path = "/content/rpp16.wav"


# Convert audio to text
result = audio_to_text(audio_file_path)

# Print the result
print("Text from audio:", result)


In [None]:
!pip install pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


# Fragmentation


In [None]:
from pydub import AudioSegment
import os

def split_audio(input_file, output_folder, segment_duration=15):
    # Create output folder if it doesn't exist
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    audio = AudioSegment.from_wav(input_file)
    audio_duration = len(audio)

    for start_time in range(0, audio_duration, segment_duration * 1000):
        end_time = min(start_time + segment_duration * 1000, audio_duration)
        segment = audio[start_time:end_time]
        segment_path = os.path.join(output_folder, f"segment_{start_time//1000}-{end_time//1000}.wav")
        segment.export(segment_path, format="wav")

# Specify the path to your large audio file
large_audio_file_path = "/content/sample_data/output"

# Specify the output folder for segments
output_folder = "/content/output"

# Split the audio file into segments
split_audio(large_audio_file_path, output_folder)


In [None]:

from pydub import AudioSegment
import speech_recognition as sr

def transcribe_audio_segment(segment_file):
    recognizer = sr.Recognizer()

    with sr.AudioFile(segment_file) as source:
        audio_data = recognizer.record(source)

    try:
        text = recognizer.recognize_google(audio_data)
        return text
    except sr.UnknownValueError:
        print(f"Speech recognition could not understand audio: {segment_file}")
        return ""
    except sr.RequestError as e:
        print(f"Speech recognition request error: {e}")
        return ""

def process_audio_file(input_audio_file, segment_duration=60):
    audio = AudioSegment.from_wav(input_audio_file)
    audio_duration = len(audio)

    segments = []
    for start_time in range(0, audio_duration, segment_duration * 1000):
        end_time = min(start_time + segment_duration * 1000, audio_duration)
        segment = audio[start_time:end_time]

        # Export segment to temporary WAV file
        segment_file = f"segment_{start_time//1000}-{end_time//1000}.wav"
        segment.export(segment_file, format="wav")

        # Transcribe the audio segment
        text = transcribe_audio_segment(segment_file)
        segments.append(text)

        # Clean up temporary segment file
        os.remove(segment_file)

    return segments

# Specify the path to your large audio file
large_audio_file_path = "/content/rpp16.wav"

# Process the audio file in segments (60-second segments by default)
transcribed_segments = process_audio_file(large_audio_file_path)

# Print the transcribed text for each segment
string = ""
for i, segment_text in enumerate(transcribed_segments, start=1):
    string+= segment_text

    print(f"Segment {i} Transcription: {segment_text}")

Segment 1 Transcription: hello everyone and welcome this is the rapid Planet podcast today is February 6th 2012 we're going to be talking about the latest and greatest technology news from the past 2 weeks but this episode is a little bit different because well we got new Mike's people that's right brand new podcasting Mike's in the house you can definitely hear a difference of how much better these sound so really excited to get this and let's try them out let's get to some tech news
Segment 2 Transcription: and we're ready to talk about some tech news so Microsoft now it's recently that it's going to be getting rid of its developer conference and my Xbox and it's kind of interesting because they just recently announced that they were pulling out of Cs 2012 says that they're going to be molding some of the functions of Windows Phone Internet Explorer HTML5
Segment 3 Transcription: Silverlight technology stuff like that into an upcoming developer conference that they are going to be ho

In [None]:
print(string)

hello everyone and welcome this is the rapid Planet podcast today is February 6th 2012 we're going to be talking about the latest and greatest technology news from the past 2 weeks but this episode is a little bit different because well we got new Mike's people that's right brand new podcasting Mike's in the house you can definitely hear a difference of how much better these sound so really excited to get this and let's try them out let's get to some tech newsand we're ready to talk about some tech news so Microsoft now it's recently that it's going to be getting rid of its developer conference and my Xbox and it's kind of interesting because they just recently announced that they were pulling out of Cs 2012 says that they're going to be molding some of the functions of Windows Phone Internet Explorer HTML5Silverlight technology stuff like that into an upcoming developer conference that they are going to be holding later this year so that's still going to be happening I guess the the c

# Summarisation



In [None]:
def summarize(text, per):
    nlp = spacy.load('en_core_web_sm')
    doc= nlp(text)
    tokens=[token.text for token in doc]
    word_frequencies={}
    for word in doc:
        if word.text.lower() not in list(STOP_WORDS):
            if word.text.lower() not in punctuation:
                if word.text not in word_frequencies.keys():
                    word_frequencies[word.text] = 1
                else:
                    word_frequencies[word.text] += 1
    max_frequency=max(word_frequencies.values())
    for word in word_frequencies.keys():
        word_frequencies[word]=word_frequencies[word]/max_frequency
    sentence_tokens= [sent for sent in doc.sents]
    sentence_scores = {}
    for sent in sentence_tokens:
        for word in sent:
            if word.text.lower() in word_frequencies.keys():
                if sent not in sentence_scores.keys():
                    sentence_scores[sent]=word_frequencies[word.text.lower()]
                else:
                    sentence_scores[sent]+=word_frequencies[word.text.lower()]
    select_length=int(len(sentence_tokens)*per)
    summary=nlargest(select_length, sentence_scores,key=sentence_scores.get)
    final_summary=[word.text for word in summary]
    summary=''.join(final_summary)
    return summary

In [None]:
summarize(string ,0.1)

"it's not an April Fool's joke it's actually happening and Stringer the old CEO is moving to chairman of the board so that's a big shake-up for Sony and hopefully her ride turns things around I mean he he's partially responsible for the recent months and hopefully he continues that he's been doing pretty well lately so hopefully we see that continued best of luck we're not going to spend very much time on this next thing since I'm not a huge number person but Facebook has filed for IPO IPO standing for initial public offering and what that means is Facebook is trying to become a publicly traded company that's going to be trading under the stock symbol FB andopen the phrase something like 5 billion dollars in the process of all this so that's good next up the next version of Microsoft Office office office 1500 the technical preview program stage which means that Microsoft is allowing a small amount of third-party customers to test and Report bugs and stuff from Office 15 under a non-dis

# Rouge Score(Evaluation)

In [None]:
!pip install rouge
from rouge import Rouge


reference_summary = "Welcome to the Rapid Planet Podcast! Today is February 6th, 2012, and we have some exciting technology news to cover from the past two weeks. First off, we’re thrilled to announce our new microphones, which have significantly improved our audio quality.Starting with Microsoft, they have announced they will be discontinuing their developer conference and My Xbox event, consolidating these into a larger developer conference later this year. This comes on the heels of Microsoft pulling out of CES 2012. They plan to include Windows Phone, Internet Explorer, HTML5, and Silverlight technologies in the new conference.In unfortunate news, there have been significant shake-ups in the tech industry. John Rubinstein, former CEO of Palm, has left HP. At Research In Motion (RIM), Mike Lazaridis and Jim Balsillie have stepped down, making way for Thorsten Heins as the new CEO. RIM has been struggling with declining sales and market respect.HP has announced that webOS is going open source. They aim to complete this transition by September 2012, providing developers with tools to create new applications and potentially port the OS to other devices.Nintendo has announced the Nintendo Network, a new online service for the 3DS and the upcoming Wii U. This network will feature personal user accounts and possibly allow digital game downloads, bringing it closer to services like PSN and Xbox Live.Sony has named Kazuo Hirai as the new CEO, effective April 1st, with former CEO Howard Stringer moving to chairman of the board. Hirai is known for his success with the PlayStation division, and there is hope he will continue this positive trend for Sony.Facebook has filed for an IPO, aiming to raise around $5 billion as they transition to a publicly traded company under the symbol FBThe next version of Microsoft Office, Office 15, has entered the technical preview program stage. A public beta is expected this summer. Additionally, Microsoft has released a Kinect for Windows SDK, which could lead to Kinect integration in portable devices.There are rumors that Steve Jobs was interested in Lytro’s panoptic camera technology. This camera allows users to refocus photos after they are taken. There’s speculation about integrating Lytro technology with iOS, potentially revolutionizing mobile photography.The next version of Windows Phone, codenamed Apollo, is expected late in 2012. Features mentioned include multi-core processor support, various screen resolutions, microSD support, NFC, and deep integration with Skype. BitLocker encryption support similar to Windows Enterprise on PCs is also anticipated, which would be a significant security enhancement.Finally, the Rapid Planet Podcast will transition to a video format starting April 29th, available on YouTube and iTunes. We recently posted a preview video and are eager to hear feedback on the new format.That’s all for today’s episode. If we missed any major stories, let us know, and we’ll cover them next time. Thank you for tuning in!"
#Initialize ROUGE
rouge = Rouge()

#Calculate ROUGE scores
scores = rouge.get_scores(string , reference_summary)

#Print the ROUGE scores
print(scores)

[{'rouge-1': {'r': 0.3161290322580645, 'p': 0.33676975945017185, 'f': 0.32612312312479763}, 'rouge-2': {'r': 0.0982532751091703, 'p': 0.087890625, 'f': 0.09278350017013524}, 'rouge-l': {'r': 0.25483870967741934, 'p': 0.27147766323024053, 'f': 0.26289516971381593}}]
