In [None]:
!pip install pyAudioAnalysis
!pip install eyed3
!pip install pydub
!pip install hmmlearn
!pip install SpeechRecognition

from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import audioSegmentation
import soundfile as sf
import speech_recognition as sr
import os
from IPython.display import Audio

# Read the recording; read_audio_file yields the sample rate and the samples.
input_file = "/content/final audio.wav"
sampling_rate, audio_signal = audioBasicIO.read_audio_file(input_file)

# Find the non-silent stretches, analysing the signal with a 20 ms window
# and a 20 ms step. Each returned entry is unpacked below as a
# (start_seconds, end_seconds) pair.
segments = audioSegmentation.silence_removal(
    audio_signal,
    sampling_rate,
    0.020,
    0.020,
)

# One recognizer instance is shared by every segment.
recognizer = sr.Recognizer()

# Output folders, one per speaker; creating them is a no-op if they exist.
person1_dir = "person1_segments"
person2_dir = "person2_segments"
for _out_dir in (person1_dir, person2_dir):
    os.makedirs(_out_dir, exist_ok=True)

# Save every detected segment to disk, transcribe it with the Google Web
# Speech API, and print the result (plus an inline audio player on success).
#
# NOTE: speakers are assigned by strict alternation -- even-indexed segments
# go to "Person 1", odd-indexed ones to "Person 2". No speaker identification
# is performed, so the labels are only meaningful if the two speakers really
# take turns and every turn is detected as exactly one segment.
for i, seg in enumerate(segments):
    start_sec, end_sec = seg

    # Convert the segment boundaries from seconds to sample indices.
    start_frame = int(start_sec * sampling_rate)
    end_frame = int(end_sec * sampling_rate)
    segment_audio = audio_signal[start_frame:end_frame]

    # Decide the speaker once and derive everything else from it.
    is_first_speaker = i % 2 == 0
    speaker_label = "Person 1" if is_first_speaker else "Person 2"
    speaker_dir = person1_dir if is_first_speaker else person2_dir
    turn_index = i // 2  # per-speaker counter: 0-based in file names
    prefix = f"{speaker_label} - Segment {turn_index + 1}"  # 1-based in messages

    # Save the segment as a separate audio file.
    segment_filename = os.path.join(speaker_dir, f"segment_{turn_index}.wav")
    sf.write(segment_filename, segment_audio, sampling_rate)

    # Load the saved file fully into memory; the file handle is released
    # before the (potentially slow) network request below is made.
    with sr.AudioFile(segment_filename) as source:
        audio_data = recognizer.record(source)

    # Keep the try body down to the single call that can raise these errors.
    try:
        text = recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        print(f"{prefix}: Speech recognition could not understand audio")
    except sr.RequestError as e:
        print(f"{prefix}: Error in requesting results from Google Speech Recognition service: {e}")
    else:
        print(f"{prefix}: '{text}'")
        # IPython's Audio expects multi-channel data as (channels, samples),
        # whereas a sliced multi-channel signal is (samples, channels), so
        # transpose 2-D input. Mono (1-D) input is passed through unchanged.
        if segment_audio.ndim == 2:
            playable_audio = segment_audio.T
        else:
            playable_audio = segment_audio
        # Display the audio segment
        display(Audio(playable_audio, rate=sampling_rate))


Collecting pyAudioAnalysis
  Downloading pyAudioAnalysis-0.3.14.tar.gz (41.3 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 41.3/41.3 MB 21.1 MB/s eta 0:00:00
  Preparing metadata (setup.py) ... done
Building wheels for collected packages: pyAudioAnalysis
  Building wheel for pyAudioAnalysis (setup.py) ... done
  Created wheel for pyAudioAnalysis: filename=pyAudioAnalysis-0.3.14-py3-none-any.whl size=41264373 sha256=61e77961cb804f297190426424309dfaa5e0949a8fd5938b7622ccbc14c3ec75
  Stored in directory: /root/.cache/pip/wheels/a7/54/73/fa830689c2440d2c81ff175c60e374930ad1607a8881e0f43f
Successfully built pyAudioAnalysis
Installing collected packages: pyAudioAnalysis
Successfully installed pyAudioAnalysis-0.3.14
Collecting eyed3
  Downloading eyed3-0.9.7-py3-none-any.whl (246 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m246.1/246.1 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting

Person 2 - Segment 1: 'good morning'


Person 1 - Segment 2: 'nice to meet you'


Person 2 - Segment 2: 'nice to meet you'
