7
7
# create a speech recognition object
8
8
# module-level recognizer instance; transcribe_audio reads it as `r`
r = sr .Recognizer ()
9
9
10
- # a function that splits the audio file into chunks
10
# a function to recognize speech in the audio file
# so that we don't repeat ourselves in other functions
def transcribe_audio(path):
    """Return the text recognized in the audio file at `path`.

    The file is opened with `sr.AudioFile`, recorded through the
    module-level recognizer `r`, and the recording is passed to
    `r.recognize_google`. No exception is caught here, so recognition
    errors propagate to the caller.
    """
    with sr.AudioFile(path) as audio_source:
        # record the audio source into an audio data object
        recorded_audio = r.record(audio_source)
        # convert the recording to text and hand it straight back
        return r.recognize_google(recorded_audio)
19
+
20
+ # a function that splits the audio file into chunks on silence
11
21
# and applies speech recognition
12
- def get_large_audio_transcription (path ):
13
- """
14
- Splitting the large audio file into chunks
15
- and apply speech recognition on each of these chunks
16
- """
22
+ def get_large_audio_transcription_on_silence (path ):
23
+ """Splitting the large audio file into chunks
24
+ and apply speech recognition on each of these chunks"""
17
25
# open the audio file using pydub
18
- sound = AudioSegment .from_wav (path )
19
- # split audio sound where silence is 700 miliseconds or more and get chunks
26
+ sound = AudioSegment .from_file (path )
27
+ # split audio sound where silence is 500 milliseconds or more and get chunks
20
28
chunks = split_on_silence (sound ,
21
29
# experiment with this value for your target audio file
22
30
min_silence_len = 500 ,
@@ -37,24 +45,59 @@ def get_large_audio_transcription(path):
37
45
chunk_filename = os .path .join (folder_name , f"chunk{ i } .wav" )
38
46
audio_chunk .export (chunk_filename , format = "wav" )
39
47
# recognize the chunk
40
- with sr .AudioFile (chunk_filename ) as source :
41
- audio_listened = r .record (source )
42
- # try converting it to text
43
- try :
44
- text = r .recognize_google (audio_listened )
45
- except sr .UnknownValueError as e :
46
- print ("Error:" , str (e ))
47
- else :
48
- text = f"{ text .capitalize ()} . "
49
- print (chunk_filename , ":" , text )
50
- whole_text += text
48
+ try :
49
+ text = transcribe_audio (chunk_filename )
50
+ except sr .UnknownValueError as e :
51
+ print ("Error:" , str (e ))
52
+ else :
53
+ text = f"{ text .capitalize ()} . "
54
+ print (chunk_filename , ":" , text )
55
+ whole_text += text
51
56
# return the text for all chunks detected
52
57
return whole_text
53
58
54
59
60
# a function that splits the audio file into fixed interval chunks
# and applies speech recognition
def get_large_audio_transcription_fixed_interval(path, minutes=5):
    """Transcribe an audio file by splitting it into fixed-length chunks.

    Each chunk is exported as a WAV file into the "audio-fixed-chunks"
    directory, transcribed with `transcribe_audio`, and the recognized
    pieces are concatenated in chunk order.

    Args:
        path: path of the audio file, opened with `AudioSegment.from_file`.
        minutes: length of every chunk in minutes (may be fractional).

    Returns:
        The concatenated text of every chunk that was recognized. Chunks
        for which `sr.UnknownValueError` is raised are reported on stdout
        and skipped.

    Raises:
        ValueError: if `minutes` converts to a chunk length below one
            millisecond, which cannot be used as a slicing step.
    """
    # validate the chunk length first, before any file is opened or written
    chunk_length_ms = int(1000 * 60 * minutes)  # convert to milliseconds
    if chunk_length_ms <= 0:
        raise ValueError(
            "minutes must give a chunk length of at least 1 millisecond, "
            f"got {minutes!r}"
        )
    # open the audio file using pydub
    sound = AudioSegment.from_file(path)
    # split the audio file into consecutive chunks of chunk_length_ms
    chunks = [
        sound[start:start + chunk_length_ms]
        for start in range(0, len(sound), chunk_length_ms)
    ]
    folder_name = "audio-fixed-chunks"
    # create the directory for the chunks; exist_ok avoids the
    # check-then-create race of a separate isdir() test
    os.makedirs(folder_name, exist_ok=True)
    transcribed_parts = []
    # process each chunk
    for i, audio_chunk in enumerate(chunks, start=1):
        # export the audio chunk and save it in the `folder_name` directory
        # NOTE(review): the space before ".wav" is reproduced exactly from the
        # original literal - confirm it is intended
        chunk_filename = os.path.join(folder_name, f"chunk{i} .wav")
        audio_chunk.export(chunk_filename, format="wav")
        # recognize the chunk
        try:
            text = transcribe_audio(chunk_filename)
        except sr.UnknownValueError as e:
            print("Error:", str(e))
        else:
            # NOTE(review): " . " (space before the period) is reproduced
            # exactly from the original literal - confirm it is intended
            text = f"{text.capitalize()} . "
            print(chunk_filename, ":", text)
            transcribed_parts.append(text)
    # return the text for all chunks detected
    return "".join(transcribed_parts)
92
+
93
+
94
+
55
95
if __name__ == '__main__':
    import sys
    # path = "30-4447-0004.wav"
    # path = "7601-291468-0006.wav"
    # the audio file to transcribe is the first command-line argument;
    # exit with a usage message instead of an IndexError when it is missing
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <audio-file>")
    path = sys.argv[1]
    # first pass: chunks split on silence
    print("\n Full text:", get_large_audio_transcription_on_silence(path))
    print("=" * 50)
    # second pass: fixed-length chunks of 1/6 of a minute each
    print("\n Full text:", get_large_audio_transcription_fixed_interval(path, minutes=1 / 6))
103
+
0 commit comments