## 0. Installing ibm_watson in your virtual environment

In [2]:
# Uncomment the line that matches your Python development environment.

# Default option: install ibm_watson with pip if you are running Jupyter inside your virtualenv with no other interfaces.
#!pip install ibm_watson

# If you use Anaconda:
#!conda install -c conda-forge ibm-watson

## 1. Imports

In [3]:
from ibm_watson import SpeechToTextV1
from ibm_watson.websocket import RecognizeCallback, AudioSource 
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

import os
import json

## 2. Setup STT Service

In [3]:
# Service credentials: both values are shown in the "Manage" section of your
# Speech to Text instance.
# They are read from the SPEECH_TO_TEXT_APIKEY / SPEECH_TO_TEXT_URL environment
# variables when those are set, so the key does not have to be saved inside the
# notebook. The placeholders are only a fallback and can still be replaced by hand.
apikey = os.environ.get('SPEECH_TO_TEXT_APIKEY', 'YOUR API KEY')
url = os.environ.get('SPEECH_TO_TEXT_URL', 'YOUR URL')

In [4]:
# Wrap the API key in an IAM authenticator and hand it to the Speech to Text client.
authenticator = IAMAuthenticator(apikey)
stt = SpeechToTextV1(authenticator=authenticator)
# Point the client at the service URL held in `url`.
stt.set_service_url(url)

## 3. Functions and Data Structures 

In [45]:
# Folder that holds your audio files (replace with your own folder).
FOLDER_DATA_PATH = 'your_folder/'

# Content type passed to the service for each file (replace to match your audio).
C_TYPE = 'audio/mp3'

# Extension of your audio files; only file names ending with it are processed.
FORMAT = '.mp3'

# Recognition model (replace as needed):
#   'es-CO_BroadbandModel' is for Spanish - Colombian.
#   'en-US_BroadbandModel' is for English US.
# To see available models, check http://shorturl.at/mxNSU
MODEL_LANGUAGE = 'es-CO_BroadbandModel'

In [46]:
def extract_transcript(results):
    """Merge recognition results into one transcript and a mean confidence.

    Only the first alternative of every result is used.

    Args:
        results: Sequence of result dicts, each holding an ``'alternatives'``
            list whose first item has ``'transcript'`` and ``'confidence'``.

    Returns:
        A ``(text, confidence)`` tuple: the transcripts concatenated in order
        and the average of the confidences. For an empty sequence the result
        is ``("", 0)``.
    """
    pieces = []
    confidence_total = 0
    for result in results:
        best = result['alternatives'][0]
        pieces.append(best['transcript'])
        # Accumulate left to right so the floating-point result is reproducible.
        confidence_total += best['confidence']

    transcript = "".join(pieces)
    if len(results) == 0:
        # Nothing to average: hand back the untouched zero.
        return transcript, confidence_total
    return transcript, confidence_total / len(results)

In [47]:
def extract_transcripts_from_folder(folder_path, c_type, model_language):
    """Transcribe every matching audio file found in a folder.

    Each entry of ``folder_path`` whose name ends with the module-level
    ``FORMAT`` extension is sent to ``stt.recognize``. For every file a dict
    with the keys ``filename``, ``transcript`` and ``confidence`` is appended
    to the module-level ``data_transcripts`` list, which must already exist
    when this function is called.

    Args:
        folder_path: Directory that holds the audio files.
        c_type: Value passed to the service as ``content_type``.
        model_language: Value passed to the service as ``model``.

    Returns:
        None. Results accumulate in ``data_transcripts`` and progress is
        printed as each file completes.
    """
    processed = 0
    print("Files in process in", folder_path)
    # os.listdir gives names in arbitrary order; sorting keeps runs reproducible.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(FORMAT):
            continue

        # The file only needs to stay open for the duration of the service call.
        with open(os.path.join(folder_path, filename), 'rb') as f:
            # NOTE(review): `continuous` may no longer be a documented recognize
            # parameter in current SDK versions - confirm whether it is still needed.
            res = stt.recognize(audio=f, content_type=c_type, model=model_language, continuous=True).get_result()

        text, confidence = extract_transcript(res['results'])
        data_transcripts.append({"filename": filename, "transcript": text, "confidence": confidence})

        processed += 1
        print(processed, " - ", filename)

    # Report the files handled by THIS call. len(data_transcripts) would also
    # count entries left in the shared list by earlier calls.
    print('Number of files processed in', folder_path, processed)

In [48]:
def save_json_file(data_dict, out_file):
    """Write an iterable of JSON-serializable items to a file as a JSON array.

    Every item is serialized on its own line, items are separated by a comma
    and a newline, and the whole array is followed by a trailing newline.

    Args:
        data_dict: Iterable of items accepted by ``json.dumps``.
        out_file: Path of the file to create or overwrite.
    """
    with open(out_file, 'w') as fp:
        serialized_items = [json.dumps(entry) for entry in data_dict]
        array_body = ',\n'.join(serialized_items)
        fp.write('[' + array_body + ']\n')

## 4. Convert and Save

In [49]:
# Start from an empty list on every run; extract_transcripts_from_folder appends to it.
data_transcripts = []
# The output path is FOLDER_DATA_PATH with the file name appended by plain string concatenation.
# NOTE(review): the name reads '_data_transcripst' (probably meant 'transcripts') -
# confirm nothing else expects this exact name before renaming it.
transcripts_out_json_file = FOLDER_DATA_PATH + '_data_transcripst.json'
# Transcribe every matching file in the folder, then write the collected entries as JSON.
extract_transcripts_from_folder(FOLDER_DATA_PATH, C_TYPE, MODEL_LANGUAGE)
save_json_file(data_transcripts, transcripts_out_json_file)

Files in process in data/5/
1  -  Respuesta Marco Chaparro - Filósofo (Audio).mp4.wav
2  -  Respuesta Milton Rojas - Crucero (Audio).mp4.wav
3  -  Respuesta Natalia Jaramillo - Gardel.mp4.wav
4  -  Respuesta Nelson González - Arquitecto.mp4.wav
5  -  Respuesta Nicolalyey Suárez - Generar empleo (Audio).mp4.wav
6  -  Respuesta Nicolás Osorio - Maestría (Audio).mp4.wav
7  -  Respuesta Octavio Andrés - Matrimonio (Audio).mp4.wav
8  -  Respuesta Orlando Tovar - Moto.mp4.wav
9  -  Respuesta Pao Zoo - Europa (Audio).mp4.wav
10  -  Respuesta Paula Gallego   Casa.mp4.wav
11  -  Respuesta Paula Jiménez - Mundial Brasil.mp4.wav
12  -  Respuesta Paula Valbuena   Rock.mp4.wav
13  -  Respuesta Rafael Ricardo Morelo - Agencia Propia (Audio).mp4.wav
14  -  Respuesta Saimon Cadavid - Centro de la tierra.mp4.wav
15  -  Respuesta Santiago Vélez - Aplausos.mp4.wav
16  -  Respuesta Sara Gómez - La India.mp4.wav
17  -  Respuesta Saúl Suaza   Machu Pichu.mp4.wav
18  -  Respuesta Sebastián Henao - Carro t

In [50]:
# Sanity check: number of transcript entries collected in data_transcripts.
len(data_transcripts)

18