### Simple exploration with Sample Data

In [1]:
import sqlite3
import ffmpeg

In [3]:
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline


# Use the first GPU with float16 when CUDA is available; otherwise run on the
# CPU with float32.
cuda_available = torch.cuda.is_available()
if cuda_available:
    device, torch_dtype = "cuda:0", torch.float16
else:
    device, torch_dtype = "cpu", torch.float32

model_id = "openai/whisper-tiny"

# Keyword arguments that control how the checkpoint is loaded.
load_options = dict(
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
)
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, **load_options)
model.to(device)

# The processor provides both the tokenizer and the feature extractor that
# are handed to the pipeline below.
processor = AutoProcessor.from_pretrained(model_id)
tokenizer = processor.tokenizer
feature_extractor = processor.feature_extractor

pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=tokenizer,
    feature_extractor=feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
    return_timestamps=True,
)

Device set to use cuda:0


In [6]:
# Transcribe the song and print only the recognised text.
audio_path = "data/WhatsApp Song.mp3"
result = pipe(audio_path)
print(result["text"])



 So that I can't imagine being in my eyes Now as I clap you get a news of my name Think you can run my heart and I forget this Think I'm crying so much, you know why you need And a little one on the other song Guess I didn't want anything Just get right on but you still hit the phone down The old people we know and I think You'll find that I don't want to go back on TV I'm so proud of my mother, don't I? You don't see like that, but I'm never alive to look back at it I'm not strong, I'm not the best I've ever done I'm not that job, but I'm still going to die now That is sleeping on my own, but is it your life? What is that? What do you think? Go back and stand. And you think that is true. I'll do something that should go on You'll see me when you're too late But I'll be ready to see The way that was in the middle of the day Never forget time you told me Why did you let go of me? And I try to make me feel again to the crowd Little water by the wall, and I guess I didn't want anyone to b

In [7]:
# Display the complete pipeline output rather than only its "text" entry.
result

{'text': " So that I can't imagine being in my eyes Now as I clap you get a news of my name Think you can run my heart and I forget this Think I'm crying so much, you know why you need And a little one on the other song Guess I didn't want anything Just get right on but you still hit the phone down The old people we know and I think You'll find that I don't want to go back on TV I'm so proud of my mother, don't I? You don't see like that, but I'm never alive to look back at it I'm not strong, I'm not the best I've ever done I'm not that job, but I'm still going to die now That is sleeping on my own, but is it your life? What is that? What do you think? Go back and stand. And you think that is true. I'll do something that should go on You'll see me when you're too late But I'll be ready to see The way that was in the middle of the day Never forget time you told me Why did you let go of me? And I try to make me feel again to the crowd Little water by the wall, and I guess I didn't want a

In [4]:
# Transcribe the first sample recording and print the recognised text.
audio_path = "data/Sample 1.mp3"
result = pipe(audio_path)
print(result["text"])

Due to a bug fix in https://github.com/huggingface/transformers/pull/28687 transcription using a multilingual Whisper will default to language detection followed by transcription instead of translation to English.This might be a breaking change for your use case. If you want to instead always translate your audio to English, make sure to pass `language='en'`.
Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.43.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


 My name is Ethan. I was asked to come here by 11. Now it is already 3 p.m. They did not even serve me any food or drinks. Terrible


In [5]:
# Transcribe the second sample recording and print the recognised text.
audio_path = "data/Sample 2.mp3"
result = pipe(audio_path)
print(result["text"])

 Help me! I can't find my parents. They told me to wait for them, but I saw this pretty butterfly and followed it. Now I am lost.


In [6]:
# Transcribe the third sample recording and print the recognised text.
audio_path = "data/Sample 3.mp3"
result = pipe(audio_path)
print(result["text"])

 What should I have for lunch? There's only young tofu, Western, Japanese, economic rice stalls here. I'm sick of the choices here.


### Store Transcription in Database

Setting up a connection to a file-backed SQLite database (`test.db`)

In [2]:
from sqlalchemy import create_engine
# SQLite database stored in the file test.db (relative to the working directory).
database_url = "sqlite:///test.db"
engine = create_engine(database_url)

Create table

In [3]:
from sqlalchemy import MetaData
# Metadata collection that the table definition in this notebook is registered on.
metadata_obj = MetaData()

In [4]:
from sqlalchemy import Table, Column, Integer, String
# Schema of the "transcription" table: an integer primary key, the source
# filename, the transcribed text and an integer timestamp.
transcription_columns = [
    Column("id", Integer, primary_key=True),
    Column("filename", String),
    Column("transcribed_text", String),
    Column("timestamp", Integer),
]
transcription_table = Table("transcription", metadata_obj, *transcription_columns)

In [5]:
# Create the tables registered on metadata_obj in the database behind engine.
metadata_obj.create_all(engine)

Select from table

In [6]:
from sqlalchemy import select
# Select at most 5 rows of the transcription table, starting at offset 0.
# To filter by file, add:
#   .where(transcription_table.c.filename == "Sample 1.mp3")
all_rows = select(transcription_table)
sel_stmt = all_rows.limit(5).offset(0)
print(sel_stmt)

SELECT transcription.id, transcription.filename, transcription.transcribed_text, transcription.timestamp 
FROM transcription
 LIMIT :param_1 OFFSET :param_2


In [7]:
# Run the SELECT and print each returned row on its own line.
with engine.connect() as conn:
    rows = conn.execute(sel_stmt)
    for row in rows:
        print(row)

In [12]:
# Run the SELECT again and print all returned rows as a single list.
with engine.connect() as conn:
    result = conn.execute(sel_stmt)
    fetched_rows = result.fetchall()
    print(fetched_rows)

[(1, 'Sample 1.mp3', 'Spongebob Squarepants', None)]


Insert into table

In [9]:
from sqlalchemy import insert
# Placeholder row: only filename and transcribed_text are supplied.
new_row = {
    "filename": "Sample 1.mp3",
    "transcribed_text": "Spongebob Squarepants",
}
ins_stmt = insert(transcription_table).values(**new_row)
print(ins_stmt)

INSERT INTO transcription (filename, transcribed_text) VALUES (:filename, :transcribed_text)


In [10]:
# engine.begin() commits when the block exits without an error, so no
# explicit commit call is needed.
with engine.begin() as conn:
    result = conn.execute(ins_stmt)

In [11]:
# First element of the primary key reported for the row inserted above.
result.inserted_primary_key[0]

1

Update table

In [13]:
from sqlalchemy import update
# Replace the transcribed text of the row whose id is 1.
matches_first_row = transcription_table.c.id == 1
upd_stmt = (
    update(transcription_table)
    .where(matches_first_row)
    .values(transcribed_text="Patrick Star")
)

In [14]:
# engine.begin() commits when the block exits without an error, so no
# explicit commit call is needed.
with engine.begin() as conn:
    result = conn.execute(upd_stmt)

Get count

In [15]:
from sqlalchemy import func
# COUNT over the id column of the transcription table.
id_count = func.count(transcription_table.c.id)
count_stmt = select(id_count)
with engine.connect() as conn:
    result = conn.execute(count_stmt)
    print(result.scalar())

1


### Get Machine Time

In [100]:
import time

# time.time() returns seconds as a float; scale to milliseconds and truncate
# to an integer.
epoch_seconds = time.time()
current_time = int(epoch_seconds * 1000)
print(current_time)

1735058138589


In [102]:
# Convert the millisecond timestamp back to seconds and format it as local time.
seconds_since_epoch = current_time / 1000
local_parts = time.localtime(seconds_since_epoch)
curr_time = time.strftime("%Y-%m-%d %H:%M:%S", local_parts)

print("Current Time is :", curr_time)

Current Time is : 2024-12-25 00:35:38
