### 🧱 Step 1: Audio I/O (Hear & Speak Back)

In [1]:
from pathlib import Path

import numpy as np
import scipy.io.wavfile as wav
import sounddevice as sd

In [2]:
# Recording parameters used by the capture / playback cell below.
fs = 16_000    # sample rate in Hz (16 kHz)
duration = 3   # length of the test clip, in seconds

In [5]:
# Capture `duration` seconds of mono 16-bit audio at `fs` Hz.
print("🎤 Recording... Speak now!")
recording = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype='int16')
sd.wait()  # wait until the recording has finished
print("✅ Recording complete!")

# Save as a file (optional)
# Create the target folder first: on a fresh checkout "Recordings/" does not
# exist yet and wav.write() would fail with FileNotFoundError.
recording_path = Path("Recordings") / "whispy_test.wav"
recording_path.parent.mkdir(parents=True, exist_ok=True)
wav.write(str(recording_path), fs, recording)

# Playback
print("🔊 Playing back your voice...")
sd.play(recording, fs)
sd.wait()  # wait until playback has finished
print("🎉 Done! That’s your voice.")


🎤 Recording... Speak now!
✅ Recording complete!
🔊 Playing back your voice...
🎉 Done! That’s your voice.


### 🧱 Step 2: Whispy learns to understand words (STT)

In [8]:
import sounddevice as sd
import queue
import json
from vosk import Model, KaldiRecognizer

In [None]:
# Load the Vosk model found at the relative path "model" and build a recognizer on it.
# The recognizer's second argument (16000) is the same value the microphone
# stream is opened with (samplerate=16000).
model = Model("model")
recognizer = KaldiRecognizer(model, 16_000)

In [9]:
# Hand-off buffer: `callback` puts raw audio chunks in, the recognition loop takes them out.
q = queue.Queue()

In [None]:
def callback(indata, frames, time, status):
    """Stream callback: forward one captured audio block to the shared queue.

    Args:
        indata: Buffer holding the captured audio block.
        frames: Not used by this callback.
        time: Not used by this callback.
        status: Stream status; printed whenever it is truthy.
    """
    if status:
        print(status)
    chunk = bytes(indata)  # bytes() takes its own copy of the buffer contents
    q.put(chunk)

In [21]:
# Feed microphone audio to the recognizer until an utterance contains "stop".
with sd.RawInputStream(
    samplerate=16000,
    blocksize=8000,
    dtype='int16',
    channels=1,
    callback=callback,
):

    print("🎤 Whispy is listening... Say something! (say 'stop' to quit)")

    while True:
        data = q.get()  # blocks until the callback delivers the next chunk
        if not recognizer.AcceptWaveform(data):
            continue  # no final result for this chunk yet
        result = json.loads(recognizer.Result())
        text = result["text"]
        if not text:
            continue  # final result was empty
        print("📝 Whispy heard:", text)
        if "stop" in text.lower():
            print("👋 Whispy says: Bye Mansour!")
            break

🎤 Whispy is listening... Say something! (say 'stop' to quit)
📝 Whispy heard: dave
📝 Whispy heard: so how are you will be
📝 Whispy heard: they heard would miss him smuggled they've who is dave
📝 Whispy heard: no no no no way to his me
📝 Whispy heard: know your name is was be lucky that help our yeah that tackling your name is ray speak
📝 Whispy heard: it is was no no wait wait wait
📝 Whispy heard: the guy as a way to it can we start from over
📝 Whispy heard: what a fuck man
📝 Whispy heard: we're lucky caligula in the way that either subassembly told to shake it has been this got that stop
👋 Whispy says: Bye Mansour!


### 🧱 Step 3: Whispy Talks Back (TTS)

In [1]:
import sounddevice as sd
import queue
import json
import pyttsx3
from vosk import Model, KaldiRecognizer

In [2]:
# Load the Vosk model found at the relative path "model" and build a recognizer on it.
# The recognizer's second argument (16000) is the same value the microphone
# stream is opened with (samplerate=16000).
model = Model("model")
recognizer = KaldiRecognizer(model, 16_000)

In [3]:
# TTS engine
# Created once here and reused by every speak() call.
engine = pyttsx3.init()

In [4]:
# Hand-off buffer: `callback` puts raw audio chunks in, the recognition loop takes them out.
q = queue.Queue()

In [5]:
def callback(indata, frames, time, status):
    """Stream callback: forward one captured audio block to the shared queue.

    Args:
        indata: Buffer holding the captured audio block.
        frames: Not used by this callback.
        time: Not used by this callback.
        status: Stream status; printed whenever it is truthy.
    """
    if status:
        print(status)
    chunk = bytes(indata)  # bytes() takes its own copy of the buffer contents
    q.put(chunk)

In [6]:
def speak(text):
    """Show ``text`` in the cell output and pass it to the TTS engine.

    Args:
        text: The sentence Whispy should say.

    Uses the module-level ``engine`` created with ``pyttsx3.init()``.
    """
    print("🗣️ Whispy says:", text)
    engine.say(text)
    # NOTE(review): runAndWait() presumably blocks until the speech has
    # finished — confirm against the pyttsx3 documentation.
    engine.runAndWait()

In [7]:
def _discard_pending_audio():
    """Drop every audio chunk currently waiting in ``q``.

    The microphone stream keeps running while ``speak()`` is busy, so chunks
    pile up in the queue during each reply. Left in place, they would be
    transcribed afterwards, so Whispy would react late and could pick up its
    own voice from the speakers.
    """
    while True:
        try:
            q.get_nowait()
        except queue.Empty:
            return


# Open microphone stream
with sd.RawInputStream(samplerate=16000, blocksize=8000, dtype='int16',
                       channels=1, callback=callback):

    speak("Hello Mansour! I am Whispy. Say something. Say stop to quit.")
    _discard_pending_audio()  # ignore whatever was captured during the greeting

    while True:
        data = q.get()
        if not recognizer.AcceptWaveform(data):
            continue  # no final result for this chunk yet
        result = json.loads(recognizer.Result())
        text = result.get("text", "")
        if not text:
            continue
        print("📝 Whispy heard:", text)

        heard = text.lower()  # normalise once instead of once per branch

        if "stop" in heard:
            speak("Goodbye Mansour, see you soon!")
            break

        if "hello" in heard:
            reply = "Hello! Nice to hear you."
        elif "your name" in heard:
            reply = "My name is Whispy. I am your voice assistant."
        else:
            reply = "I heard you say " + text

        speak(reply)
        _discard_pending_audio()  # audio recorded while talking is not a command

🗣️ Whispy says: Hello Mansour! I am Whispy. Say something. Say stop to quit.
📝 Whispy heard: hi
🗣️ Whispy says: I heard you say hi
📝 Whispy heard: hello
🗣️ Whispy says: Hello! Nice to hear you.
📝 Whispy heard: where
🗣️ Whispy says: I heard you say where
📝 Whispy heard: he knew service where are you now
🗣️ Whispy says: I heard you say he knew service where are you now
📝 Whispy heard: ha
🗣️ Whispy says: I heard you say ha
📝 Whispy heard: what are you doing
🗣️ Whispy says: I heard you say what are you doing
📝 Whispy heard: can you reply
🗣️ Whispy says: I heard you say can you reply
📝 Whispy heard: useless
🗣️ Whispy says: I heard you say useless
📝 Whispy heard: i didn't say is was
🗣️ Whispy says: I heard you say i didn't say is was
📝 Whispy heard: stop
🗣️ Whispy says: Goodbye Mansour, see you soon!


### 🧱 Step 4: Add App/Website Commands 🌐

In [8]:
import sounddevice as sd
import queue
import json
import pyttsx3
import webbrowser
from vosk import Model, KaldiRecognizer

In [9]:
# Load the Vosk model found at the relative path "model" and build a recognizer on it.
# The recognizer's second argument (16000) is the same value the microphone
# stream is opened with (samplerate=16000).
model = Model("model")
recognizer = KaldiRecognizer(model, 16_000)

In [10]:
# TTS engine
# Created once here and reused by every speak() call.
engine = pyttsx3.init()

In [11]:
# Hand-off buffer: `callback` puts raw audio chunks in, the recognition loop takes them out.
q = queue.Queue()

In [12]:
def callback(indata, frames, time, status):
    """Stream callback: forward one captured audio block to the shared queue.

    Args:
        indata: Buffer holding the captured audio block.
        frames: Not used by this callback.
        time: Not used by this callback.
        status: Stream status; printed whenever it is truthy.
    """
    if status:
        print(status)
    chunk = bytes(indata)  # bytes() takes its own copy of the buffer contents
    q.put(chunk)

In [13]:
def speak(text):
    """Show ``text`` in the cell output and pass it to the TTS engine.

    Args:
        text: The sentence Whispy should say.

    Uses the module-level ``engine`` created with ``pyttsx3.init()``.
    """
    print("🗣️ Whispy says:", text)
    engine.say(text)
    # NOTE(review): runAndWait() presumably blocks until the speech has
    # finished — confirm against the pyttsx3 documentation.
    engine.runAndWait()

In [15]:
def _discard_pending_audio():
    """Drop every audio chunk currently waiting in ``q``.

    The microphone stream keeps running while ``speak()`` is busy, so chunks
    pile up in the queue during each reply. Left in place, they would be
    transcribed afterwards, so Whispy would react late and could pick up its
    own voice from the speakers.
    """
    while True:
        try:
            q.get_nowait()
        except queue.Empty:
            return


# Open microphone stream
with sd.RawInputStream(samplerate=16000, blocksize=8000, dtype='int16',
                       channels=1, callback=callback):

    speak("Hello Mansour! I am Whispy. Say a command. Say stop to quit.")
    _discard_pending_audio()  # ignore whatever was captured during the greeting

    while True:
        data = q.get()
        if not recognizer.AcceptWaveform(data):
            continue  # no final result for this chunk yet
        result = json.loads(recognizer.Result())
        text = result.get("text", "")
        if not text:
            continue
        print("📝 Whispy heard:", text)

        heard = text.lower()  # normalise once instead of once per branch

        if "stop" in heard:
            speak("Goodbye Mansour, see you soon!")
            break

        url = None  # set only by the "open a website" commands
        if "hello" in heard:
            reply = "Hello! Nice to hear you."
        elif "your name" in heard:
            reply = "My name is Whispy. I am your voice assistant."
        elif "you tube" in heard:
            reply, url = "Opening YouTube for you!", "https://www.youtube.com"
        elif "git hub" in heard or "get up" in heard:
            # "get up" is how the recognizer tends to transcribe "GitHub".
            reply, url = "Opening GitHub for you!", "https://www.github.com"
        elif "facebook" in heard:
            reply, url = "Opening Facebook for you!", "https://www.facebook.com"
        else:
            reply = "I heard you say " + text

        # Same order as before: announce first, then open the page.
        speak(reply)
        if url is not None:
            webbrowser.open(url)
        _discard_pending_audio()  # audio recorded while talking is not a command

🗣️ Whispy says: Hello Mansour! I am Whispy. Say a command. Say stop to quit.
📝 Whispy heard: can you
🗣️ Whispy says: I heard you say can you
📝 Whispy heard: can you have a you tube of content on you tube time religion
🗣️ Whispy says: Opening YouTube for you!
📝 Whispy heard: oh older than the thank you
🗣️ Whispy says: I heard you say oh older than the thank you
📝 Whispy heard: can you imagine i get up the tab maybe hello
🗣️ Whispy says: Hello! Nice to hear you.
📝 Whispy heard: can you are when get her for me
🗣️ Whispy says: I heard you say can you are when get her for me
📝 Whispy heard: there you know get up get up get up
🗣️ Whispy says: Opening GitHub for you!
📝 Whispy heard: i will i'll i'll come back and a little eligibility the recommended bus stop the durkan linda sister had
🗣️ Whispy says: Goodbye Mansour, see you soon!


### 🧱 Step 5: Add Time & Date Commands

In [16]:
import sounddevice as sd
import queue
import json
import pyttsx3
import webbrowser
from vosk import Model, KaldiRecognizer
from datetime import datetime

In [17]:
# Load the Vosk model found at the relative path "model" and build a recognizer on it.
# The recognizer's second argument (16000) is the same value the microphone
# stream is opened with (samplerate=16000).
model = Model("model")
recognizer = KaldiRecognizer(model, 16_000)

In [18]:
# TTS engine
# Created once here and reused by every speak() call.
engine = pyttsx3.init()

In [19]:
# Hand-off buffer: `callback` puts raw audio chunks in, the recognition loop takes them out.
q = queue.Queue()

In [20]:
def callback(indata, frames, time, status):
    """Stream callback: forward one captured audio block to the shared queue.

    Args:
        indata: Buffer holding the captured audio block.
        frames: Not used by this callback.
        time: Not used by this callback.
        status: Stream status; printed whenever it is truthy.
    """
    if status:
        print(status)
    chunk = bytes(indata)  # bytes() takes its own copy of the buffer contents
    q.put(chunk)

In [21]:
def speak(text):
    """Show ``text`` in the cell output and pass it to the TTS engine.

    Args:
        text: The sentence Whispy should say.

    Uses the module-level ``engine`` created with ``pyttsx3.init()``.
    """
    print("🗣️ Whispy says:", text)
    engine.say(text)
    # NOTE(review): runAndWait() presumably blocks until the speech has
    # finished — confirm against the pyttsx3 documentation.
    engine.runAndWait()

In [23]:
# (phrases to listen for, site name used in the reply, URL to open).
# The recognizer writes these brand names as separate words ("you tube") or as
# a sound-alike ("get up" for GitHub), so the one-word spellings alone never
# match; both forms are accepted here.
_SITE_COMMANDS = (
    (("youtube", "you tube"), "YouTube", "https://www.youtube.com"),
    (("github", "git hub", "get up"), "GitHub", "https://www.github.com"),
    (("facebook",), "Facebook", "https://www.facebook.com"),
)


def _match_site(heard):
    """Return ``(name, url)`` for the first site whose phrase occurs in ``heard``, else ``None``."""
    for phrases, name, url in _SITE_COMMANDS:
        if any(phrase in heard for phrase in phrases):
            return name, url
    return None


def _discard_pending_audio():
    """Drop every audio chunk currently waiting in ``q``.

    The microphone stream keeps running while ``speak()`` is busy, so chunks
    pile up in the queue during each reply. Left in place, they would be
    transcribed afterwards, so Whispy would react late and could pick up its
    own voice from the speakers.
    """
    while True:
        try:
            q.get_nowait()
        except queue.Empty:
            return


# Open microphone stream
with sd.RawInputStream(samplerate=16000, blocksize=8000, dtype='int16',
                       channels=1, callback=callback):

    speak("Hello Mansour! I am Whispy. Say a command. Say stop to quit.")
    _discard_pending_audio()  # ignore whatever was captured during the greeting

    while True:
        data = q.get()
        if not recognizer.AcceptWaveform(data):
            continue  # no final result for this chunk yet
        result = json.loads(recognizer.Result())
        text = result.get("text", "")
        if not text:
            continue
        print("📝 Whispy heard:", text)

        heard = text.lower()  # normalise once instead of once per branch

        if "stop" in heard or "bye" in heard:
            speak("Goodbye Mansour, see you soon!")
            break

        # Branch order is unchanged: greeting, name, websites, time, date, echo.
        url = None
        site = _match_site(heard)
        if "hello" in heard:
            reply = "Hello! Nice to hear you."
        elif "your name" in heard:
            reply = "My name is Whispy. I am your voice assistant."
        elif site is not None:
            site_name, url = site
            reply = f"Opening {site_name} for you!"
        elif "time" in heard:
            now = datetime.now().strftime("%H:%M")
            reply = f"The time is {now}"
        elif "date" in heard:
            today = datetime.now().strftime("%A, %B %d, %Y")
            reply = f"Today is {today}"
        else:
            reply = "I heard you say " + text

        # Same order as before: announce first, then open the page.
        speak(reply)
        if url is not None:
            webbrowser.open(url)
        _discard_pending_audio()  # audio recorded while talking is not a command

🗣️ Whispy says: Hello Mansour! I am Whispy. Say a command. Say stop to quit.
📝 Whispy heard: my
🗣️ Whispy says: I heard you say my
📝 Whispy heard: by
🗣️ Whispy says: I heard you say by
📝 Whispy heard: wow
🗣️ Whispy says: I heard you say wow
📝 Whispy heard: by goodbye goodbye with be
🗣️ Whispy says: Goodbye Mansour, see you soon!
