<a href="https://colab.research.google.com/github/v-jhonattan/assist-ncia_virtual_dozero/blob/main/Untitled13.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [77]:
# Colab setup: install SpeechRecognition (imported below as `speech_recognition`) and gTTS.
# NOTE(review): ffmpeg-python is installed here, but the visible code runs the `ffmpeg`
# binary through subprocess and never imports the package — confirm it is still needed.
!pip -q install SpeechRecognition gTTS ffmpeg-python



In [78]:
import os, base64, subprocess, urllib.parse, json
from IPython.display import Audio, Javascript, HTML, display
from google.colab import output
import speech_recognition as sr
from gtts import gTTS

In [79]:
def speak(text, lang='pt', fname='tts_output.mp3', autoplay=True):
    """Turn ``text`` into speech with gTTS and return a notebook audio player.

    Args:
        text: Sentence to synthesize.
        lang: Language code handed to gTTS.
        fname: File the synthesized audio is saved to. Every call that keeps
            the default overwrites the same file.
        autoplay: Forwarded to ``IPython.display.Audio``.

    Returns:
        An ``Audio`` object pointing at ``fname``; the caller is expected to
        ``display`` it.
    """
    gTTS(text=text, lang=lang).save(fname)
    player = Audio(fname, autoplay=autoplay)
    return player

def open_new_tab(url, label=None):
    """Show a clickable link for ``url`` and ask the browser to open it in a new tab.

    Both the link markup and the JavaScript snippet are built from caller
    supplied text (labels contain the raw, transcribed user query), so the
    values are escaped for the context they are embedded in instead of being
    interpolated verbatim.

    Args:
        url: Address to open.
        label: Link text; defaults to ``url`` when empty or ``None``.
    """
    # Local imports keep this block self-contained regardless of cell order.
    import json
    from html import escape

    label = label or url
    # HTML context: escape &, <, > and quotes so neither the href attribute
    # nor the link text can break out of the markup.
    safe_href = escape(url)
    safe_label = escape(label)
    display(HTML(f'<a href="{safe_href}" target="_blank" rel="noopener">{safe_label}</a>'))
    # JavaScript context: json.dumps yields a correctly quoted string literal.
    display(Javascript(f'window.open({json.dumps(url)}, "_blank");'))


In [80]:
# ==== FIX 1: gravador com fallback (mic → arquivo) ====
import base64, subprocess
from IPython.display import HTML, display
from google.colab import output

def record_microphone_ui_or_file(seconds=5, webm_path='input.webm', wav_path='input.wav'):
    """Capture audio in the browser and convert it to a mono 16 kHz WAV file.

    A button is rendered in the cell output. Clicking it records ``seconds``
    seconds from the microphone with ``MediaRecorder``; if recording throws
    (the JavaScript ``catch`` branch), a file picker is opened instead. The
    browser returns the audio as a base64 data URL, which is written to disk
    and converted with the ``ffmpeg`` command-line tool.

    Args:
        seconds: Recording length; truncated with ``int()`` before being
            injected into the JavaScript.
        webm_path: Where the raw audio is stored when its MIME type contains
            "webm"; any other type is stored as ``input_audio.bin``.
        wav_path: Output path of the converted WAV file.

    Returns:
        ``wav_path``.

    Raises:
        AssertionError: The browser returned no audio.
        subprocess.CalledProcessError: ``ffmpeg`` exited with a non-zero status.
    """
    display(HTML('<button id="recbtn" style="font-size:16px;">🎙️ Clique para gravar</button>'))
    # NOTE: this template goes through "%"-formatting below, so the only
    # percent sign allowed inside it is the %d placeholder.
    js = """
    async function recordMic(sec){
      const stream = await navigator.mediaDevices.getUserMedia({audio:true});
      try {
        const rec = new MediaRecorder(stream);
        let chunks = [];
        rec.ondataavailable = e => chunks.push(e.data);
        const stopped = new Promise(r => rec.onstop = r);
        rec.start();
        await new Promise(r => setTimeout(r, sec*1000));
        rec.stop();
        await stopped;
        const blob = new Blob(chunks, {type:'audio/webm'});
        const reader = new FileReader();
        return await new Promise(res => { reader.onloadend = () => res(reader.result); reader.readAsDataURL(blob); });
      } finally {
        // Always release the microphone, even when recording fails.
        stream.getTracks().forEach(t => t.stop());
      }
    }
    async function fileFallback(){
      return await new Promise(resolve=>{
        const el = document.createElement('input');
        el.type = 'file';
        el.accept = 'audio/*';
        el.capture = 'microphone';
        el.onchange = () => {
          const f = el.files[0];
          if(!f){ resolve(""); return; }
          const reader = new FileReader();
          reader.onloadend = () => resolve(reader.result);
          reader.readAsDataURL(f);
        };
        el.click();
      });
    }
    (async ()=>{
      const btn = document.getElementById('recbtn');
      btn.disabled = false;
      btn.textContent = '🎙️ Clique para gravar';
      const dataUrl = await new Promise(resolve=>{
        btn.onclick = async ()=>{
          btn.disabled = true;
          btn.textContent = '🔴 Gravando...';
          try{
            const d = await recordMic(%d);
            btn.textContent = '✅ Gravado';
            resolve(d);
          } catch(e){
            btn.textContent = '📁 Escolha/Grave um áudio…';
            const d = await fileFallback();
            btn.textContent = d ? '✅ Arquivo selecionado' : '❌ Sem áudio';
            resolve(d);
          }
        }
      });
      return dataUrl;
    })();
    """ % int(seconds)

    # Blocks until the JavaScript promise resolves with the data URL (or "").
    dataurl = output.eval_js(js)
    if not dataurl:
        raise AssertionError("Sem áudio: habilite o microfone ou selecione um arquivo.")

    # Data URL layout: "data:<mime>[;params];base64,<payload>".
    header, encoded = dataurl.split(",", 1)
    mime = header.split(";")[0].replace("data:", "")
    # Uploaded files may not be WebM; ffmpeg probes the content, so a neutral
    # file name is enough for those.
    raw_path = webm_path if "webm" in mime else "input_audio.bin"
    with open(raw_path, "wb") as f:
        f.write(base64.b64decode(encoded))

    # -y overwrite output, -ac 1 mono, -ar 16000 sample rate in Hz.
    subprocess.run(
        ["ffmpeg", "-y", "-i", raw_path, "-ac", "1", "-ar", "16000", wav_path],
        stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT, check=True
    )
    return wav_path


# ==== FIX 2: text_round compatível com (action, arg, opts) ====
def text_round(cmd: str):
    """Run one assistant turn from a typed command (no microphone involved).

    The command is printed, parsed with ``parse_intent_ptbr`` and dispatched
    to the matching ``action_*`` handler, with a spoken confirmation for each
    branch. Always returns ``None``.

    Args:
        cmd: Command text, e.g. ``"youtube bicicleta dobrável"``.
    """
    print("Comando:", cmd)
    intent, target, options = parse_intent_ptbr(cmd)

    if intent == "wikipedia":
        display(speak(f"Abrindo Wikipedia sobre {target}", lang='pt'))
        # The summary-capable handler is only used when it has been defined in
        # this kernel; it speaks a summary when options['summary'] is True.
        if 'action_wikipedia_or_summary' not in globals():
            action_wikipedia(target)
        else:
            action_wikipedia_or_summary(target, speak_summary=options.get('summary', False))
        return

    if intent == "youtube":
        display(speak(f"Abrindo YouTube com busca por {target}", lang='pt'))
        action_youtube(target)
        return

    if intent == "farmacia":
        location = try_browser_geolocation()
        if location:
            display(speak("Localização obtida. Abrindo farmácias próximas.", lang='pt'))
            action_pharmacy_nearby(location)
        elif target:
            # No coordinates: fall back to the city named in the command.
            display(speak(f"Sem localização. Usando a cidade {target}.", lang='pt'))
            action_pharmacy_nearby(target)
        else:
            display(speak("Sem localização. Diga por exemplo: farmacia Curitiba.", lang='pt'))
            action_help()
        return

    if intent == "google":
        display(speak(f"Pesquisando por {target}", lang='pt'))
        action_google(target)
        return

    if intent == "tempo":
        place_suffix = ('de ' + target) if target else ''
        display(speak(f"Abrindo previsão do tempo {place_suffix}", lang='pt'))
        action_weather(target)
        return

    if intent == "help":
        # Here the list is shown first and the confirmation spoken afterwards.
        action_help()
        display(speak("Listei os comandos na tela.", lang='pt'))
        return

    if intent == "exit":
        display(speak("Até mais! Encerrando.", lang='pt'))
        return

    display(speak("Comando não reconhecido. Diga 'ajuda' para exemplos.", lang='pt'))
    action_help()


In [81]:
def transcribe_pt_br(wav_path, fallback_to_english=False):
    """Transcribe a WAV file with the Google recognizer, preferring pt-BR.

    Args:
        wav_path: Path of the audio file to transcribe.
        fallback_to_english: When True and the pt-BR attempt cannot understand
            the audio, try once more with ``en-US``.

    Returns:
        The recognized text, or ``""`` when nothing was understood or the
        recognizer failed. Failures other than "speech not understood" are
        printed with the ``[STT ERRO]`` prefix; previously the en-US attempt
        discarded them silently, hiding e.g. connectivity problems.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(wav_path) as source:
        audio = recognizer.record(source)

    languages = ['pt-BR']
    if fallback_to_english:
        languages.append('en-US')

    for language in languages:
        try:
            return recognizer.recognize_google(audio, language=language)
        except sr.UnknownValueError:
            # Speech was not intelligible in this language; try the next one.
            continue
        except Exception as e:
            # Best effort: report the failure and give up without raising.
            print("[STT ERRO]", e)
            return ""
    return ""


In [82]:
# Install and import the `wikipedia` client, then set its language to 'pt'
# before any search/summary call is made.
!pip -q install wikipedia
import wikipedia
wikipedia.set_lang('pt')

def wiki_summary(query, sentences=2):
    """Look up ``query`` on Wikipedia and return ``(url, title, summary)``.

    The first search hit is tried first. Because that hit is already an exact
    page title, it is fetched with ``auto_suggest=False`` so the library does
    not replace it with a different "suggested" page. If the search yields
    nothing or the lookup fails, the raw query is tried with auto-suggestion.

    Args:
        query: Free-text search term.
        sentences: Number of summary sentences to request.

    Returns:
        ``(page_url, title, summary)`` on success, ``(None, None, "")`` when
        both attempts fail.
    """
    try:
        hits = wikipedia.search(query)
        if hits:
            title = hits[0]
            summ = wikipedia.summary(title, sentences=sentences, auto_suggest=False, redirect=True)
            page = wikipedia.page(title, auto_suggest=False, redirect=True)
            return page.url, title, summ
    except Exception:
        # Best effort: any failure here falls through to the raw-query attempt.
        pass

    # Fallback: let the library resolve the raw term itself.
    try:
        summ = wikipedia.summary(query, sentences=sentences, auto_suggest=True, redirect=True)
        page = wikipedia.page(query, auto_suggest=True, redirect=True)
        return page.url, page.title, summ
    except Exception:
        return None, None, ""

def action_wikipedia_or_summary(query, speak_summary=False):
    """Open Wikipedia for ``query``, optionally reading a short summary aloud.

    Args:
        query: Topic to look up.
        speak_summary: When True, fetch a two-sentence summary with
            ``wiki_summary``, speak it and link to the article; when no
            summary is available, say so and fall back to ``action_wikipedia``.
            When False, delegate to ``action_wikipedia`` straight away.
    """
    if not speak_summary:
        action_wikipedia(query)
        return

    page_url, page_title, summary_text = wiki_summary(query, sentences=2)
    if not summary_text:
        display(speak("Não consegui obter um resumo. Abrindo a pesquisa no Wikipedia.", lang='pt'))
        action_wikipedia(query)
        return

    display(speak(f"{page_title}. {summary_text}", lang='pt'))
    if page_url:
        open_new_tab(page_url, f"Wikipedia: {page_title}")


In [83]:
import re, urllib.parse

def action_google(query):
    """Open a Google search for ``query`` in a new browser tab.

    Args:
        query: Search terms; percent-encoded before being placed in the URL.
    """
    search_url = "https://www.google.com/search?q=" + urllib.parse.quote(query)
    link_text = "Google: {}".format(query)
    open_new_tab(search_url, link_text)

def action_weather(city):
    """Open a Google search for the weather forecast in a new browser tab.

    Args:
        city: Place name appended to the search term "tempo"; an empty or
            ``None`` value searches for the bare term.
    """
    search_terms = "tempo " + (city if city else "")
    encoded_terms = urllib.parse.quote(search_terms)
    open_new_tab("https://www.google.com/search?q=" + encoded_terms, "Previsão do tempo")

def parse_intent_ptbr(text):
    """Map a Brazilian-Portuguese command to an ``(action, argument, options)`` triple.

    Leading keywords are checked in this order: Wikipedia, YouTube, pharmacy,
    Google, weather. "ajuda"/"help" and the farewell words are matched
    anywhere in the text. Anything else is reported as ``'unknown'`` with the
    stripped text as argument.

    Args:
        text: Raw command (typed or transcribed). ``None`` is treated as "".

    Returns:
        A tuple ``(action, argument, options)`` where ``action`` is one of
        ``'wikipedia'``, ``'youtube'``, ``'farmacia'``, ``'google'``,
        ``'tempo'``, ``'help'``, ``'exit'`` or ``'unknown'``. For wikipedia,
        youtube and google an empty argument is returned as a single space.
        ``options`` is ``{'summary': bool}`` for wikipedia and ``{}`` otherwise.
    """
    t = (text or "").strip()
    tl = t.lower()

    # Wikipedia, optionally with a spoken summary (e.g. "resumo Alan Turing",
    # "o que é aprendizado de máquina"). The accented Portuguese spelling
    # "Wikipédia" is accepted too, matching how farmácia/previsão are handled.
    wiki_prefix = r'^(wikip[eé]dia|o que é|quem é|resumo)'
    if re.match(wiki_prefix + r'\b', tl):
        q = re.sub(wiki_prefix + r'\s*', '', t, flags=re.I)
        speak_sum = any(k in tl for k in ['resumo', 'o que é', 'quem é'])
        return ('wikipedia', q or ' ', {'summary': speak_sum})

    # YouTube
    if re.match(r'^(youtube|tocar|buscar no youtube|abrir youtube)\b', tl):
        q = re.sub(r'^(youtube|tocar|buscar no youtube|abrir youtube)\s*', '', t, flags=re.I)
        return ('youtube', q or ' ', {})

    # Pharmacy
    if re.match(r'^(farmacia|farmácia)\b', tl):
        city = re.sub(r'^(farmacia|farmácia)\s*', '', t, flags=re.I)
        return ('farmacia', city.strip(), {})

    # Google (generic search)
    if re.match(r'^(google|pesquisar|buscar|abrir google)\b', tl):
        q = re.sub(r'^(google|pesquisar|buscar|abrir google)\s*', '', t, flags=re.I)
        return ('google', q or ' ', {})

    # Weather / forecast. The optional preposition must be a whole word:
    # without the \b, "tempo Embu" lost its first two letters ("bu").
    if re.match(r'^(tempo|previs(a|ã)o)\b', tl):
        city = re.sub(r'^(tempo|previs(a|ã)o)\s*((em|de)\b\s*)?', '', t, flags=re.I)
        return ('tempo', city or '', {})

    if 'ajuda' in tl or 'help' in tl:
        return ('help', '', {})
    if any(w in tl for w in ['sair', 'encerrar', 'tchau', 'até mais']):
        return ('exit', '', {})

    return ('unknown', t, {})


In [84]:
def voice_round(seconds=5):
    """Run one spoken assistant turn: record, transcribe, parse and dispatch.

    Args:
        seconds: Recording length passed to ``record_microphone_ui_or_file``.

    Returns:
        ``"exit"`` when the user asked to leave, otherwise ``None`` (including
        when nothing could be transcribed).
    """
    print(f"[🎙️] Clique e fale por {seconds}s… (ou selecione um arquivo)")
    wav_file = record_microphone_ui_or_file(seconds=seconds)
    print("[🧠] Transcrevendo...")
    heard = transcribe_pt_br(wav_file)
    print("Você disse:", heard if heard else "(não entendi)")

    if not heard:
        display(speak("Não entendi. Pode repetir?", lang='pt'))
        return None

    intent, target, options = parse_intent_ptbr(heard)

    if intent == "wikipedia":
        display(speak(f"Abrindo Wikipedia sobre {target}", lang='pt'))
        action_wikipedia_or_summary(target, speak_summary=options.get('summary', False))
        return None

    if intent == "youtube":
        display(speak(f"Abrindo YouTube com busca por {target}", lang='pt'))
        action_youtube(target)
        return None

    if intent == "farmacia":
        location = try_browser_geolocation()
        if location:
            display(speak("Localização obtida. Abrindo farmácias próximas.", lang='pt'))
            action_pharmacy_nearby(location)
        elif target:
            # No coordinates: fall back to the city named in the command.
            display(speak(f"Sem localização. Usando a cidade {target}.", lang='pt'))
            action_pharmacy_nearby(target)
        else:
            display(speak("Sem localização. Diga por exemplo: farmacia Curitiba.", lang='pt'))
            action_help()
        return None

    if intent == "google":
        display(speak(f"Pesquisando por {target}", lang='pt'))
        action_google(target)
        return None

    if intent == "tempo":
        place_suffix = ('de ' + target) if target else ''
        display(speak(f"Abrindo previsão do tempo {place_suffix}", lang='pt'))
        action_weather(target)
        return None

    if intent == "help":
        # Here the list is shown first and the confirmation spoken afterwards.
        action_help()
        display(speak("Listei os comandos na tela.", lang='pt'))
        return None

    if intent == "exit":
        display(speak("Até mais! Encerrando.", lang='pt'))
        return "exit"

    display(speak("Comando não reconhecido. Diga 'ajuda' para exemplos.", lang='pt'))
    action_help()
    return None

def assistant_loop(rounds=5, seconds=5):
    """Greet the user, then run up to ``rounds`` voice turns.

    Args:
        rounds: Maximum number of turns.
        seconds: Recording length handed to each ``voice_round`` call.

    The loop ends early as soon as a turn returns ``"exit"``.
    """
    display(speak("Assistente pronto. Diga ajuda para ver comandos.", lang='pt'))
    for _turn in range(rounds):
        outcome = voice_round(seconds)
        if outcome == "exit":
            return


In [85]:
# Record one spoken command and act on it.
voice_round(seconds=5)
# say: "wikipedia teoria das filas"
# or: "youtube relógio de pulso"
# or: "farmacia Campinas"


[🎙️] Clique e fale por 5s… (ou selecione um arquivo)


[🧠] Transcrevendo...
Você disse: Google


<IPython.core.display.Javascript object>

In [86]:
# Exercise the dispatcher with typed commands; no microphone is involved.
text_round("youtube bicicleta dobrável")
text_round("wikipedia aprendizado de máquina")
# Other commands to try:
# text_round("resumo Alan Turing")
# text_round("tempo Porto Alegre")
# text_round("farmacia Campinas")


Comando: youtube bicicleta dobrável


<IPython.core.display.Javascript object>

Comando: wikipedia aprendizado de máquina


<IPython.core.display.Javascript object>