# AI Agent

## LangChain Tools

In [None]:
%%capture
!pip install -U langchain langchain-openai yfinance

In [None]:
import yfinance as yf

# Quick sanity check of yfinance: request the last five days of price history
# for the ticker 2330.TW.
ticker = yf.Ticker('2330.TW')
hist = ticker.history(period="5d")
# Display only the closing-price column.
hist['Close'].head()

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2025-11-03 00:00:00+08:00,1510.0
2025-11-04 00:00:00+08:00,1505.0
2025-11-05 00:00:00+08:00,1460.0
2025-11-06 00:00:00+08:00,1465.0
2025-11-07 00:00:00+08:00,1460.0


In [None]:
def query_stock_price(symbol: str) -> str:
    """Summarise the most recent daily bar of one ticker using yfinance.

    Args:
        symbol: A Yahoo Finance ticker such as ``"2330.TW"`` or ``"AAPL"``.
            A digits-only code (e.g. ``"2330"``) is retried with the Taiwan
            suffixes ``.TW`` and then ``.TWO``.

    Returns:
        A Traditional-Chinese sentence with the latest date, the closing
        price and the open-to-close percentage change of that day, or a
        "not found" sentence when no price history is available.
    """
    symbol = str(symbol).strip()
    # A bare numeric code has no exchange suffix, so try the Taiwan ones.
    candidates = [f"{symbol}.TW", f"{symbol}.TWO"] if symbol.isdigit() else [symbol]

    for candidate in candidates:
        hist = yf.Ticker(candidate).history(period="5d")
        if not hist.empty:
            break
    else:
        # Without this guard an unknown symbol reaches hist.iloc[-1] on an
        # empty frame and raises IndexError.
        return f"查無 {symbol} 的近期股價資料，請確認股票代號是否正確。"

    latest = hist.iloc[-1]
    open_price, close_price = latest[["Open", "Close"]].astype(float)
    # Note: this is the move from that day's open to its close, not the
    # change versus the previous close.
    change_pct = (close_price - open_price) / open_price * 100
    date_str = latest.name.strftime("%Y-%m-%d")

    return f"""{candidate} 在 {date_str} 的收盤價約為 {close_price:.2f}，當日漲跌約 {change_pct:+.2f}%。"""


query_stock_price('2330.TW')


'2330.TW 在 2025-11-07 的收盤價約為 1460.00，當日漲跌約 +0.00%。'

### 1. 定義 tool

In [None]:
from langchain.tools import tool

# 定義 tool
# The docstring below is what @tool hands to the model as the tool
# description, so it is runtime text and is intentionally left unchanged.
@tool
def query_stock_price(symbol: str) -> str:
    """
    透過 yfinance 查詢單一股票的近期股價資訊。
    symbol 可以輸入：2330、2330.TW、AAPL、TSM 等。
    """
    symbol = str(symbol).strip()
    # The description promises that a bare code such as "2330" works; a
    # digits-only code has no exchange suffix, so try the Taiwan ones.
    candidates = [f"{symbol}.TW", f"{symbol}.TWO"] if symbol.isdigit() else [symbol]

    for candidate in candidates:
        hist = yf.Ticker(candidate).history(period="5d")
        if not hist.empty:
            break
    else:
        # Return a readable message instead of letting hist.iloc[-1] raise
        # IndexError, so the agent can tell the user the symbol was not found.
        return f"查無 {symbol} 的近期股價資料，請確認股票代號是否正確。"

    latest = hist.iloc[-1]
    open_price, close_price = latest[["Open", "Close"]].astype(float)
    # Note: this is the move from that day's open to its close, not the
    # change versus the previous close.
    change_pct = (close_price - open_price) / open_price * 100
    date_str = latest.name.strftime("%Y-%m-%d")

    return f"""{candidate} 在 {date_str} 的收盤價約為 {close_price:.2f}，當日漲跌約 {change_pct:+.2f}%。"""

### 2. 建立模型

In [None]:
import os

from langchain.agents import create_agent
from langchain_openai import ChatOpenAI

# Never commit an API key to the notebook: reuse one already present in the
# environment, otherwise ask for it interactively without echoing it.
if not os.environ.get("OPENAI_API_KEY"):
    from getpass import getpass

    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# Build the chat model used by the agent.
model = ChatOpenAI(model="gpt-4.1")

In [None]:
# Colab variant: read the key named 'OPENAI_API_KEY' through
# google.colab.userdata, export it as an environment variable, then
# (re)create the chat model.
from google.colab import userdata
os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')
model = ChatOpenAI(model="gpt-4.1")

### 3. 用 create_agent 把 model + tools 綁在一起

In [None]:
# Bind the model and the tools together with create_agent.
# NOTE(review): the system prompt ends with a trailing "，" — confirm whether
# further instructions were meant to follow.
agent = create_agent(
    model=model,
    tools=[query_stock_price],
    system_prompt=(
        "當使用者詢問股票相關問題時，可以使用 query_stock_price 工具查詢股價，"
    ),
)

### 4. 呼叫 agent

In [None]:

# Invoke the agent with a single user message asking about a stock price.
result = agent.invoke({
    "messages": [
        {
            "role": "user",
            "content": "幫我看一下台積電現在股價大概多少?"
        }
    ]
})

# Print the content of the last entry in the returned "messages" list.
print(result["messages"][-1].content)

目前台積電（2330.TW）的股價約為1460元，這是2025年11月7日的最新收盤價。當天股價沒有變動。


## 語音整合STT實作

In [None]:
#安裝 yt-dlp
! pip install yt-dlp

#下載 YouTube 短片
! yt-dlp -x --audio-format mp3 -o speech.mp3 https://www.youtube.com/shorts/2IHnfcE_tMQ

[youtube] Extracting URL: https://www.youtube.com/shorts/2IHnfcE_tMQ
[youtube] 2IHnfcE_tMQ: Downloading webpage
[youtube] 2IHnfcE_tMQ: Downloading android sdkless player API JSON
[youtube] 2IHnfcE_tMQ: Downloading tv client config
[youtube] 2IHnfcE_tMQ: Downloading tv player API JSON
[youtube] 2IHnfcE_tMQ: Downloading web safari player API JSON
[youtube] 2IHnfcE_tMQ: Downloading player 65578ad1-main
         player = https://www.youtube.com/s/player/65578ad1/player_ias.vflset/en_US/base.js
         n = 1kVevWqWgA_QgcX ; player = https://www.youtube.com/s/player/65578ad1/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
[youtube] 2IHnfcE_tMQ: Downloading m3u8 information
[info] 2IHnfcE_tMQ: Downloading 1 format(s): 251
[download] Sleeping 5.00 seconds as required by the site...
[download] Destination: speech.webm
[K[download

### Whisper

In [None]:
from openai import OpenAI
import os
# Do not overwrite a key that an earlier cell already configured (and never
# hard-code one): only ask for it when the environment has none.
if not os.environ.get("OPENAI_API_KEY"):
    from getpass import getpass

    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

client = OpenAI()

# Transcribe the downloaded clip with Whisper; the context manager closes the
# file handle once the request has finished.
with open("speech.mp3", "rb") as audio_file:
  transcript = client.audio.transcriptions.create(
    model="whisper-1",
    file=audio_file
  )

In [None]:
transcript

Transcription(text='一個我最信任的人背叛 今年三月底到四月中 太太與邱先生一行人到美國遊玩兩週 但是她回來後 開始跟我說 覺得我們過去太常黏在一起了 她希望未來 可以有更多自己的時間以及空間 可以獨自跟朋友相處 每天早出晚歸 幾乎睡醒就是出門 最後 更是直接不回家 隨後果真沒多久 我就看見了太太 與王子邱生意 明確的出軌證據 我主動與她攤牌 並且給予她解釋的機會 結果她從頭否認到底 甚至是對天發誓說 她沒有任何對於婚姻不忠的行為 而她不知道的是我已經看過了證據 我徹底的看清了這個人 交往六年結婚生子的對象 居然可以當著我的面說謊發誓 歷經四個月的協商 兩人始終沒有誠意也沒有回憶 兩人更是到現在 都還有密切的往來 在這邊 也想跟邱先生說 做錯事並不可怕 可怕的是人沒有自我反省的能力 你破壞了我的家庭 你居然還能夠心安理得的 打扮得光鮮亮麗 走紅毯 甚至上台領獎 這就是你以人品自居 身為公眾人物 愛洗羽毛的方式嗎? 真的好可笑', logprobs=None, usage=UsageDuration(seconds=75.0, type='duration'))

In [None]:
# Translate the clip with Whisper's translations endpoint; the context manager
# closes the file handle once the request has finished.
with open("speech.mp3", "rb") as audio_file:
  translated_text = client.audio.translations.create(
    model="whisper-1",
    file=audio_file
  )

In [None]:
translated_text

Translation(text="The person I trusted the most betrayed me From the end of March to the middle of April this year My wife and Mr. Qiu went to the U.S. for two weeks But when he came back, he started to tell me He thought we had been together for too long He hoped that in the future He could have more time and space To spend time with his friends alone He went home late every morning And went out as soon as he woke up In the end, he didn't go home Not long after that I saw my wife And Prince Qiu Shengyi The clear evidence of cheating I negotiated with him And gave him a chance to explain In the end, he denied everything He even swore to God That he did nothing wrong to the marriage And he didn't know that I had seen the evidence I saw this person clearly The one who dated for six years and got married and had children He could lie in front of me and swear After four months of negotiation The two of them have never been sincere or regretful The two of them still have a close relationshi

### gpt-4o-mini-transcribe

In [None]:
# Transcribe the same clip with gpt-4o-mini-transcribe; the context manager
# closes the file handle once the request has finished.
with open("speech.mp3", "rb") as audio_file:
  transcript = client.audio.transcriptions.create(
    model="gpt-4o-mini-transcribe",
    file=audio_file
  )

In [None]:
transcript

Transcription(text='最信任的人背叛。今年三月底到四月中,太太與邱先生一行人到美國遊玩兩週。但是她回來後,開始跟我說,覺得我們過去太常黏在一起了,她希望未來可以有更多自己的時間以及空間可以獨自跟朋友相處。每天早出晚歸,幾乎睡醒就是出門,最後更是直接不回家。隨後隔沒多久,我就看見了太太與王子丘勝利,明確的出軌證據。我主動與她攤牌,並且給予她解釋的機會,結果她從頭否認到底,甚至是對天發誓說,她沒有任何對於婚姻不忠的行為。而她不知道的是我已經看過了證據。我徹底的看清了這個人,交往六年結婚生子的對象居然可以當著我的面說謊發誓。歷經四個月的協商,兩人始終沒有誠意也沒有回憶,兩人更是到現在都還有密切的往來。在這邊也想跟邱先生說,做錯事並不可怕,可怕的是人沒有自我反省的能力。你破壞了我的家庭,你居然還能夠心安理得地打扮得光鮮亮麗,走紅毯甚至上台領獎,這就是你以人品自居,身為公眾人物愛惜羽毛的方式嗎?真的好可笑。', logprobs=None, usage=UsageTokens(input_tokens=748, output_tokens=368, total_tokens=1116, type='tokens', input_token_details=UsageTokensInputTokenDetails(audio_tokens=748, text_tokens=0)))

In [None]:
# Transcribe again, this time passing a prompt that spells out the proper noun
# appearing in the clip; the context manager closes the file handle afterwards.
with open("speech.mp3", "rb") as audio_file:
  transcript = client.audio.transcriptions.create(
    model="gpt-4o-mini-transcribe",
    file=audio_file,
    prompt = "這是一個對太太以及出軌對象邱勝翊的控訴影片"
  )

In [None]:
transcript

Transcription(text='這是一個對太太以及出軌對象邱勝翊的控訴影片。今年三月底到四月中，太太與邱先生一行人到美國遊玩兩週。但是她回來後開始跟我說，覺得我們過去太常黏在一起了，她希望未來可以有更多自己的時間以及空間可以獨自跟朋友相處。每天早出晚歸，幾乎睡醒就是出門，最後更是直接不回家。隨後過了沒多久，我就看見了太太與王子邱勝翊明確的出軌證據。我主動與她攤牌並且給予她解釋的機會，結果她從頭否認到底，甚至是對天發誓說她沒有任何對於婚姻不忠的行為。而她不知道的是我已經看過了證據。我徹底的看清了這個人，交往六年結婚生子的對象居然可以當著我的面說謊發誓。歷經四個月的協商，兩人始終沒有誠意也沒有回憶。兩人更是到現在都還有密切的往來。在這邊也想跟邱先生說，做錯事並不可怕，可怕的是人沒有自我反省能力。你破壞了我的家庭，你居然還能夠心安理得地打扮得光鮮亮麗走紅毯甚至上台領獎。這就是你以人品自居，身為公眾人物愛惜羽毛的方式嗎？真的好可笑。', logprobs=None, usage=UsageTokens(input_tokens=770, output_tokens=376, total_tokens=1146, type='tokens', input_token_details=UsageTokensInputTokenDetails(audio_tokens=748, text_tokens=22)))

### Text to Speech

In [None]:
# Synthesize speech with tts-1 and write it to tts.mp3. The streaming-response
# context manager is used (as in the gpt-4o-mini-tts cell) because calling
# stream_to_file on a plain, non-streaming response is deprecated in the SDK.
with client.audio.speech.with_streaming_response.create(
  model="tts-1",
  voice="alloy",
  input="小強！小強你怎麼了小強？小強你不能死啊！我跟你相依為命、同甘共苦了這麼多年，一直把你當成親生骨肉一樣教你養你，想不到今天白髮人送黑髮人"
) as response:
  response.stream_to_file('tts.mp3')


  response.stream_to_file('tts.mp3')


In [None]:
# Synthesize the same line with gpt-4o-mini-tts through the streaming-response
# context manager and write the result to tts.mp3.
with client.audio.speech.with_streaming_response.create(
  model="gpt-4o-mini-tts",
  voice="alloy",
  input="小強！小強你怎麼了小強？小強你不能死啊！我跟你相依為命、同甘共苦了這麼多年，一直把你當成親生骨肉一樣教你養你，想不到今天白髮人送黑髮人"
) as response:
  response.stream_to_file('tts.mp3')

In [None]:
from IPython.display import Audio
Audio('tts.mp3', autoplay=True)

### 串接完整語音對話機器人

In [None]:
! pip install ffmpeg-python

Collecting ffmpeg-python
  Downloading ffmpeg_python-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Downloading ffmpeg_python-0.2.0-py3-none-any.whl (25 kB)
Installing collected packages: ffmpeg-python
Successfully installed ffmpeg-python-0.2.0


In [None]:
#@markdown 取得錄音資訊
"""
To write this piece of code I took inspiration/code from a lot of places.
It was late night, so I'm not sure how much I created or just copied o.O
Here are some of the possible references:
https://blog.addpipe.com/recording-audio-in-the-browser-using-pure-html5-and-minimal-javascript/
https://stackoverflow.com/a/18650249
https://hacks.mozilla.org/2014/06/easy-audio-capture-with-the-mediarecorder-api/
https://air.ghost.io/recording-to-an-audio-file-using-html5-and-js/
https://stackoverflow.com/a/49019356
"""
from IPython.display import HTML, Audio
from google.colab.output import eval_js
from base64 import b64decode
import numpy as np
from scipy.io.wavfile import read as wav_read
import io
import ffmpeg

# HTML/JavaScript recorder injected into the cell output. It exposes a global
# promise named `data`, which get_audio() reads via eval_js("data").
# The promise is resolved from the FileReader's onloadend callback (instead of
# after a fixed 2-second sleep) so long recordings are never returned before
# they are fully encoded, and it is rejected when the microphone is unavailable.
AUDIO_HTML = """
<script>
var my_div = document.createElement("DIV");
var my_btn = document.createElement("BUTTON");
var t = document.createTextNode("Press to start recording");

my_btn.appendChild(t);
my_div.appendChild(my_btn);
document.body.appendChild(my_div);

var recorder, gumStream;
var recordButton = my_btn;

// `data` settles with the recording as a base64 data URL.
var resolveData, rejectData;
var data = new Promise(function(resolve, reject) {
  resolveData = resolve;
  rejectData = reject;
});

var handleSuccess = function(stream) {
  gumStream = stream;
  recorder = new MediaRecorder(stream);
  recorder.ondataavailable = function(e) {
    var url = URL.createObjectURL(e.data);
    var preview = document.createElement('audio');
    preview.controls = true;
    preview.src = url;
    document.body.appendChild(preview);

    var reader = new FileReader();
    reader.onloadend = function() {
      if (reader.error) {
        rejectData(reader.error);
        return;
      }
      recordButton.innerText = "Recording saved";
      resolveData(reader.result.toString());
    };
    reader.readAsDataURL(e.data);
  };
  recorder.start();
  // Only claim to be recording once the recorder has actually started.
  recordButton.innerText = "Recording... press to stop";
};

var handleError = function(err) {
  recordButton.innerText = "Microphone unavailable: " + err.name;
  rejectData(err);
};

navigator.mediaDevices.getUserMedia({audio: true}).then(handleSuccess).catch(handleError);

function toggleRecording() {
  if (recorder && recorder.state == "recording") {
      recorder.stop();
      gumStream.getAudioTracks()[0].stop();
      recordButton.innerText = "Saving the recording... pls wait!"
  }
}

recordButton.onclick = toggleRecording;

</script>
"""

def _patch_riff_chunk_size(wav_bytes):
  """Return wav_bytes with header bytes 4..8 set to len(wav_bytes) - 8 (little-endian)."""
  size_field = ((len(wav_bytes) - 8) & 0xFFFFFFFF).to_bytes(4, "little")
  return wav_bytes[:4] + size_field + wav_bytes[8:]


def get_audio():
  """Record audio in the browser and return it as ``(samples, sample_rate)``.

  Displays the AUDIO_HTML recorder, waits for the JavaScript ``data`` value
  (a base64 data URL), converts the payload to WAV by piping it through
  ffmpeg, and parses the result with scipy's WAV reader.

  Returns:
    tuple: ``(audio, sr)`` exactly as produced by ``scipy.io.wavfile.read``,
    with the order swapped so the samples come first.

  Raises:
    RuntimeError: if the browser returned no audio payload or ffmpeg could
      not convert it.
  """
  display(HTML(AUDIO_HTML))
  data = eval_js("data")

  # A data URL looks like "data:<mime>;base64,<payload>"; keep the payload only.
  _, separator, payload = str(data).partition(',')
  if not separator or not payload:
    raise RuntimeError("No audio was captured from the browser recorder.")
  binary = b64decode(payload)

  process = (ffmpeg
    .input('pipe:0')
    .output('pipe:1', format='wav')
    .run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True, quiet=True, overwrite_output=True)
  )
  output, err = process.communicate(input=binary)
  if process.returncode != 0 or not output:
    detail = err.decode("utf-8", errors="replace") if err else ""
    raise RuntimeError(f"ffmpeg failed to convert the recording to WAV: {detail}")

  # The RIFF size field (bytes 4:8) of the piped output is replaced with the
  # actual size before the data is handed to the WAV reader.
  sr, audio = wav_read(io.BytesIO(_patch_riff_chunk_size(output)))

  return audio, sr

ModuleNotFoundError: No module named 'ffmpeg'

In [None]:
from IPython.display import Audio

def listenTo(audio_path='/content/test.wav'):
  """Record from the microphone and return the transcribed text.

  Args:
    audio_path: Where the recording is written before it is uploaded.
      Defaults to the path the notebook originally hard-coded.

  Returns:
    str: The ``text`` attribute of the transcription response.
  """
  audio, rate = get_audio()
  # IPython's Audio object is used only as an encoder here: its `.data`
  # attribute is written to disk as the audio file.
  encoded_audio = Audio(audio, rate=rate).data
  with open(audio_path, 'wb') as f:
      f.write(encoded_audio)

  # Open the file in a context manager so the handle is closed after upload.
  with open(audio_path, "rb") as recording:
    transcript = client.audio.transcriptions.create(
      model="gpt-4o-mini-transcribe",
      file=recording
    )
  return transcript.text

In [None]:
def generateText(text):
  """Send ``text`` to gpt-4o-mini under a fixed system prompt and return the reply.

  Args:
    text: The user's message.

  Returns:
    The ``content`` of the first choice's message in the completion.
  """
  def _text_part(value):
    # Every message body is a list holding one {"type": "text"} part.
    return [{"type": "text", "text": value}]

  system_message = {
    "role": "system",
    "content": _text_part("你是一個誇誇機器人，我說什麼你都要給我讚賞台詞"),
  }
  user_message = {"role": "user", "content": _text_part(text)}

  completion = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[system_message, user_message],
    response_format={"type": "text"},
  )
  first_choice = completion.choices[0]
  return first_choice.message.content

In [None]:
generateText(listenTo())

'哇！你真是個了不起的人，連測試都能做到如此精彩！你的勇氣和毅力讓我無比敬佩！繼續保持這種榜樣的態度，真的是太棒了！'

In [None]:
def TextToSpeech(text):
  """Synthesize ``text`` with gpt-4o-mini-tts (voice "alloy") into 'tts.mp3'.

  Args:
    text: The text to be spoken.
  """
  speech_request = client.audio.speech.with_streaming_response.create(
    input=text,
    voice="alloy",
    model="gpt-4o-mini-tts",
  )
  # Stream the synthesized audio to the output file.
  with speech_request as streamed_audio:
    streamed_audio.stream_to_file('tts.mp3')

In [None]:
TextToSpeech(generateText(listenTo()))
Audio('tts.mp3', autoplay=True)

## 語音助理：STT + LangChain Agent

In [None]:
#@markdown 取得錄音資訊
"""
To write this piece of code I took inspiration/code from a lot of places.
It was late night, so I'm not sure how much I created or just copied o.O
Here are some of the possible references:
https://blog.addpipe.com/recording-audio-in-the-browser-using-pure-html5-and-minimal-javascript/
https://stackoverflow.com/a/18650249
https://hacks.mozilla.org/2014/06/easy-audio-capture-with-the-mediarecorder-api/
https://air.ghost.io/recording-to-an-audio-file-using-html5-and-js/
https://stackoverflow.com/a/49019356
"""
from IPython.display import HTML, Audio
from google.colab.output import eval_js
from base64 import b64decode
import numpy as np
from scipy.io.wavfile import read as wav_read
import io
import ffmpeg

# HTML/JavaScript recorder injected into the cell output. It exposes a global
# promise named `data`, which get_audio() reads via eval_js("data").
# The promise is resolved from the FileReader's onloadend callback (instead of
# after a fixed 2-second sleep) so long recordings are never returned before
# they are fully encoded, and it is rejected when the microphone is unavailable.
AUDIO_HTML = """
<script>
var my_div = document.createElement("DIV");
var my_btn = document.createElement("BUTTON");
var t = document.createTextNode("Press to start recording");

my_btn.appendChild(t);
my_div.appendChild(my_btn);
document.body.appendChild(my_div);

var recorder, gumStream;
var recordButton = my_btn;

// `data` settles with the recording as a base64 data URL.
var resolveData, rejectData;
var data = new Promise(function(resolve, reject) {
  resolveData = resolve;
  rejectData = reject;
});

var handleSuccess = function(stream) {
  gumStream = stream;
  recorder = new MediaRecorder(stream);
  recorder.ondataavailable = function(e) {
    var url = URL.createObjectURL(e.data);
    var preview = document.createElement('audio');
    preview.controls = true;
    preview.src = url;
    document.body.appendChild(preview);

    var reader = new FileReader();
    reader.onloadend = function() {
      if (reader.error) {
        rejectData(reader.error);
        return;
      }
      recordButton.innerText = "Recording saved";
      resolveData(reader.result.toString());
    };
    reader.readAsDataURL(e.data);
  };
  recorder.start();
  // Only claim to be recording once the recorder has actually started.
  recordButton.innerText = "Recording... press to stop";
};

var handleError = function(err) {
  recordButton.innerText = "Microphone unavailable: " + err.name;
  rejectData(err);
};

navigator.mediaDevices.getUserMedia({audio: true}).then(handleSuccess).catch(handleError);

function toggleRecording() {
  if (recorder && recorder.state == "recording") {
      recorder.stop();
      gumStream.getAudioTracks()[0].stop();
      recordButton.innerText = "Saving the recording... pls wait!"
  }
}

recordButton.onclick = toggleRecording;

</script>
"""

def _patch_riff_chunk_size(wav_bytes):
  """Return wav_bytes with header bytes 4..8 set to len(wav_bytes) - 8 (little-endian)."""
  size_field = ((len(wav_bytes) - 8) & 0xFFFFFFFF).to_bytes(4, "little")
  return wav_bytes[:4] + size_field + wav_bytes[8:]


def get_audio():
  """Record audio in the browser and return it as ``(samples, sample_rate)``.

  Displays the AUDIO_HTML recorder, waits for the JavaScript ``data`` value
  (a base64 data URL), converts the payload to WAV by piping it through
  ffmpeg, and parses the result with scipy's WAV reader.

  Returns:
    tuple: ``(audio, sr)`` exactly as produced by ``scipy.io.wavfile.read``,
    with the order swapped so the samples come first.

  Raises:
    RuntimeError: if the browser returned no audio payload or ffmpeg could
      not convert it.
  """
  display(HTML(AUDIO_HTML))
  data = eval_js("data")

  # A data URL looks like "data:<mime>;base64,<payload>"; keep the payload only.
  _, separator, payload = str(data).partition(',')
  if not separator or not payload:
    raise RuntimeError("No audio was captured from the browser recorder.")
  binary = b64decode(payload)

  process = (ffmpeg
    .input('pipe:0')
    .output('pipe:1', format='wav')
    .run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True, quiet=True, overwrite_output=True)
  )
  output, err = process.communicate(input=binary)
  if process.returncode != 0 or not output:
    detail = err.decode("utf-8", errors="replace") if err else ""
    raise RuntimeError(f"ffmpeg failed to convert the recording to WAV: {detail}")

  # The RIFF size field (bytes 4:8) of the piped output is replaced with the
  # actual size before the data is handed to the WAV reader.
  sr, audio = wav_read(io.BytesIO(_patch_riff_chunk_size(output)))

  return audio, sr

In [None]:
from langchain.tools import tool

# 定義一個「查股價」的 tool

# The docstring below is what @tool hands to the model as the tool
# description, so it is runtime text and is intentionally left unchanged.
@tool
def query_stock_price(symbol: str) -> str:
    """
    透過 yfinance 查詢單一股票的近期股價資訊。
    symbol 可以輸入：2330、2330.TW、AAPL、TSM 等。
    """
    symbol = str(symbol).strip()
    # The description promises that a bare code such as "2330" works; a
    # digits-only code has no exchange suffix, so try the Taiwan ones.
    candidates = [f"{symbol}.TW", f"{symbol}.TWO"] if symbol.isdigit() else [symbol]

    for candidate in candidates:
        hist = yf.Ticker(candidate).history(period="5d")
        if not hist.empty:
            break
    else:
        # Return a readable message instead of letting hist.iloc[-1] raise
        # IndexError, so the agent can tell the user the symbol was not found.
        return f"查無 {symbol} 的近期股價資料，請確認股票代號是否正確。"

    latest = hist.iloc[-1]
    open_price, close_price = latest[["Open", "Close"]].astype(float)
    # Note: this is the move from that day's open to its close, not the
    # change versus the previous close.
    change_pct = (close_price - open_price) / open_price * 100
    date_str = latest.name.strftime("%Y-%m-%d")

    return f"""{candidate} 在 {date_str} 的收盤價約為 {close_price:.2f}，當日漲跌約 {change_pct:+.2f}%。"""

In [None]:
def TextToSpeech(text):
  """Synthesize ``text`` with gpt-4o-mini-tts (voice "alloy") into 'tts.mp3'.

  Args:
    text: The text to be spoken.
  """
  speech_request = client.audio.speech.with_streaming_response.create(
    input=text,
    voice="alloy",
    model="gpt-4o-mini-tts",
  )
  # Stream the synthesized audio to the output file.
  with speech_request as streamed_audio:
    streamed_audio.stream_to_file('tts.mp3')

In [None]:
from openai import OpenAI
client = OpenAI()

In [None]:
# Rebuild the agent for the voice assistant: same model and tool, with a
# system prompt written for a financial voice-assistant role.
# NOTE(review): the second prompt sentence ends with a trailing "，" — confirm
# whether further instructions were meant to follow.
agent = create_agent(
    model=model,
    tools=[query_stock_price],
    system_prompt=(
        "你是金融語音助理，會回答一般投資問題。"
        "如果使用者提到股票代號或公司，請善用 query_stock_price 工具，"
    )
)

In [None]:
# Full voice round trip: speech -> text -> agent -> speech.
# Step 1: record the user and transcribe the recording.
audio_text = listenTo()
print("使用者原始語音轉文字：", audio_text)

# Step 2: pass the transcription to the agent as a single user message.
result = agent.invoke({
    "messages": [
        {"role": "user", "content": audio_text}
    ]
})

# Step 3: synthesize the content of the last returned message and play it.
TextToSpeech(result["messages"][-1].content)
Audio('tts.mp3', autoplay=True)

使用者原始語音轉文字： 我想要查台积电的股价。
