## 使用Pip 安裝第三方套件

### 讓電腦講話

In [None]:
! pip install gTTS

In [248]:
# Smoke test for gTTS: synthesize one fixed Chinese phrase and save it as MP3.
from gtts import gTTS
tts = gTTS(text='九四八七九四狂', lang='zh')
# Relative path, so hello.mp3 is written to the current working directory.
tts.save("hello.mp3")

In [249]:
! pip install pygame



In [251]:
from gtts import gTTS
from pygame import mixer
mixer.init()

import tempfile
def _discard_file(path):
    """Best-effort removal of a temporary audio file; never raises."""
    import os
    try:
        os.remove(path)
    except OSError:
        # Already removed, or still held open by the audio backend.
        pass


def speak(sentence):
    """Synthesize ``sentence`` with gTTS (lang='zh') and start playing it.

    The speech is written to a dedicated temporary ``.mp3`` file, loaded into
    ``pygame.mixer.music`` and playback is started. The call returns as soon
    as playback has been started; it does not wait for the audio to finish.

    The temporary file cannot be deleted right away because the mixer reads
    from it while playing, so its removal is scheduled for interpreter exit.

    Args:
        sentence: Text to speak. ``None``, empty, or whitespace-only text is
            ignored because there is nothing to synthesize.

    Returns:
        None.
    """
    import atexit
    import os

    # Nothing to say: skip the network round-trip and the mixer entirely.
    if not sentence or not sentence.strip():
        return

    # mkstemp gives a real, uniquely named .mp3 path that we own, so the file
    # that is actually written is the one that gets cleaned up later.
    fd, mp3_path = tempfile.mkstemp(suffix='.mp3')
    os.close(fd)  # gTTS re-opens the path itself; only the name is needed
    atexit.register(_discard_file, mp3_path)

    tts = gTTS(text=sentence, lang='zh')
    tts.save(mp3_path)
    mixer.music.load(mp3_path)
    mixer.music.play()
        
speak('今天很高興來到貴寶地')

### 讓電腦聽懂我們說的話

In [252]:
! pip install SpeechRecognition



In [346]:
! pip install PyAudio



In [256]:
import speech_recognition as sr

# Capture audio from the microphone.
# `r` and `audio` are left at module level on purpose: the following cell
# passes `audio` to `r.recognize_google(...)`.
r = sr.Recognizer()
with sr.Microphone() as source:
    # Prompt the user before the recognizer starts listening.
    print("Say something!")
    audio = r.listen(source)

Say something!


In [257]:
r.recognize_google(audio, language='zh-TW')

'大家聖誕快樂'

In [258]:
def listenToMe():
    """Record from the microphone and return the recognized text.

    Prints the prompt "Say something!", captures audio with
    ``Recognizer.listen`` and sends it to Google's recognizer with
    ``language='zh-TW'``.

    Returns:
        str: The recognized text, or ``''`` when the speech could not be
        understood, so callers can treat "heard nothing usable" as an
        ordinary outcome instead of a crash.

    Raises:
        sr.RequestError: Propagated unchanged when the recognition request
            itself fails (for example, no network).
    """
    r = sr.Recognizer()
    with sr.Microphone() as source:
        print("Say something!")
        audio = r.listen(source)
    try:
        return r.recognize_google(audio, language='zh-TW')
    except sr.UnknownValueError:
        # Unintelligible speech is expected in a voice UI; report it as
        # "nothing heard" rather than aborting the cell.
        return ''

In [260]:
# Canned replies: if the key phrase occurs in what the user said, speak the value.
qa = {'聖誕快樂': '叮叮噹 叮叮噹 鈴聲多響亮',
     '新年快樂' : '恭喜發財, 紅包拿來'}

# Listen exactly once, then test every key phrase against that one utterance.
# Calling listenToMe() inside the loop would record the user again for each
# key, comparing each phrase against a different recording.
heard = listenToMe()
for q, reply in qa.items():
    if q in heard:
        speak(reply)

Say something!
Say something!


### 寫個簡單的Wiki 爬蟲

In [271]:
import requests
from bs4 import BeautifulSoup
# Fetch the Chinese Wikipedia article for `term` and parse its HTML.
# `soup` is left at module level; the next cell queries it with a CSS selector.
term = '星際大戰'
res = requests.get('https://zh.wikipedia.org/wiki/{}'.format(term))
soup = BeautifulSoup(res.text, 'lxml')  # uses the lxml parser backend


In [272]:
soup.select_one('.mw-parser-output > p').text

'《星球大战》（英语：Star Wars），中文可簡稱「星戰」，是美國導演兼編劇乔治·卢卡斯所構思拍攝的一系列科幻電影。同时“星球大戰”也是该系列電影最早拍摄上映的第四章故事的原始片名。'

In [277]:
def crawlWiki(term):
    """Return the first paragraph of the zh.wikipedia.org article for ``term``.

    The article page is downloaded and the text of the first element matching
    ``.mw-parser-output > p`` is returned.

    This is a best-effort lookup for a voice assistant: any failure (empty
    term, network error, timeout, HTTP error status, or no matching
    paragraph) yields a fixed apology sentence instead of raising.

    Args:
        term: Article title to look up.

    Returns:
        str: The paragraph text, or the fallback sentence on any failure.
    """
    fallback = '我現在不知道你在說什麼,但我正在學習,以後變聰明了就可以回答你'

    # An empty title is not a lookup; do not hit the network for it.
    if not term:
        return fallback

    try:
        # A timeout keeps a stalled connection from hanging the notebook.
        res = requests.get('https://zh.wikipedia.org/wiki/{}'.format(term), timeout=10)
        # Treat error pages (e.g. missing article) as "don't know".
        res.raise_for_status()
        soup = BeautifulSoup(res.text, 'lxml')
        first_paragraph = soup.select_one('.mw-parser-output > p')
        if first_paragraph is None:
            return fallback
        return first_paragraph.text
    except Exception:
        # Deliberately broad to stay best-effort, but unlike a bare `except:`
        # this lets KeyboardInterrupt / SystemExit through.
        return fallback

In [278]:
speak(crawlWiki(listenToMe()))

Say something!


In [274]:
crawlWiki('星際大戰')

'《星球大战》（英语：Star Wars），中文可簡稱「星戰」，是美國導演兼編劇乔治·卢卡斯所構思拍攝的一系列科幻電影。同时“星球大戰”也是该系列電影最早拍摄上映的第四章故事的原始片名。'

### 抓取網路問答

In [283]:
import requests
from bs4 import BeautifulSoup
import pandas

# Download the FAQ feed and turn every <item> into a question/answer record.
faq_url = 'https://www.fubon.com/banking/FAQ_Data//faq/index_data/faqData1.xml'
res = requests.get(faq_url)
soup = BeautifulSoup(res.text, 'lxml')
qa = [
    {
        # <title> carries the question, <description> the answer.
        'question': entry.select_one('title').text.strip(),
        'answer': entry.select_one('description').text.strip(),
    }
    for entry in soup.select('item')
]

In [352]:
import pandas
# Build a DataFrame from the `qa` records collected in the previous cell.
df = pandas.DataFrame(qa)
# Preview only the first row instead of dumping the whole table.
df.head(1)

Unnamed: 0,answer,question
0,感謝您申辦本行信用卡，如您尚需補件，請您備妥相關資料並在文件上註明「身分證字號」、「聯絡電話...,申請信用卡有缺少資料應如何補件？


In [338]:
import jieba

# Entry 0 of each list belongs to the query sentence; entries 1..n line up with
# the rows of `df`. 'DUMMY' pads `questions`/`answers` so indices stay aligned
# with `corpus`.
query_tokens = ' '.join(jieba.cut('信用卡有缺資料, 請問應如何補件'))
faq_questions = df['question'].tolist()
faq_answers = df['answer'].tolist()

# Segment each sentence with jieba and re-join the tokens with spaces.
corpus = [query_tokens] + [' '.join(jieba.cut(text)) for text in faq_questions]
questions = ['DUMMY'] + faq_questions
answers = ['DUMMY'] + faq_answers

In [339]:
from sklearn.feature_extraction.text import CountVectorizer
# Turn the space-joined token strings in `corpus` into a term-count matrix;
# row i of X corresponds to corpus[i].
# NOTE(review): CountVectorizer's default token pattern keeps only tokens of
# two or more characters, so single-character Chinese words are presumably
# dropped here — confirm this is intended.
v = CountVectorizer()
X = v.fit_transform(corpus)

In [340]:
from sklearn.metrics.pairwise import cosine_similarity
# Cosine similarity of row 0 of X against every row of X, row 0 included.
cs= cosine_similarity(X[0],X)

In [347]:
#answers[cs.argsort().flatten()[::-1][1]]

In [344]:
import jieba
def getSimiliarAnswer(q):
    """Return the FAQ answer whose question is most similar to ``q``.

    The query and every question in the module-level ``df`` are segmented
    with jieba, turned into term-count vectors, and compared by cosine
    similarity. The answer belonging to the highest-scoring question is
    returned.

    The query is scored only against the FAQ rows, never against itself, so
    an FAQ question that matches the query exactly is ranked first instead of
    tying with the query's own row.

    Args:
        q: The user's question as text.

    Returns:
        The ``answer`` value of the best-matching ``df`` row. When nothing
        overlaps (all similarities are 0) the first row's answer is returned.
    """
    # Imported locally so the function does not depend on another cell having
    # imported these names first.
    from sklearn.feature_extraction.text import CountVectorizer
    from sklearn.metrics.pairwise import cosine_similarity

    faq_questions = df['question'].tolist()
    faq_answers = df['answer'].tolist()

    # Space-joined jieba tokens give CountVectorizer word boundaries for
    # Chinese text. Row 0 is the query, rows 1..n are the FAQ questions.
    corpus = [' '.join(jieba.cut(q))]
    corpus.extend(' '.join(jieba.cut(text)) for text in faq_questions)

    X = CountVectorizer().fit_transform(corpus)

    # scores[i] is the similarity between the query and faq_questions[i].
    scores = cosine_similarity(X[0], X[1:]).ravel()
    return faq_answers[int(scores.argmax())]

In [345]:
speak(getSimiliarAnswer(listenToMe()))

Say something!
