## 詞頻矩陣

In [2]:
content = ["How to format my hard disk", 
           "Hard disk format problems "]

In [6]:
words0 = content[0].lower().split()
words0

['how', 'to', 'format', 'my', 'hard', 'disk']

In [7]:
words1 = content[1].lower().split()
words1

['hard', 'disk', 'format', 'problems']

In [9]:
words = set(words0) | set(words1)

In [10]:
len(words)

7

In [14]:
import numpy 
m = numpy.zeros((2,7))
m

array([[0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.]])

In [17]:
word_to_id = {}
id_to_word = {}
for idx, w in enumerate(words):
    #print(idx, w)
    word_to_id[w]   = idx
    id_to_word[idx] = w

In [18]:
word_to_id

{'how': 0, 'to': 1, 'my': 2, 'disk': 3, 'hard': 4, 'problems': 5, 'format': 6}

In [19]:
id_to_word

{0: 'how', 1: 'to', 2: 'my', 3: 'disk', 4: 'hard', 5: 'problems', 6: 'format'}

In [22]:
for w in words0:
    m[0,word_to_id[w]] = 1

In [24]:
word_to_id

{'how': 0, 'to': 1, 'my': 2, 'disk': 3, 'hard': 4, 'problems': 5, 'format': 6}

In [23]:
m

array([[1., 1., 1., 1., 1., 0., 1.],
       [0., 0., 0., 0., 0., 0., 0.]])

In [25]:
for w in words1:
    m[1,word_to_id[w]] = 1

In [26]:
m

array([[1., 1., 1., 1., 1., 0., 1.],
       [0., 0., 0., 1., 1., 1., 1.]])

In [27]:
m.shape

(2, 7)

In [32]:
import math
math.sqrt(((m[0] - m[1]) ** 2).sum())

2.0

## 使用 sklearn 的 CountVectorizer

In [None]:
content = ["How to format my hard disk", 
           "Hard disk format problems "]

In [34]:
from sklearn.feature_extraction.text import CountVectorizer
vectorizer = CountVectorizer()
# fit: 產生結構
# transform: 轉換
# fit_transform: 產生結構並轉換
X= vectorizer.fit_transform(content) 

In [35]:
X

<2x7 sparse matrix of type '<class 'numpy.int64'>'
	with 10 stored elements in Compressed Sparse Row format>

In [37]:
print(vectorizer.get_feature_names())

['disk', 'format', 'hard', 'how', 'my', 'problems', 'to']


In [36]:
X.toarray()

array([[1, 1, 1, 1, 1, 0, 1],
       [1, 1, 1, 0, 0, 1, 0]], dtype=int64)

## 利用相似度檢索

In [38]:
contents = ['This is a toy post about machine learning. Actually, it contains not much interesting stuff.',
'Imaging databases can get huge.',
'Most imaging databases safe images permanently.',
'Imaging databases store images.',
'Imaging databases store images. Imaging databases store images. Imaging databases store images.']

In [39]:
from sklearn.feature_extraction.text import CountVectorizer
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(contents)

In [40]:
X

<5x24 sparse matrix of type '<class 'numpy.int64'>'
	with 33 stored elements in Compressed Sparse Row format>

In [42]:
print(vectorizer.get_feature_names())

['about', 'actually', 'can', 'contains', 'databases', 'get', 'huge', 'images', 'imaging', 'interesting', 'is', 'it', 'learning', 'machine', 'most', 'much', 'not', 'permanently', 'post', 'safe', 'store', 'stuff', 'this', 'toy']


In [41]:
X.toarray()

array([[1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1,
        1, 1],
       [0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0],
       [0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0,
        0, 0],
       [0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
        0, 0],
       [0, 0, 0, 0, 3, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0,
        0, 0]], dtype=int64)

In [44]:
new_post = 'imaging database'
new_post_vec = vectorizer.transform([new_post])

In [45]:
new_post_vec

<1x24 sparse matrix of type '<class 'numpy.int64'>'
	with 1 stored elements in Compressed Sparse Row format>

In [46]:
new_post_vec.toarray()

array([[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0]])

In [47]:
import scipy as sp

def dist_raw(v1, v2):
    delta = v1-v2
    return sp.linalg.norm(delta.toarray())


In [54]:
math.sqrt(((X[0] - new_post_vec).toarray() ** 2).sum())

3.872983346207417

In [49]:
dist_raw(X[0], new_post_vec)

3.872983346207417

In [55]:
for i in range(5):
    d = dist_raw(X[i], new_post_vec)
    print(contents[i], d)

This is a toy post about machine learning. Actually, it contains not much interesting stuff. 3.872983346207417
Imaging databases can get huge. 2.0
Most imaging databases safe images permanently. 2.23606797749979
Imaging databases store images. 1.7320508075688772
Imaging databases store images. Imaging databases store images. Imaging databases store images. 5.5677643628300215


## 使用相對距離

In [56]:
def dist(v1, v2):
    v1_normalized  = v1 / sp.linalg.norm(v1.toarray()) 
    v2_normalized  = v2 / sp.linalg.norm(v2.toarray())
    delta = v1_normalized - v2_normalized
    return sp.linalg.norm(delta.toarray())

In [57]:
for i in range(5):
    d = dist(X[i], new_post_vec)
    print(contents[i], d)

This is a toy post about machine learning. Actually, it contains not much interesting stuff. 1.414213562373095
Imaging databases can get huge. 1.0514622242382672
Most imaging databases safe images permanently. 1.0878894332937856
Imaging databases store images. 1.0
Imaging databases store images. Imaging databases store images. Imaging databases store images. 1.0


## Stopwords

In [58]:
from sklearn.feature_extraction.text import CountVectorizer
vectorizer = CountVectorizer(stop_words='english')
X = vectorizer.fit_transform(contents)

In [59]:
X

<5x15 sparse matrix of type '<class 'numpy.int64'>'
	with 24 stored elements in Compressed Sparse Row format>

In [60]:
new_post = 'imaging database'
new_post_vec = vectorizer.transform([new_post])

In [62]:
print(vectorizer.get_feature_names())

['actually', 'contains', 'databases', 'huge', 'images', 'imaging', 'interesting', 'learning', 'machine', 'permanently', 'post', 'safe', 'store', 'stuff', 'toy']


In [61]:
for i in range(5):
    d = dist(X[i], new_post_vec)
    print(contents[i], d)

This is a toy post about machine learning. Actually, it contains not much interesting stuff. 1.4142135623730951
Imaging databases can get huge. 0.9194016867619662
Most imaging databases safe images permanently. 1.0514622242382672
Imaging databases store images. 1.0
Imaging databases store images. Imaging databases store images. Imaging databases store images. 1.0


## Stemming

In [63]:
import nltk.stem
s = nltk.stem.SnowballStemmer('english')
s.stem('graphics')

'graphic'

In [64]:
print(s.stem("imaging"))
print(s.stem("image"))
print(s.stem("imagination"))
print(s.stem("imagine"))


imag
imag
imagin
imagin


In [None]:
import nltk.stem
english_stemmer = nltk.stem.SnowballStemmer('english')

class StemmedCountVectorizer(CountVectorizer):
    def build_analyzer(self):
        analyzer = super(StemmedCountVectorizer, self).build_analyzer()
        return lambda doc: (english_stemmer.stem(w) for w in analyzer(doc))


In [66]:
from sklearn.feature_extraction.text import CountVectorizer
vectorizer = StemmedCountVectorizer(stop_words='english')
X = vectorizer.fit_transform(contents)

In [68]:
print(vectorizer.get_feature_names())

['actual', 'contain', 'databas', 'huge', 'imag', 'interest', 'learn', 'machin', 'perman', 'post', 'safe', 'store', 'stuff', 'toy']


In [67]:
X

<5x14 sparse matrix of type '<class 'numpy.int64'>'
	with 21 stored elements in Compressed Sparse Row format>

In [69]:
new_post = 'imaging database'
new_post_vec = vectorizer.transform([new_post])

In [70]:
new_post_vec

<1x14 sparse matrix of type '<class 'numpy.int64'>'
	with 2 stored elements in Compressed Sparse Row format>

In [71]:
new_post_vec.toarray()

array([[0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [72]:
for i in range(5):
    d = dist(X[i], new_post_vec)
    print(contents[i], d)

This is a toy post about machine learning. Actually, it contains not much interesting stuff. 1.414213562373095
Imaging databases can get huge. 0.6058108930553725
Most imaging databases safe images permanently. 0.6296288974669553
Imaging databases store images. 0.5176380902050415
Imaging databases store images. Imaging databases store images. Imaging databases store images. 0.5176380902050415


## TF-IDF Vectorizer

In [73]:
from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(contents)

In [74]:
new_post = 'imaging database'
new_post_vec = vectorizer.transform([new_post])

In [75]:
for i in range(5):
    d = dist(X[i], new_post_vec)
    print(contents[i], d)

This is a toy post about machine learning. Actually, it contains not much interesting stuff. 1.4142135623730951
Imaging databases can get huge. 1.0577078978082235
Most imaging databases safe images permanently. 1.1654660300609625
Imaging databases store images. 1.0697577475609328
Imaging databases store images. Imaging databases store images. Imaging databases store images. 1.0697577475609328


## Inheritence

In [76]:
class Dog(object):
    
    def __init__(self, name):
        self.name = name 
        
    def eat(self):
        return 'I am eating'
    
    def woof(self):
        return 'won won'

class Cat(object):
    def eat(self):
        return 'I am eating'
    
    def meow(self):
        return 'meow meow'

In [77]:
class Animal(object):
    def eat(self):
        return 'I am eating'

class Dog(Animal):
    
    def woof(self):
        return 'won won'

class Cat(Animal):
    def meow(self):
        return 'meow meow'

## StemmedTfIDFVectorizer

In [79]:
from sklearn.feature_extraction.text import TfidfVectorizer

class StemmedTfidfVectorizer(TfidfVectorizer):
    def build_analyzer(self):
        analyzer = super(TfidfVectorizer,self).build_analyzer()
        return lambda doc: (english_stemmer.stem(w) for w in analyzer(doc))


In [80]:
from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = StemmedTfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(contents)

In [81]:
new_post = 'imaging database'
new_post_vec = vectorizer.transform([new_post])

In [82]:
for i in range(5):
    d = dist(X[i], new_post_vec)
    print(contents[i], d)

This is a toy post about machine learning. Actually, it contains not much interesting stuff. 1.4142135623730951
Imaging databases can get huge. 0.8681697052899789
Most imaging databases safe images permanently. 0.859044512133176
Imaging databases store images. 0.634205801303706
Imaging databases store images. Imaging databases store images. Imaging databases store images. 0.634205801303706


## 中文詞頻矩陣

In [84]:
import jieba
jieba.load_userdict('userdict.txt')

contents = ['柯文哲為了大巨蛋一事找趙藤雄算帳', 
     '柯P將不在大巨蛋舉辦世運會',
     '今天天氣真好']


In [86]:
corpus = []
for rec in contents:
    s = ' '.join(jieba.cut(rec))
    corpus.append(s)

In [87]:
corpus

['柯文哲 為了 大巨蛋 一 事 找 趙藤雄 算帳', '柯P 將 不 在 大巨蛋 舉辦 世運會', '今天 天氣 真好']

In [88]:
from sklearn.feature_extraction.text import CountVectorizer
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(corpus)

In [89]:
X

<3x11 sparse matrix of type '<class 'numpy.int64'>'
	with 12 stored elements in Compressed Sparse Row format>

In [91]:
print(vectorizer.get_feature_names())

['世運會', '今天', '大巨蛋', '天氣', '柯p', '柯文哲', '為了', '真好', '算帳', '舉辦', '趙藤雄']


In [90]:
X.toarray()

array([[0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1],
       [1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0],
       [0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0]], dtype=int64)

In [92]:
from sklearn.metrics.pairwise import euclidean_distances
ed = euclidean_distances(X)

In [93]:
ed

array([[0.        , 2.64575131, 2.82842712],
       [2.64575131, 0.        , 2.64575131],
       [2.82842712, 2.64575131, 0.        ]])

In [94]:
from sklearn.metrics.pairwise import cosine_distances
cs = cosine_distances(X)

In [95]:
cs

array([[0.       , 0.7763932, 1.       ],
       [0.7763932, 0.       , 1.       ],
       [1.       , 1.       , 0.       ]])

In [96]:
from sklearn.metrics.pairwise import cosine_similarity
cs = cosine_similarity(X)

In [97]:
cs

array([[1.       , 0.2236068, 0.       ],
       [0.2236068, 1.       , 0.       ],
       [0.       , 0.       , 1.       ]])

## 維基百科爬蟲

In [98]:
import requests
res = requests.get('https://zh.wikipedia.org/wiki/%E6%9F%AF%E6%96%87%E5%93%B2')

In [99]:
from bs4 import BeautifulSoup
soup = BeautifulSoup(res.text, 'lxml')

In [112]:
s  = '/'.join([b.text for b in soup.select('.mw-parser-output p')[6].select('b')])

In [113]:
s

'柯文哲/柯P/KP'

In [114]:
with open('synonym.txt', 'w') as f:
    f.write(s)

In [115]:
synonym_dic = {}
for s in open('synonym.txt'):
    synonym = s.strip().split('/')
    for w in synonym[1:]:
        synonym_dic[w.lower()]  = synonym[0]
synonym_dic


{'柯p': '柯文哲', 'kp': '柯文哲'}

In [116]:
import nltk.stem

class SynonymCountVectorizer(CountVectorizer):
    def build_analyzer(self):
        analyzer = super(SynonymCountVectorizer, self).build_analyzer()
        return lambda doc: (synonym_dic.get(w, w) for w in analyzer(doc))
		
vectorizer = SynonymCountVectorizer()
X = vectorizer.fit_transform(corpus)
print(vectorizer.get_feature_names())


['世運會', '今天', '大巨蛋', '天氣', '柯文哲', '為了', '真好', '算帳', '舉辦', '趙藤雄']


In [117]:
X.toarray()

array([[0, 0, 1, 0, 1, 1, 0, 1, 0, 1],
       [1, 0, 1, 0, 1, 0, 0, 0, 1, 0],
       [0, 1, 0, 1, 0, 0, 1, 0, 0, 0]], dtype=int64)

In [118]:
from sklearn.metrics.pairwise import cosine_distances
cs = cosine_distances(X)

In [119]:
cs

array([[0.       , 0.5527864, 1.       ],
       [0.5527864, 0.       , 1.       ],
       [1.       , 1.       , 0.       ]])

In [120]:
stopwords = ['為了', '一事', '不在']


In [121]:

vectorizer = SynonymCountVectorizer(stop_words=stopwords)
X = vectorizer.fit_transform(corpus)
print(vectorizer.get_feature_names())


['世運會', '今天', '大巨蛋', '天氣', '柯文哲', '真好', '算帳', '舉辦', '趙藤雄']


In [122]:
from sklearn.metrics.pairwise import cosine_distances
cs = cosine_distances(X)
cs

array([[0. , 0.5, 1. ],
       [0.5, 0. , 1. ],
       [1. , 1. , 0. ]])

## 新聞推薦引擎

In [123]:
import pandas
news = pandas.read_excel('https://raw.githubusercontent.com/ywchiu/obanktm/master/data/20171214news.xlsx')

In [128]:
corpus = []
titles = []
for rec in news.iterrows():
    titles.append(rec[1]['title'])
    corpus.append(' '.join(jieba.cut(rec[1]['content'])))
    #break

In [129]:
from sklearn.feature_extraction.text import CountVectorizer
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(corpus)

In [130]:
X

<899x28942 sparse matrix of type '<class 'numpy.int64'>'
	with 122260 stored elements in Compressed Sparse Row format>

In [131]:
from sklearn.metrics.pairwise import cosine_distances
cs = cosine_distances(X)

In [132]:
cs.shape

(899, 899)

In [134]:
a = numpy.array([80,70,90,65,88])
a.argsort()

array([3, 1, 0, 4, 2])

In [139]:
for idx  in cs[0].argsort()[1:10]:
    if cs[0][idx] < 0.75:
        print(titles[idx], cs[0][idx])

反對水利會改制　吳敦義下令：藍委做好夜宿立院抗爭準備 0.6536245242470944
農田水利會改公務機關　蔡英文：這不是綁樁 0.6791340621697286
國防部解約慶富要提告　馮世寬怒嗆三遍：請便 0.7412839482000559
罷免案將投票　李遠哲今再度現身力挺黃國昌 0.7452540222665425


In [140]:
def getSimiliarArticle(pos):
    print('查詢文章：', titles[pos])
    for idx  in cs[pos].argsort()[1:10]:
        if cs[pos][idx] < 0.75:
            print('相關文章：', titles[idx], cs[pos][idx])

In [146]:
getSimiliarArticle(12)

查詢文章： 黃國昌：長期目標要消滅國民黨　不支持柯文哲的兩岸一家親
相關文章： 黃國昌若被罷免會更強　他：成為選輸北市的阿扁 0.5519166829972268
相關文章： 罷免案將投票　李遠哲今再度現身力挺黃國昌 0.5752596978859429
相關文章： 「兩蔣時代」超譯　網友：不准傷皇城內的和氣！ 0.5938224838071139
相關文章： 罷昌案周六投票　時代力量全力澄清不實謠言 0.6402804718905146
相關文章： 【聲援片】沈發惠呼籲罷昌案投「不同意」　黃國昌：謝謝您 0.7003246342755608


##  檢索文章

In [148]:
query = '我要辦信用卡'
query_vec = ' '.join(jieba.cut(query))

In [149]:
q =  vectorizer.transform([query_vec])

In [150]:
q

<1x28942 sparse matrix of type '<class 'numpy.int64'>'
	with 1 stored elements in Compressed Sparse Row format>

In [152]:
cs = cosine_distances(q, X)

In [154]:
cs.shape

(1, 899)

In [157]:
cs.argsort().flatten()[0]

780

In [158]:
titles[780]

'行動支付破百億元'

In [179]:
def getSimiliarArticleByTerm(query):
    query_vec = ' '.join(jieba.cut(query))
    q =  vectorizer.transform([query_vec])
    cs = cosine_distances(q, X)
    #print(cs.shape)
    #print(cs.argsort())
    for idx  in cs.argsort().flatten()[0:10]:
        if cs[0][idx] < 0.9:
            print(titles[idx], cs[0][idx])
        #if cs[pos][idx] < 0.75:
        #print('相關文章：', titles[idx], cs[pos][idx])

In [180]:
getSimiliarArticleByTerm('銀行')

被控對金管會施壓　高銀：曾銘宗抹黑擬提告 0.7486876550249827
數位化衝擊　張兆順：外銀認為台灣最適分行數60家 0.7566038082443822
遠東商銀到底違反什麼法？金管會恐有未依法裁罰之虞 0.8028192483187253
北韓高仿假鈔流入南韓　專家驚：近乎完美 0.8071526960400325
黃國昌爆高雄銀替慶富不實增資　公司：非事實 0.830678634630351
Apple等三大Pay　綁卡數達223萬張 0.8560368498502611
曾銘宗爆「好大的高雄銀」　高雄銀嗆：保留法律追訴權 0.8875196823335381


## 抓取常見問題集

In [183]:
import requests
from bs4 import BeautifulSoup
res = requests.get('https://www.o-bank.com/retail/event/event-faq')
soup = BeautifulSoup(res.text, 'lxml')

In [195]:
ary = []
for rec in soup.select('.select_body'):
    if rec.select_one('.content'):
        question = rec.select_one('h4').text
        answer = rec.select_one('.content').text
        ary.append({'question':question, 'answer': answer.strip()})
    #break

In [197]:
import pandas
df = pandas.DataFrame(ary)
df.to_excel('obank_qa.xlsx')

## 問題

請各位寫個相似度比對引擎
當我輸入問題：請問我要如何得到傳說對決虛寶?
機器人是否能根據已知QA (obank_qa.xlsx)告知我相似度最高之對應答案?

In [201]:
import pandas
qa = pandas.read_excel('/Users/davidchiu/course/obanktm/obank_qa.xlsx', index_col=0)
#qa = pandas.read_excel('obank_qa.xlsx', index_col=0)
qa.head()

Unnamed: 0,answer,question
0,不需要\r\n\t \...,如果移除並重新安裝O-Bank行動銀行APP，是否需重新啟用「設備綁定/生物辨識」服務?
1,隨時隨地享受金融服務\r\n\t ...,用網路銀行有甚麼好處？
2,依據交易類別，透過網銀密碼或簡訊OTP驗證即可轉帳\r\n\t ...,如何使用網路/行動銀行進行轉帳？
3,需使用智慧型手機/行動裝置\n\n您的智慧型手機/行動裝置必須具備GPS功能，且限在本行行動...,使用搖一搖進行收付款，有設備的限制嗎?
4,請至網路/行動銀行：個人設定/轉帳相關設定/手機號碼就是帳號，進行設定。\n\n設定完成後，...,如何設定「手機號碼就是帳號」功能？


In [206]:
qa.dropna(inplace=True)

In [219]:
corpus = []
questions = []
answers = []
for rec in qa.iterrows():
    q = ' '.join(jieba.cut(rec[1]['question']))
    a = rec[1]['answer']
    corpus.append(q)
    answers.append(a)
    questions.append(q)

In [220]:
from sklearn.feature_extraction.text import CountVectorizer
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(corpus)

In [237]:
def getSimiliarAnswerByTerm(query):
    query_vec = ' '.join(jieba.cut(query))
    #print(query_vec)
    q =  vectorizer.transform([query_vec])
    cs = cosine_distances(q, X)
    idx = cs.argsort().flatten()[0]
    print(questions[idx])
    print(answers[idx])

In [238]:
getSimiliarAnswerByTerm('請問我要如何得到傳說對決虛寶? ')

「 傳說 對決 聯名 卡 」 新 戶 活動 「 虛寶 」 如何 獲得 ?
於2018/1/18起至2019/1/18前完成開戶且申辦「傳說對決聯名卡」的新戶，於開戶日後30天內完成「刷卡消費累積NT500元」或「一卡通自動加值NT500元」則符合獲得虛寶四選一的資格。
完成刷卡或一卡通自動值累積NT500元交易後「3個營業日」(不含假日)內，發送虛寶兌獎簡訊通知。 
完成「虛寶四選一」的選擇後「3個營業日」(不含假日)內，發送虛寶兌換序號簡訊通知，再至「Garena傳說對決遊戲」裡兌換。
    (進入《Garena 傳說對決》遊戲－>點選禮物盒圖像－>遊戲公告－>活動序號兌換處－>去完成－>輸入序號－>兌換)
任何序號兌換問題，請洽Garena客服中心詢問。
完整「傳說對決聯名卡」請詳活動說明


In [239]:
getSimiliarAnswerByTerm('如何查詢存款帳號? ')

我 要 怎麼 查詢 O - Bank 存款 帳號 ?
金融卡背面或登入O-Bank行動銀行
	                                    


1.金融卡背面印有O-Bank存款帳戶之帳號（請留意! 金融卡正面為卡號，不是存款帳號喔）。

2.登入O-Bank行動銀行後查詢「我的帳戶」
