<h1><center>Flair - Embeddings</center></h1>

In [1]:
from time import time

import flair
from flair.data import Sentence

## [Word Embeddings](https://github.com/zalandoresearch/flair/blob/master/resources/docs/TUTORIAL_3_WORD_EMBEDDING.md)

+ GloVe
+ FastText-Chinese

In [2]:
from flair.embeddings import WordEmbeddings

In [3]:
# Embed one English sentence with the 'glove' word embeddings and report
# the shape of the vector attached to each token.
sentence = Sentence('The grass is green .')
embedding = WordEmbeddings('glove')
embedding.embed(sentence)

for token in sentence:
    vector_shape = token.embedding.shape
    print(token, vector_shape)

Token: 1 The torch.Size([100])
Token: 2 grass torch.Size([100])
Token: 3 is torch.Size([100])
Token: 4 green torch.Size([100])
Token: 5 . torch.Size([100])


In [4]:
# Same check as for GloVe, this time with the 'zh' word embeddings on a
# whitespace-separated Chinese sentence.
sentence = Sentence('我 喜歡 吃 水果 。')
embedding = WordEmbeddings('zh')
embedding.embed(sentence)

for token in sentence:
    vector_shape = token.embedding.shape
    print(token, vector_shape)

Token: 1 我 torch.Size([300])
Token: 2 喜歡 torch.Size([300])
Token: 3 吃 torch.Size([300])
Token: 4 水果 torch.Size([300])
Token: 5 。 torch.Size([300])


## [Flair Embeddings](https://github.com/zalandoresearch/flair/blob/master/resources/docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md)

+ news-forward
+ StackedEmbeddings

In [5]:
from flair.embeddings import FlairEmbeddings

In [6]:
# Embed the sample sentence with the 'news-forward' Flair embeddings and
# report the shape of each token's vector.
sentence = Sentence('The grass is green .')
embedding = FlairEmbeddings('news-forward')
embedding.embed(sentence)

for token in sentence:
    vector_shape = token.embedding.shape
    print(token, vector_shape)

Token: 1 The torch.Size([2048])
Token: 2 grass torch.Size([2048])
Token: 3 is torch.Size([2048])
Token: 4 green torch.Size([2048])
Token: 5 . torch.Size([2048])


In [7]:
from flair.embeddings import StackedEmbeddings

# Combine GloVe with the 'news-forward' and 'news-backward' Flair embeddings
# into a single embedding object, one component per line for readability.
embedding = StackedEmbeddings([
    WordEmbeddings('glove'),
    FlairEmbeddings('news-forward'),
    FlairEmbeddings('news-backward'),
])
sentence = Sentence('The grass is green .')
embedding.embed(sentence)

# Each token now carries one combined vector; print its shape.
for token in sentence:
    vector_shape = token.embedding.shape
    print(token, vector_shape)

Token: 1 The torch.Size([4196])
Token: 2 grass torch.Size([4196])
Token: 3 is torch.Size([4196])
Token: 4 green torch.Size([4196])
Token: 5 . torch.Size([4196])


## [ELMo Embeddings](https://github.com/zalandoresearch/flair/blob/master/resources/docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md)

+ ELMo

In [8]:
from flair.embeddings import ELMoEmbeddings

# Instantiate the ELMo embeddings with the library defaults (no arguments
# are passed). The following cell reuses `embedding` to embed two sentences.
embedding = ELMoEmbeddings()

  index_range = sequence_lengths.new_tensor(torch.arange(0, len(sequence_lengths)))


In [9]:
# Two sentences that differ only in their second token ('grass' vs. 'grava').
sentence1 = Sentence('The grass is green .')
sentence2 = Sentence('The grava is green .')

# Embed each sentence separately with the embedding built in the previous cell.
embedding.embed(sentence1)
embedding.embed(sentence2)

for token in sentence1:
    print(token, token.embedding.shape)

# For the first four token positions, print the token text and its vector
# from both sentences, one after the other, so they can be compared by eye.
for position in range(4):
    print('---------')
    for sent in (sentence1, sentence2):
        print(sent[position].text, ',', sent[position].embedding)

  index_range = sequence_lengths.new_tensor(torch.arange(0, len(sequence_lengths)))


Token: 1 The torch.Size([3072])
Token: 2 grass torch.Size([3072])
Token: 3 is torch.Size([3072])
Token: 4 green torch.Size([3072])
Token: 5 . torch.Size([3072])
---------
The , tensor([-0.3288,  0.2022, -0.5940,  ..., -1.2773,  0.3049,  0.2150])
The , tensor([-0.3288,  0.2022, -0.5940,  ..., -1.4559,  0.2238,  0.1010])
---------
grass , tensor([ 0.2539, -0.2363,  0.5263,  ..., -0.7001,  0.8798,  1.4191])
grava , tensor([ 0.0642, -0.4907,  0.7769,  ..., -0.7244,  1.9380, -0.5247])
---------
is , tensor([ 0.1915,  0.2300, -0.2894,  ..., -0.3626,  1.9066,  1.4520])
is , tensor([ 0.1915,  0.2300, -0.2894,  ..., -0.3680,  1.9113,  1.4519])
---------
green , tensor([ 0.1779,  0.1309, -0.1041,  ..., -0.1006,  1.6152,  0.3299])
green , tensor([ 0.1779,  0.1309, -0.1041,  ..., -0.1034,  1.6204,  0.3276])


## [BERT Embeddings](https://github.com/zalandoresearch/flair/blob/master/resources/docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md)

+ English
+ Chinese

In [10]:
from flair.embeddings import BertEmbeddings

# Instantiate the BERT embeddings with the library defaults (no model name is
# passed). The following cell reuses `embedding` to embed two English sentences.
embedding = BertEmbeddings()

In [11]:
# Two sentences that differ only in their second token ('grass' vs. 'grava').
sentence1 = Sentence('The grass is green .')
sentence2 = Sentence('The grava is green .')

# Embed each sentence separately with the embedding built in the previous cell.
embedding.embed(sentence1)
embedding.embed(sentence2)

for token in sentence1:
    print(token, token.embedding.shape)

# For the first four token positions, print the token text and its vector
# from both sentences, one after the other, so they can be compared by eye.
for position in range(4):
    print('---------')
    for sent in (sentence1, sentence2):
        print(sent[position].text, ',', sent[position].embedding)

Token: 1 The torch.Size([3072])
Token: 2 grass torch.Size([3072])
Token: 3 is torch.Size([3072])
Token: 4 green torch.Size([3072])
Token: 5 . torch.Size([3072])
---------
The , tensor([-0.0323, -0.3904, -1.1946,  ...,  0.1305, -0.1365, -0.4323])
The , tensor([ 0.1527, -0.2964, -0.6643,  ...,  0.2863, -0.2772,  0.0180])
---------
grass , tensor([-0.3973,  0.2652, -0.1337,  ...,  0.3715,  0.1097, -1.1625])
grava , tensor([ 0.3882, -0.4389, -0.0919,  ..., -0.2412,  0.5347,  0.5385])
---------
is , tensor([ 0.1374, -0.3688, -0.8292,  ...,  0.2533,  0.0294,  0.4293])
is , tensor([ 0.4381, -0.4556, -0.3477,  ...,  0.7417,  0.1189,  0.7459])
---------
green , tensor([-0.7722, -0.1152,  0.3661,  ...,  0.1575, -0.0682, -0.7661])
green , tensor([-0.4984, -0.4435,  0.3955,  ...,  0.7010, -0.0687, -0.0059])


In [12]:
# NOTE(review): BertEmbeddings was already imported in an earlier cell; this
# repeated import is redundant but harmless.
from flair.embeddings import BertEmbeddings

# Replace `embedding` with BERT embeddings loaded from the
# 'bert-base-chinese' model name for the Chinese example that follows.
embedding = BertEmbeddings('bert-base-chinese')

In [13]:
# Two whitespace-separated Chinese sentences that differ only in their
# second token ('愛' vs. '要').
sentence1 = Sentence('小明 愛 吃 蘋果 。')
sentence2 = Sentence('小明 要 吃 蘋果 。')

# Embed each sentence separately with the embedding built in the previous cell.
embedding.embed(sentence1)
embedding.embed(sentence2)

for token in sentence1:
    print(token, token.embedding.shape)

# For the first four token positions, print the token text and its vector
# from both sentences, one after the other, so they can be compared by eye.
for position in range(4):
    print('---------')
    for sent in (sentence1, sentence2):
        print(sent[position].text, ',', sent[position].embedding)

Token: 1 小明 torch.Size([3072])
Token: 2 愛 torch.Size([3072])
Token: 3 吃 torch.Size([3072])
Token: 4 蘋果 torch.Size([3072])
Token: 5 。 torch.Size([3072])
---------
小明 , tensor([ 0.3739, -0.7621, -0.3738,  ..., -0.3334, -0.0919,  0.2904])
小明 , tensor([ 0.4984, -0.9325, -0.3565,  ..., -0.3877, -0.2532,  0.0981])
---------
愛 , tensor([ 0.5315, -0.0829, -1.0540,  ...,  0.8733,  0.4928,  0.8453])
要 , tensor([ 0.2315, -0.0260, -0.8028,  ...,  0.7718,  0.2066,  0.3716])
---------
吃 , tensor([ 0.5125, -0.1900,  0.0707,  ...,  0.7921,  0.9561,  0.3185])
吃 , tensor([ 0.2760, -0.2583, -0.6139,  ...,  0.7908,  0.3669,  0.1169])
---------
蘋果 , tensor([ 1.3542, -0.8648, -0.2211,  ...,  0.3679,  0.1687,  0.4705])
蘋果 , tensor([ 1.1218, -0.6747, -0.2706,  ...,  0.2042, -0.3189,  0.6066])


## [Document Embeddings](https://github.com/zalandoresearch/flair/blob/master/resources/docs/TUTORIAL_5_DOCUMENT_EMBEDDINGS.md)

+ DocumentPoolEmbeddings

In [14]:
from flair.embeddings import DocumentPoolEmbeddings

# Build a document-level embedding on top of every token-level embedding
# demonstrated earlier in this notebook, listed one component per line.
# Each component is constructed afresh here with the same arguments as before.
embedding = DocumentPoolEmbeddings([
    WordEmbeddings('glove'),
    FlairEmbeddings('news-forward'),
    FlairEmbeddings('news-backward'),
    ELMoEmbeddings(),
    BertEmbeddings(),
])

  index_range = sequence_lengths.new_tensor(torch.arange(0, len(sequence_lengths)))


In [15]:
# Two sentences that differ only in their second token ('grass' vs. 'grava').
sentence1 = Sentence('The grass is green .')
sentence2 = Sentence('The grava is green .')

# Embed each sentence separately with the document embedding built above.
embedding.embed(sentence1)
embedding.embed(sentence2)

# Show the sentence-level vector of each sentence, then the shape of each.
for sent in (sentence1, sentence2):
    print(sent.get_embedding())
print('---------')
for sent in (sentence1, sentence2):
    print(sent.get_embedding().shape)

tensor([-0.4826,  0.3338,  0.3487,  ..., -0.4902,  0.9246,  0.6966])
tensor([-0.2953,  0.0270,  0.3005,  ..., -0.5320,  1.1212,  0.2834])
---------
torch.Size([10340])
torch.Size([10340])


  index_range = sequence_lengths.new_tensor(torch.arange(0, len(sequence_lengths)))
