In [1]:
import tensorflow as tf
from elmo import elmo_embedding
import numpy as np
import os
from tqdm import tqdm
import json
from model_v2 import LanguageModel
import unicodedata
import re

In [2]:
# Lookup tables loaded from disk. JSON is UTF-8 by specification, so the
# encoding is pinned explicitly instead of relying on the platform default
# (which is not UTF-8 everywhere and would corrupt non-ASCII keys).
with open('baomoi_punc/word2idx.json', 'r', encoding='utf-8') as inp:
    word2idx = json.load(inp)
with open('baomoi_punc/char2idx.json', 'r', encoding='utf-8') as inp:
    char2idx = json.load(inp)

# Class label -> output index of the classifier head.
class2idx = {
    'fashion': 0,
    'technology': 1,
    'music': 2,
    'lifestyle': 3,
    'travel': 4
}
# Inverse mapping: output index -> class label.
idx2class = {
    v: k for k, v in class2idx.items()
}

In [3]:
def cnn(inputs, labels, seq_lens, k_size, n_filters, n_classes, n_layers, n_units, dropout, l2_coef=1e-3, name='classifier', is_training=True, reuse=False):
    """Build a multi-width 1-D CNN classifier with max-over-time pooling.

    One `conv1d` branch is created per (kernel size, filter count) pair, each
    branch is max-pooled over axis 1, the pooled vectors are concatenated and
    passed through `n_layers` dense layers followed by an output projection.

    Args:
        inputs: float tensor fed to `conv1d`; axis 1 is treated as time
            (assumes layout (batch, time, channels) -- TODO confirm against
            `elmo_embedding`).
        labels: int32 class ids, one per example. Only read when
            `is_training` is true.
        seq_lens: int tensor with the number of valid timesteps per example.
            Positions at or beyond this length are excluded from pooling.
        k_size: iterable of convolution kernel widths.
        n_filters: iterable of filter counts, zipped with `k_size`.
        n_classes: number of output classes.
        n_layers: number of hidden dense layers.
        n_units: width of every hidden dense layer.
        dropout: drop probability (keep probability is `1 - dropout`). Only
            used when `is_training` is true.
        l2_coef: L2 regularizer scale attached to the conv kernels and the
            hidden dense weights (the output layer has no regularizer). The
            returned loss is the plain cross-entropy; regularization terms are
            not added to it here.
        name: variable scope name.
        is_training: selects the training graph (dropout, loss, accuracy) or
            the inference graph (softmax probabilities).
        reuse: forwarded to `tf.variable_scope`.

    Returns:
        `(logits, pred, loss, acc)` when `is_training` is true, otherwise
        `(logits, pred, prob)`.
    """
    with tf.variable_scope(name, reuse=reuse):
        # (batch, time, 1) mask of valid timesteps. `maxlen` is tied to the
        # actual time dimension so the mask always lines up with the conv
        # output (padding='same' keeps the time dimension unchanged).
        mask = tf.expand_dims(
            tf.sequence_mask(seq_lens, maxlen=tf.shape(inputs)[1], dtype=tf.float32),
            axis=-1)
        # Large value subtracted at padded positions so they can never win the
        # max-over-time pooling. Valid positions subtract exactly 0.0, so an
        # unpadded batch produces the same values as without masking.
        pad_penalty = (1.0 - mask) * 1e9

        pooled = []
        for ksz, nf in zip(k_size, n_filters):
            output = tf.layers.conv1d(inputs,
                                      nf,
                                      ksz,
                                      padding='same',
                                      kernel_initializer=tf.glorot_uniform_initializer(),
                                      bias_initializer=tf.zeros_initializer(),
                                      kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=l2_coef))
            if is_training:
                # noise_shape (1, 1, nf): one keep/drop decision per filter,
                # shared across the batch and all timesteps.
                output = tf.nn.dropout(output, 1-dropout, noise_shape=(1, 1, nf))
            output = tf.reduce_max(output - pad_penalty, axis=1)
            pooled.append(output)
        outputs = tf.concat(pooled, axis=-1)

        # NOTE(review): the hidden layers are purely affine (no activation
        # between them). Left as-is because existing checkpoints were
        # presumably trained with this exact graph -- confirm before changing.
        for i in range(n_layers):
            W = tf.get_variable(name='W_{}'.format(i), shape=(outputs.shape[-1], n_units), trainable=True, initializer=tf.glorot_uniform_initializer(), regularizer=tf.contrib.layers.l2_regularizer(scale=l2_coef))
            b = tf.get_variable(name='b_{}'.format(i), shape=(n_units,), trainable=True, initializer=tf.zeros_initializer())
            outputs = tf.nn.xw_plus_b(outputs, W, b, name='dense_{}'.format(i))
            if is_training:
                outputs = tf.nn.dropout(outputs, keep_prob=1.0-dropout)

        W = tf.get_variable(name='W_out', shape=(outputs.shape[-1], n_classes), trainable=True, initializer=tf.glorot_uniform_initializer())
        b = tf.get_variable(name='b_out', shape=(n_classes,), trainable=True, initializer=tf.zeros_initializer())
        outputs = tf.nn.xw_plus_b(outputs, W, b, name='dense_out')
        pred = tf.argmax(outputs, axis=-1, name='predict', output_type=tf.int32)
        if is_training:
            loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=outputs))
            # Fraction of the batch whose argmax matches the label.
            acc = tf.reduce_sum(tf.to_float(tf.equal(pred, labels))) / tf.to_float(tf.shape(inputs)[0])
            return outputs, pred, loss, acc
        else:
            prob = tf.nn.softmax(outputs, axis=-1)
            return outputs, pred, prob

In [4]:
# Start from an empty default graph so re-running this cell does not stack
# duplicate ops/variables on top of a previous build.
tf.reset_default_graph()

# Allocate GPU memory on demand rather than reserving it all up front.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
session = tf.Session(config=config)

# Graph inputs. `x` matches the (None, None, 1024, 4) layout that
# `classify_text` feeds after transposing the language-model output.
x = tf.placeholder(tf.float32, shape=[None, None, 1024, 4], name='x')
y = tf.placeholder(tf.int32, shape=[None], name='y')
seq_len = tf.placeholder(tf.int32, shape=[None], name='seq_len')
drop = tf.placeholder(tf.float32, shape=[], name='drop')

elmo, elmo_l2_reg = elmo_embedding(x, seq_len, layer_norm=False, l2_coef=1e-3)

# Inference graph of the classifier: three conv branches (widths 3/4/5,
# 128 filters each), two hidden layers of 256 units, 5 output classes.
outputs, pred, prob = cnn(
    inputs=elmo,
    labels=y,
    seq_lens=seq_len,
    k_size=(3, 4, 5),
    n_filters=(128, 128, 128),
    n_classes=5,
    n_layers=2,
    n_units=256,
    dropout=drop,
    l2_coef=1e-3,
    name='classifier',
    is_training=False,
)

session.run(tf.global_variables_initializer())
# The saver is created at this point on purpose: it captures exactly the
# global variables that exist in the graph so far.
class_saver = tf.train.Saver(tf.global_variables())

In [5]:
# Hyper-parameters the language model was trained with (JSON => UTF-8).
with open('15/checkpoints/model_configs.json', 'r', encoding='utf-8') as inp:
    params = json.load(inp)

lm_model = LanguageModel(**params, is_training=False, is_encoding=True)

lm_model.build_model()

# Restore every 'LanguageModel' variable except those registered in the
# 'LSTM_SAVED_STATE' collection. The collection is fetched once instead of
# once per candidate variable, and the loop variable is not called `x` so it
# cannot be confused with the `x` input placeholder.
saved_state = tf.get_collection('LSTM_SAVED_STATE')
lm_variables = [
    v for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, 'LanguageModel')
    if v not in saved_state
]
lm_saver = tf.train.Saver(lm_variables)

# Initialize everything first so that variables covered by neither saver
# (e.g. the excluded saved-state entries) have a value; the two restores
# below then overwrite the checkpointed variables.
session.run(tf.global_variables_initializer())
lm_saver.restore(session, '15/checkpoints/test/model.cpkt-315616')
class_saver.restore(session, 'classify data/12/classifier.cpkt-1960')

INFO:tensorflow:Restoring parameters from 15/checkpoints/test/model.cpkt-315616
INFO:tensorflow:Restoring parameters from classify data/12/classifier.cpkt-1960


In [6]:
def clean_text(x):
    """Normalize raw text into a space-separated token string.

    Steps, in order:
      1. lower-case, strip surrounding whitespace, apply Unicode NFKC;
      2. surround every non-word character (`\\W`, which includes whitespace)
         with spaces so punctuation becomes its own token;
      3. replace each run of line breaks (plus adjacent spaces) with the
         marker token ``L``;
      4. collapse runs of spaces into a single space;
      5. replace each run of digits with the marker token ``N``.

    The text is lower-cased before the upper-case ``L``/``N`` markers are
    inserted, so the markers cannot collide with letters of the input.
    The result may carry a leading/trailing space; callers split on whitespace.

    Args:
        x: input string.

    Returns:
        The cleaned string.
    """
    text = unicodedata.normalize('NFKC', x.lower().strip())
    # Raw strings: '\d' and '\g' are not valid Python string escapes, and
    # non-raw literals containing them trigger Deprecation/SyntaxWarnings.
    text = re.sub(r'(?P<punc>\W)', r' \g<punc> ', text)
    text = re.sub(r'[\n\r][ \n\r]*', ' L ', text)
    text = re.sub(r'[ ]+', ' ', text)
    return re.sub(r'\d+', 'N', text)

def pad_sequence(words):
    """Right-pad variable-length id sequences with zeros.

    Args:
        words: non-empty sequence of sequences of numeric ids.

    Returns:
        A float array of shape (len(words), 1, longest sequence length) in
        which row `i` holds `words[i]` followed by zeros.

    Raises:
        ValueError: if `words` is empty (there is no longest sequence).
    """
    width = max(map(len, words))
    padded = np.zeros((len(words), 1, width))
    # Each `row` is a (1, width) view into `padded`, so writing through it
    # fills the result in place.
    for row, ids in zip(padded, words):
        row[0, :len(ids)] = ids
    return padded

def __embed_sequence(sentence):
    """Run the language model over a tokenized sentence.

    Every token is converted to a list of character ids (characters missing
    from `char2idx` fall back to the id of ``'U'``), the id lists are
    zero-padded to a common width, and the batch is fed to `lm_model` with
    its state reset.

    Args:
        sentence: sequence of token strings.

    Returns:
        The evaluated `lm_model.concated_timewise_output`.
    """
    fallback_id = char2idx['U']
    char_ids = []
    for word in sentence:
        char_ids.append([char2idx.get(ch, fallback_id) for ch in word])
    padded = pad_sequence(char_ids)
    feed = {
        lm_model.inputs: padded,
        lm_model.seq_lens: [len(char_ids)],  # one sequence: the token count
        lm_model.reset_state: True,
    }
    return session.run(lm_model.concated_timewise_output, feed_dict=feed)
def embed_sentence(sentence):
    """Embed an already tokenized sentence.

    No cleaning or splitting happens here: `sentence` is passed straight to
    `__embed_sequence`, so callers must tokenize first.
    """
    embeddings = __embed_sequence(sentence)
    return embeddings

In [7]:
def classify_text(sentence):
    """Classify a raw text snippet.

    The text is cleaned and whitespace-tokenized, embedded with the language
    model, and pushed through the classifier's softmax output.

    Args:
        sentence: raw input string.

    Returns:
        dict mapping each class label to its predicted probability.

    Raises:
        ValueError: if the text contains no tokens after cleaning.
    """
    tokens = clean_text(sentence).split()
    if not tokens:
        # Fail here with a clear message rather than deep inside the padding
        # helper, which cannot size a batch with no tokens.
        raise ValueError('classify_text() needs at least one token after cleaning')
    # Swap the first two axes so the array matches the `x` placeholder
    # (presumably time-major -> batch-major; confirm against the LM output).
    em = np.transpose(embed_sentence(tokens), (1, 0, 2, 3))
    pr = session.run(prob, feed_dict={
        x: em, seq_len: [em.shape[1]], drop: 0.0
    })
    # Map by output index explicitly instead of relying on the iteration
    # order of `class2idx` happening to match the index values.
    return {idx2class[i]: p for i, p in enumerate(pr[0])}

In [8]:
classify_text("""H√¥m nay Rb r·∫•t vui v√¨ nh·∫≠n ƒëc gi·ªè qu√† xinh x·∫Øn n√®: v√≤ng Nguy·ªát Qu·∫ø, chai d·∫ßu √î Liu, m√¥ h√¨nh t√≤a nh√† truy·ªÅn th·ªëng Hy L·∫°p, chi·∫øc mu·ªóng g·ªó d·ªÖ th∆∞∆°ng, th√¥ng ƒëi·ªáp ƒë√°ng y√™u v√† ƒë·∫∑c bi·ªát l√† m√≥n s·ªØa chua phong c√°ch Hy L·∫°p danh ti·∫øng.

C·∫£m ∆°n Vinamilk r·∫•t nhi·ªÅu v·ªÅ m√≥n qu√† d·ªÖ th∆∞∆°ng ƒë·∫≠m ch·∫•t "Hy L·∫°p" n√†y nh√©. Tr·∫£i nghi·ªám ƒë·∫ßu ti√™n c·ªßa Rb sau khi nh√¢m nhi s·ªØa chua Vinamilk Greek Yoghurt Style l√† ƒë·ªô s√°nh m·ªãn ƒë·∫∑c tr∆∞ng k√®m h∆∞∆°ng v·ªã tr√°i c√¢y r·∫•t th∆°m ngon. H∆°n n·ªØa, s·∫£n ph·∫©m n√†y ch·ª©a g·∫•p ƒë√¥i protein v√† gi·∫£m 50% b√©o gi√∫p c√¢n b·∫±ng dinh d∆∞·ª°ng m·ªôt c√°ch t·ª± nhi√™n, gi√∫p cho da ƒë·∫πp v√† d√°ng xinh m·ªói ng√†y n√®ee.

ƒê·ª£t ngh·ªâ l·ªÖ s·∫Øp t·ªõi, c·∫£ nh√† c√≥ th·ªÉ b·ªï sung Vinamilk Greek Yoghurt Style v√†o th·ª±c ƒë∆°n tr√°ng mi·ªáng ho·∫∑c danh s√°ch c√°c m√≥n ƒÉn x·∫ø nh√©. V·ª´a ngon l·∫°i t·ªët cho s·ª©c kh·ªèe m√† kh√¥ng c·∫ßn lo tƒÉng c√¢n n·ªØa nhaaaa <3

#S·ªØachuaHyL·∫°p #VinamilkGreekYoghurt #Vinamilk #B√≠quy·∫øtkh·ªèeƒë·∫πpt·ª´HyL·∫°p #B√≠quy·∫øtkh·ªèeƒë·∫πphi·ªánƒë·∫°i #B·ªØaX·∫øTh·∫ßnTh√°nh""")

{'fashion': 0.00036384078,
 'technology': 7.625535e-07,
 'music': 1.792387e-07,
 'lifestyle': 0.5603025,
 'travel': 0.43933263}

In [9]:
classify_text("B√°nh b√®o nh·∫≠p")

{'fashion': 0.00026679866,
 'technology': 0.094306804,
 'music': 0.09219778,
 'lifestyle': 0.57952005,
 'travel': 0.23370853}

In [10]:
classify_text("Ph√≥ H·∫±ng, ka - ji - maaaaaaaaa :((((((")

{'fashion': 0.42557693,
 'technology': 0.0071761818,
 'music': 0.44346178,
 'lifestyle': 0.10953595,
 'travel': 0.014249128}

In [11]:
classify_text("Qu√°n cf b·∫•t ·ªïn tr·ªü l·∫°i r·ªìi n√®eeeee!!!")

{'fashion': 0.0034559632,
 'technology': 0.15520361,
 'music': 0.016786214,
 'lifestyle': 0.0010387475,
 'travel': 0.8235155}

In [12]:
classify_text("""H√¥m qua r√°ng th·ª©c canh sale flash day 0h c·ªßa Lazada m√† loay hoay sao ng·ªß qu√™n ƒë·ªÉ l·ª° m·∫•t.

Nay canh me 10h v√†o, tr·ªÖ h∆°n ng∆∞·ªùi kh√°c m·ªôt x√≠u m√† th·∫•y deal ƒë√£ v∆°i ƒëi m·ªôt n·ª≠a r·ªìi.

Th√¥i l·∫°i ƒë·ª£i t·ªõi 20h ƒë·ªÉ r√°ng h·ªët ƒë·ª£t cu·ªëi v·∫≠yyyyyy.

T·ª´ gi·ªù t·ªõi h·∫øt ng√†y v·∫´n c√≤n sale ƒë·ªìng gi√° nhi·ªÅu l·∫Øm, Rb th·∫•y c√≥ c·∫£ m√°y t√≠nh b·∫£ng lenovo ideapad, ƒë·ªìng h·ªì wellington, tai nghe Beats,...

B·∫°n n√†o b·ªã l·ª° ƒë·ª£t sale tr∆∞·ªõc th√¨ tham kh·∫£o link b√™n d∆∞·ªõi v√† canh gi·ªù mua cho m√¨nh nh·ªØng s·∫£n ph·∫©m m√¨nh th√≠ch v·ªõi gi√° r·∫ª nhaaaaa ‚ù§

üëâüèª http://bit.ly/8-8LZD""")

{'fashion': 5.3068256e-06,
 'technology': 0.81009513,
 'music': 0.0016731316,
 'lifestyle': 0.0012341929,
 'travel': 0.18699224}

In [13]:
classify_text("""V√≤ng v√≤ngg""")

{'fashion': 0.0006011887,
 'technology': 0.01154514,
 'music': 0.0038009107,
 'lifestyle': 0.0004980784,
 'travel': 0.98355466}

In [14]:
classify_text('M√¨nh b·∫Øt ch∆∞·ªõc m√®o k√™u ko anh???')

{'fashion': 5.6284647e-05,
 'technology': 0.0631349,
 'music': 0.044590887,
 'lifestyle': 0.8834693,
 'travel': 0.008748686}

In [15]:
classify_text("""T·ªõi gi·ªù Ribi m·ªõi bi·∫øt l√† tr√°i s∆° ri r·∫ª v·∫≠y th√¥i m√† c√≥ ch·ª©a nhi·ªÅu d∆∞·ª°ng ch·∫•t l·∫Øm nha, c√≥ nhi·ªÅu vitamin A, C, d∆∞·ª°ng ch·∫•t ƒë·∫ßy ƒë·ªß, ch·ªëng oxy h√≥a khi·∫øn da ƒë·∫πp, kh√¥ng b·ªã l√£o ho√° v√† ƒëi k√®m r·∫•t nhi·ªÅu l·ª£i √≠ch kh√°c. ƒÇn s∆° ri ng·ªçt ng·ªçt chua chua ch·∫•m mu·ªëi n·ªØa l√† ƒë√∫ng b√†i lu√¥n :))

B·ªØa gi·ªù ƒëi quay, ch·ª•p li√™n t·ª•c, ƒëang m·ªát t·ª± nhi√™n ƒë∆∞·ª£c mua cho chai n∆∞·ªõc s∆° ri si√™u ngon, b·∫°n ·∫•y c√≤n k√™u n∆∞·ªõc n√†y "th·∫ßn th√°nh" l·∫Øm v√¨ c√≥ ch·∫•t Glutathione d∆∞·ª°ng s√°ng da t·ª´ b√™n trong n·ªØa.

M·ªçi ng∆∞·ªùi mu·ªën mua th√¨ h·ªèi Kirin Ice+ Beauti, chai m√†u h·ªìng d·ªÖ th∆∞∆°ng Ribi ƒëang c·∫ßm trong h√¨nh ƒë√≥. U·ªëng sau khi ƒÉn s√°ng ho·∫∑c ƒÉn tr∆∞a ƒë·ªÉ h·∫•p thu t·ªët h∆°n, b·ªï sung n∆∞·ªõc v√† d∆∞·ª°ng ch·∫•t cho da nha.

ƒê·∫∑c bi·ªát v√†o ng√†y 02/08, Ribi ƒë∆∞·ª£c b√™n Kirin m·ªùi tham gia d·ª± s·ª± ki·ªán ra m·∫Øt s·∫£n ph·∫©m n√†y ·ªü Novotel Saigon, n√™n Ribi ∆∞u √°i d√†nh t·∫∑ng 20 su·∫•t cho 20 b·∫°n may m·∫Øn ƒëi c√πng Ribi ƒë·∫øn s·ª± ki·ªán v√† t·∫≠n h∆∞·ªüng s·∫£n ph·∫©m tuy·ªát v·ªùi n√†y nha.

ƒê√¢y l√† link tham gia n√®: https://goo.gl/forms/7IILsh1kPUGWrKqz2. Form n√†y s·∫Ω ƒë√≥ng v√†o
cu·ªëi ng√†y 29 nh√©.

ƒê√∫ng ng√†y 30/7, nh·ªØng b·∫°n may m·∫Øn s·∫Ω ƒë∆∞·ª£c nh·∫≠n email th√¥ng b√°o k·∫øt qu·∫£ v√† h∆∞·ªõng d·∫´n chi ti·∫øt ƒë·ªÉ tham gia s·ª± ki·ªán nha.
Nhanh tay ƒëƒÉng k√≠ nha m·∫•y b·∫°n""")

{'fashion': 1.7280232e-05,
 'technology': 0.00876589,
 'music': 2.3415252e-05,
 'lifestyle': 0.98334956,
 'travel': 0.0078438725}

In [16]:
classify_text("""C√≥ ai mua ƒë·∫ßm ·ªü JM ch∆∞a n√®??? H√£ng th·ªùi trang chuy√™n v·ªÅ ƒë·∫ßm thi·∫øt k·∫ø s·∫Ω ch√≠nh th·ª©c khai tr∆∞∆°ng c∆° s·ªü th·ª© 15 t·∫°i qu·∫≠n 7 n√®, m·ªçi ng∆∞·ªùi tranh th·ªß gh√© qua shopping nha, c√≥ nhi·ªÅu ƒë·∫ßm ƒë·∫πp l·∫Ømmmm.

17h30 chi·ªÅu mai (14/7/2018) t·∫°i JM - 320 Nguy·ªÖn Th·ªã Th·∫≠p, ph∆∞·ªùng T√¢n Quy, qu·∫≠n 7 nh√© m·ªçi ng∆∞·ªùi.

ƒê·∫øn s·ªõm s·ªõm ch√∫t x√≠u ƒë·ªÉ giao l∆∞u c√πng Rb v√† nh·∫≠n th·∫≠t nhi·ªÅu ∆ØU ƒê√ÉI t·ª´ JM c√°c b·∫°n nha ‚ù§

#damthietkeJM""")

{'fashion': 0.9854525,
 'technology': 3.303245e-07,
 'music': 0.00010304647,
 'lifestyle': 0.00025467935,
 'travel': 0.014189452}

In [17]:
classify_text("""Ngh√¨n nƒÉm h·ªôi ng·ªô :v""")

{'fashion': 0.0026618436,
 'technology': 0.7234123,
 'music': 0.15934226,
 'lifestyle': 0.05093748,
 'travel': 0.06364623}

In [18]:
classify_text("V√¨ ph√≠a tr∆∞·ªõc hai ta s·∫Ω c√≤n ƒëo·∫°n ƒë∆∞·ªùng r·∫•t d√†i !")

{'fashion': 0.017974585,
 'technology': 0.7119603,
 'music': 0.075174615,
 'lifestyle': 0.0068301233,
 'travel': 0.18806046}

In [19]:
classify_text("Quyen Le gi·ªèi qu√° Q, b·∫£n lƒ©nh l·∫Ømmmmm, ch√∫c m·ª´ng e v√† c√°c ƒë·ªìng ƒë·ªôi nha :*")

{'fashion': 0.0006057551,
 'technology': 0.87591195,
 'music': 0.101348825,
 'lifestyle': 0.000115391456,
 'travel': 0.02201808}

In [20]:
classify_text("‚ù§ c·∫£m ∆°n m·ªçi ng∆∞·ªùi ! <3")

{'fashion': 0.55040735,
 'technology': 0.011474007,
 'music': 0.10431931,
 'lifestyle': 0.09591513,
 'travel': 0.23788427}

In [21]:
classify_text("Trailer ss5 nha c√°c b·∫°n tr·∫ª :*")

{'fashion': 1.2542271e-05,
 'technology': 0.9948147,
 'music': 0.0035700693,
 'lifestyle': 2.2032264e-05,
 'travel': 0.0015805265}

In [22]:
classify_text("""M·∫•y b·ªØa tr∆∞·ªõc th·∫•y b·∫°n b√® r·∫ßn r·∫ßn mua gi√†y ·ªü Sablanca n√™n nay ƒë∆∞·ª£c ngh·ªâ c≈©ng tranh th·ªß ra c·ª≠a h√†ng mua n√®.
H√™n sao ƒëi mua tr√∫ng ƒë·ª£t offer 40% c·ªßa c·ª≠a h√†ng n·ªØa, m√™ qu√° n√™n t·∫≠u li·ªÅn m·∫•y ƒë√¥i v·ªÅ lu√¥n :))

Gi√†y ·ªü ƒë√¢y gi√° c·∫£ h·ª£p l√Ω, v·ª´a t√∫i ti·ªÅn c·ªßa m·ªçi ng∆∞·ªùi n√®, m·∫´u m√£ c≈©ng ƒëa d·∫°ng n·ªØa, ngo√†i ra c√≤n c√≥ nhi·ªÅu m·∫´u t√∫i x√°ch xinh xinh cho c√°c b·∫°n tho·∫£i m√°i l·ª±a ch·ªçn nhen.

B·∫°n n√†o ƒëang ph√¢n v√¢n ch∆∞a bi·∫øt l·ª±a gi√†y ·ªü ƒë√¢u th√¨ c·ª© gh√© th·ª≠ nha, bi·∫øt ƒë√¢u t√¨m ƒëc ƒë√¥i gi√†y ph√π h·ª£p cho m√¨nh n√® üòÅ

Th√¥ng tin chi ti·∫øt v·ªÅ ƒë·ª£t OFFER 40% c√°c b·∫°n xem t·∫°i ƒë√¢y nhaa
üëâüèª https://www.facebook.com/Sablanca.vn/ nhaaaaaaaa""")

{'fashion': 0.017381525,
 'technology': 0.009179166,
 'music': 2.7035716e-05,
 'lifestyle': 0.5734099,
 'travel': 0.40000236}

In [23]:
classify_text("""B·∫°n n√†o l√† t√≠n ƒë·ªì l√†m ƒë·∫πp ch·∫Øc ch·∫Øn bi·∫øt s·ªØa ong ch√∫a r·∫•t t·ªët cho da ƒë√∫ng ko?
ƒê∆∞·ª£c h√¥m ƒëi si√™u th·ªã t√¨m ƒë∆∞·ª£c b·ªô s·ªØa t·∫Øm v√† lotion n√†y c·ªßa Purit√©, c√≥ th√†nh ph·∫ßn ch·ª©a tinh ch·∫•t s·ªØa ong ch√∫a d∆∞·ª°ng tr·∫Øng da to√†n th√¢n lu√¥n, nh√¨n th·∫•y m√™ qu√° n√™n Rb quy·∫øt ƒë·ªãnh rinh ngay c·∫£ b·ªô v·ªÅ üòÇ

ƒê·∫ßu ti√™n l√† h∆°i b·∫•t ng·ªù v·ªõi kh·∫£ nƒÉng d∆∞·ª°ng tr·∫Øng c·ªßa em √≠, da k tr·∫Øng li·ªÅn m√† r·∫•t t·ª± nhi√™n, l√™n tone t·ª´ t·ª´ n√®, da tr·∫Øng r·∫•t t·ª± nhi√™n ch·ª© kh√¥ng ph·∫£i ki·ªÉu tr·∫Øng b·ªách nha. Rb c√≥ ƒë·ªçc ƒë∆∞·ª£c l√† nh·ªù th√†nh ph·∫ßn s·ªØa ong ch√∫a t·ª± nhi√™n r·∫•t gi√†u vitamin gi√∫p k√≠ch th√≠ch qu√° tr√¨nh thay da n√™n r·∫•t an to√†n nhen. ƒê√¢y c≈©ng l√† c∆° ch·∫ø d∆∞·ª°ng tr·∫Øng t·ª± nhi√™n m√† Rb v·ª´a t√¨m hi·ªÉu ƒë∆∞·ª£c.

Th·ª© hai l√† ai ƒë√£ d√πng s·∫£n ph·∫©m c·ªßa d√≤ng n√†y r·ªìi ƒë·ªÅu ph·∫£i c√¥ng nh·∫≠n h∆∞∆°ng th∆°m r·∫•t d·ªÖ ch·ªãu, nh·∫π nh√†ng, tinh t·∫ø ƒë√∫ng k? N·∫øu d√πng k√®m v·ªõi em lotion c√πng b·ªô n·ªØa th√¨ hi·ªáu qu·∫£ d∆∞·ª°ng tr·∫Øng tƒÉng g·∫•p ƒë√¥i v√† kh√¥ng c·∫ßn d√πng th√™m n∆∞·ªõc hoa n·ªØa lu√¥n ƒë√≥.

Lotion n√†y kh√¥ng b·ªã nh·ªùn r√≠t, thoa t·ªõi ƒë√¢u th·∫•m t·ªõi ƒë√≥, r·∫•t th√≠ch h·ª£p cho m√πa h√® nƒÉng ƒë·ªông n√®eee.

V·ª´a tr·∫Øng t·ª± nhi√™n an to√†n, v·ª´a th∆°m tinh t·∫ø, ch·∫Øc ch·∫Øn s·∫Ω l√† s·ª± l·ª±a ch·ªçn ph√π h·ª£p cho c√°c b·∫°n g√°i t·ª•i m√¨nh ƒë√≥ ‚ò∫Ô∏è üòâ

C√°c b·∫°n c√≥ th·ªÉ t√¨m hi·ªÉu kƒ© s·∫£n ph·∫©m h∆°n ·ªü link d∆∞·ªõi nhaaa
üëâüèª bit.ly/RS-PuriteWhitening""")

{'fashion': 5.1378515e-07,
 'technology': 3.79505e-07,
 'music': 2.8193603e-10,
 'lifestyle': 0.99999905,
 'travel': 1.4747675e-07}

In [24]:
classify_text("V·∫´n ƒëang quay khuyaaa n√®e")

{'fashion': 4.3344113e-05,
 'technology': 0.57065797,
 'music': 0.418213,
 'lifestyle': 3.7746738e-06,
 'travel': 0.011081896}

In [25]:
classify_text("Nhi·ªÖm phim ph·∫£i nh·∫≠p vai v√†o phim lu√¥n m·ªõi tr·∫•t'ss nh√© =)))))))")

{'fashion': 0.0018696897,
 'technology': 0.6213504,
 'music': 0.34524873,
 'lifestyle': 0.02408247,
 'travel': 0.0074486285}

In [26]:
classify_text("""Nh∆∞ trong b√†i chia s·∫ª tr∆∞·ªõc c·ªßa Rb, n∆∞·ªõc g·∫°o Morning Rice r·∫•t t·ªët cho s·ª©c kho·∫ª ch√∫ng ta r·ªìi ƒë√∫ng kh√¥ng?
Morning Rice l√† s·∫£n ph·∫©m ƒë∆∞·ª£c l√†m t·ª´ g·∫°o, v√† nguy√™n li·ªáu g·∫°o xu·∫•t x·ª© 100% t·ª´ H√†n Qu·ªëc, s·∫£n ph·∫©m c≈©ng b√°n ch·∫°y nh·∫•t t·∫°i H√†n , l·∫°i c√≥ v·ªã th∆°m, ng·ªçt d·ªãu m√† kh√¥ng b·ªã ng·∫•y.

Rb th∆∞·ªùng u·ªëng Morning Rice khi ƒëi quay, hay b·∫•t c·ª© khi n√†o th√®m 1 m·ªôt m√≥n n∆∞·ªõc n√†o ng·ªçt ng·ªçt, ƒë·∫∑c bi·ªát n·∫øu c√°c b·∫°n u·ªëng Morning Rice v√†o bu·ªïi s√°ng c·ª±c k·ª≥ t·ªët lu√¥n, kh√¥ng nh·ªØng cung c·∫•p ƒë·ªß n∆∞·ªõc cho c·∫£ m·ªôt ng√†y d√†i m√† c√≤n t·ªët cho s·ª©c kho·∫ª, gi√∫p s·∫£ng kho√°i tinh th·∫ßn cho 1 ng√†y m·ªõi.

B·∫°n n√†o ƒë√£ s·ª≠ d·ª•ng s·∫£n ph·∫©m n√†y th∆∞·ªùng xuy√™n v√† th·∫•y hi·ªáu qu·∫£ r·ªìi th√¨ c√πng chia s·∫ª c√πng Rb cho c√°c b·∫°n ch∆∞a d√πng nha üòÄ""")

{'fashion': 5.744237e-08,
 'technology': 1.9635233e-06,
 'music': 8.4623827e-07,
 'lifestyle': 0.9999598,
 'travel': 3.7258942e-05}

In [27]:
classify_text("""L√¢u l·∫Øm m·ªõi ƒëc ch·ª•p ch√¢n dung :))""")

{'fashion': 0.0028528257,
 'technology': 0.86728126,
 'music': 0.04706661,
 'lifestyle': 0.021103393,
 'travel': 0.061695933}

In [28]:
classify_text("""ƒêlq nh∆∞ng phi√™n b·∫£n m·ªü r·ªông c·ªßa Global Warming c·ªßa con Pit s·∫Ω ƒë∆∞·ª£c ra m·∫Øt v√†o 25/11 nƒÉm nay b·ªï sung 1 s·ªë b√†i m·ªõi trong ƒë√≥ c√≥ Timber feat Flop-sa. 

Cover ch√≠nh th·ª©c nh√©, c·ª© t·ª±a t·ª±a nh∆∞ m·ª• Nicki v·ªõi c√°i Era Pink Friday 8-}""")

{'fashion': 6.6393713e-09,
 'technology': 0.99877745,
 'music': 0.0012156122,
 'lifestyle': 1.5217615e-06,
 'travel': 5.4059547e-06}

In [29]:
classify_text("""Sinh nh·∫≠t nƒÉm nay th·ª±c s·ª± qu√° h·∫°nh ph√∫c, nh·∫≠n ƒë∆∞·ª£c r·∫•t nhi·ªÅu l·ªùi ch√∫c c·ªßa gia ƒë√¨nh, Faptv, b·∫°n b√®, r·∫•t nhi·ªÅu t√¨nh c·∫£m t·ª´ nh·ªØng ng∆∞·ªùi xa l·∫°,..
C·∫£m ∆°n t·∫•t c·∫£ c√°c b·∫°n ƒë√£ ch√∫c sn v√† g√¢y r·∫•t nhi·ªÅu b·∫•t ng·ªù cho Rb nha, x√∫c ƒë·ªông l·∫Øm l·∫Ømmm.
C√≥ nhi·ªÅu tin nh·∫Øn qu√° Rb k c·∫£m ∆°n h·∫øt t·ª´ng ng∆∞·ªùi ƒë∆∞·ª£c n√™n c·∫£m ∆°n c√°c b·∫°n t·∫°i stt n√†y nha.
Hy v·ªçng nh·ªØng nƒÉm v·ªÅ sau n·ªØa ch√∫ng ta v·∫´n m√£i b√™n nhau th·∫ø n√†y ‚ù§

P/s: ƒê√¢y l√† nh√≥m b·∫°n kim c∆∞∆°ng 10 nƒÉm t·ªï ch·ª©c sn cho m√¨nh """)

{'fashion': 0.07306688,
 'technology': 0.0027055887,
 'music': 0.57545847,
 'lifestyle': 0.0005227887,
 'travel': 0.34824622}

In [30]:
classify_text("Ch·ªâ mu·ªën n√≥i l√† h√¥m nay qu√° nhi·ªÅu b·∫•t ng·ªù v√† h·∫°nh ph√∫c. M·ªôt ng√†y tuy·ªát ƒë·∫πp <3")

{'fashion': 0.3468721,
 'technology': 0.014884135,
 'music': 0.48088226,
 'lifestyle': 0.14248663,
 'travel': 0.014874797}

In [31]:
classify_text("L√¢u l·∫Øm m·ªõi c·ªôt t√≥c cao :))")

{'fashion': 0.03522759,
 'technology': 0.20556141,
 'music': 0.047811918,
 'lifestyle': 0.38692945,
 'travel': 0.32446963}

In [32]:
classify_text("""L√¢u l·∫Øm m·ªõi l√†m clip n√® :))

#Sponsored #SpotifyMVP #Spotify #SpotifyNgheLaMe #SpotifyVietnam""")

{'fashion': 0.0021324586,
 'technology': 0.028690005,
 'music': 0.00039155222,
 'lifestyle': 0.010334127,
 'travel': 0.9584518}

In [33]:
classify_text("""L√¢u l·∫Øm m·ªõi l√†m clip n√® :))""")

{'fashion': 0.0021030519,
 'technology': 0.14405298,
 'music': 0.17434731,
 'lifestyle': 0.4145741,
 'travel': 0.26492256}

In [34]:
classify_text("""V·ª´a ƒë∆∞·ª£c b·ªè t√∫i b√≠ k√≠p ch·ªëng n·∫Øng trong nh·ªØng ng√†y h√® oi b·ª©c, v·ª´a c√≥ qu√†, ai th√≠ch kh√¥ng n√®!

C√≥ r·∫•t nhi·ªÅu lo·∫°i kem ch·ªëng n·∫Øng, nh∆∞ng ƒë·ªÉ l·ª±a ƒë∆∞·ª£c lo·∫°i kem ch·ªëng n·∫Øng ph√π h·ª£p, kh√¥ng g√¢y nh·ªùn r√≠t v√† s·ª≠ d·ª•ng ph√π h·ª£p l·∫°i kh√¥ng d·ªÖ ch√∫t n√†o. Rb m·ªõi bi·∫øt th√™m kem ch·ªëng n·∫Øng Anthelios Dry Touch, n·∫øu b·∫°n n√†o d√πng r·ªìi ch·∫Øc ch·∫Øn r·∫•t th√≠ch ƒë√∫ng k?

Rb d√πng tr∆∞·ªõc khi ra ngo√†i hay tr∆∞·ªõc khi make up, apply m·ªôt l·ªõp kem ch·ªëng n·∫Øng n√†y, sau nhi·ªÅu gi·ªù ho·∫°t ƒë·ªông ngo√†i tr·ªùi da kh√¥ng h·ªÅ b·ªã ƒë·ªï d·∫ßu v√† b√£ nh·ªùn nh·ªù c√≥ ho·∫°t ch·∫•t Airlicium. B√™n c·∫°nh ƒë√≥, nh·ªù c√≥ c√¥ng ngh·ªá ƒë·ªôc quy·ªÅn XL- Protect gi√∫p ch·ªëng n·∫Øng r·∫•t hi·ªáu qu·∫£, ngƒÉn ch·∫∑n ƒë∆∞·ª£c tia UVB, UVA ƒë·∫∑c bi·ªát l√† tia h·ªìng ngo·∫°i v√† √¥ nhi·ªÖm.

Ribi c√≤n ph√°t hi·ªán ra La Roche-Posay c√≥ h·∫≥n d√≤ng x·ªãt ch·ªëng n·∫Øng Anthelios Mist, khi x·ªãt l√™n da th√¨ th·∫©m th·∫•u nhanh v√† kh√¥ng h·ªÅ ƒë·ªçng gi·ªçt. V√† ƒë·∫∑c bi·ªát l√† c√≥ th·ªÉ x·ªãt ngay tr√™n l·ªõp makeup lu√¥n nh√©, r·∫•t ti·ªán cho c√¥ng vi·ªác c·ªßa Rb khi ph·∫£i quay li√™n t·ª•c ngo√†i tr·ªùi v√† k c√≥ nhi·ªÅu th·ªùi gian chƒÉm s√≥c l·∫°i l·ªõp ch·ªëng n·∫Øng v√† make up c·ªßa m√¨nh.

Cu·ªëi c√πng l√† sau m·ªôt ng√†y l√†m vi·ªác th√¨ ƒë·ª´ng ai qu√™n ph·∫£i t·∫©y trang tr∆∞·ªõc khi ƒëi ng·ªß nha. ƒê·ª´ng l∆∞·ªùi m√† ƒëi ng·ªß lu√¥n, m·ª•n s·∫Ω n·ªïi ƒë·∫ßy m·∫∑t ƒë√≥. C√°c b·∫°n h√£y d√πng 1 mi·∫øng b√¥ng t·∫©y trang v√† cho 1 √≠t n∆∞·ªõc t·∫©y trang Micellar Water Ultra l√™n v√† l√†m s·∫°ch da. V·ªõi c√¥ng ngh·ªá c·∫£i ti·∫øn Glyco Micellar gi√∫p l·∫•y ƒëi b·ª•i b·∫©n, b√£ nh·ªùn v√† l·ªõp trang ƒëi·ªÉm, gi√∫p l√†m s·∫°ch da s√¢u v∆∞·ª£t tr·ªôi, nh∆∞ng kh√¥ng h·ªÅ c√≥ c·∫£m gi√°c kh√¥ cƒÉng, da s·∫°ch r·ªìi nhennn.

M·ªçi ng∆∞·ªùi mu·ªën bi·∫øt th√™m th√¥ng tin th√¨ xem t·∫°i link ƒë√¢y nha: http://bit.ly/SUN_LZDKOL ƒêang c√≥ ∆∞u ƒë√£i khi mua s·∫£n ph·∫©m La Roche Posay tr√™n Lazada ƒë√≥!

B√™n c·∫°nh ƒë√≥ th√¨ b·∫°n n√†o mu·ªën c√≥ qu√† th√¨ nh·ªõ nhanh tay like fanpage La Roche-Posay Vietnam v√† post n√†y c·ªßa Ribi nha. Sau ƒë√≥ tag t√™n 3 ng∆∞·ªùi b·∫°n th√¢n c·ªßa m√¨nh, Ribi s·∫Ω l·ª±a ch·ªçn ng·∫´u nhi√™n 5 b·∫°n may m·∫Øn nh·∫≠n ƒë∆∞·ª£c b·ªô s·∫£n ph·∫©m nh∆∞ n√†y nh√© ‚ù§

#larocheposay #anthelios #baovetoiuu #dakhoedonnang""")

{'fashion': 1.9522306e-06,
 'technology': 0.00015338778,
 'music': 3.443241e-07,
 'lifestyle': 0.99965286,
 'travel': 0.00019136645}

In [35]:
classify_text("""S√°ng t·ªõi gi·ªù ch·ªët ƒë∆°n cho m·∫•y x·ªã ƒë·∫πp ƒëi ch∆°i l·ªÖ m√† con b√© c≈©ng h√°o h·ª©c theoüòùüòù l·ªÖ n√†y m·∫•y x·ªã ƒëi ch∆°i ·∫£nh ƒë·∫πp th√¨ t·∫∑ng b√© nha, b√© n·∫±m nh√† ch·ªù t·∫£i ·∫£nh ƒë·∫πp c·ªßa m·∫•y x·ªãüíìüíìüíì""")

{'fashion': 0.002549346,
 'technology': 0.0025683732,
 'music': 0.00090067496,
 'lifestyle': 0.03395762,
 'travel': 0.960024}

In [36]:
classify_text("""H∆°n 100 ƒë∆°n h√†ng m·ªói ng√†y l√† nh·ªù v√†o ch·∫•t l∆∞·ª£ng v√† t√¨nh c·∫£m kh√°ch h√†ng ti·∫øng l√†nh ƒë·ªìn xaüòú c·∫£m ∆°n kh√°ch h√†ng lu√¥n ·ªßng h·ªô m·∫π Joy!!! S·∫Ω c·ªë g·∫Øng trong th·ªùi gian s·ªõm nh·∫•t th√™m 1 chi nh√°nh n·ªØa ƒë·ªÉ ch·ªã em g·∫ßn h∆°n v·ªõi T√≥c Gi·∫£ HMT üôèüèª""")

{'fashion': 0.0715459,
 'technology': 0.41376218,
 'music': 0.050756563,
 'lifestyle': 0.3171401,
 'travel': 0.14679521}

In [37]:
classify_text("""üëâüèªA08 v·∫´n c·ª© l√† tuy·ªát v·ªùi n√™n pr l·∫°i cho m·∫•y ch·ªã em nh√† m√¨nh ƒë·ª´ng b·ªè Qu√™n em ·∫•yüòúüòú 1 b∆∞·ªõc xinh lung linh kh√¥ng ph·∫£i r∆∞·ªùm r√† g√¨ h·∫øt √Ωüíöüíöüíö
5m√†u: ƒëen, n√¢u ƒëen, n√¢u ƒë·ªè, n√¢u v√†ng, socola""")

{'fashion': 0.062238663,
 'technology': 0.0080154985,
 'music': 0.0016431166,
 'lifestyle': 0.9206771,
 'travel': 0.0074255844}

In [38]:
classify_text("""L√† c√¥ g√°i hay m·ªông m∆°üòπüòπ""")

{'fashion': 0.0029898842,
 'technology': 3.7148387e-05,
 'music': 0.0011402399,
 'lifestyle': 0.99566627,
 'travel': 0.00016646052}

In [39]:
classify_text("""Kh√¥ng c√≥ vi·ªác g√¨ kh√≥, ch·ªâ s·ª£ l√≤ng kh√¥ng b·ªÅn""")

{'fashion': 0.017157283,
 'technology': 0.8745693,
 'music': 0.07380007,
 'lifestyle': 0.025712987,
 'travel': 0.008760272}

In [40]:
classify_text("""N·∫øu b·∫°n th√≠ch chinh ph·ª•c nh·ªØng cung ƒë∆∞·ªùng m·∫°o hi·ªÉm th√¨ ƒë√¢y ch√≠nh l√† g√≥i b·∫£o hi·ªÉm d√†nh cho b·∫°n. B·∫£o hi·ªÉm FWD ƒëang t·∫∑ng MI·ªÑN PH√ç 100,000 g√≥i b·∫£o hi·ªÉm tai n·∫°n c√° nh√¢n, b·∫£o hi·ªÉm ƒë·∫øn. Nhanh tay ƒëƒÉng k√Ω v√† nh·∫≠n ngay g√≥i b·∫£o hi·ªÉm ch·ªâ trong v√≤ng 3 ph√∫t t·∫°i""")

{'fashion': 2.5013337e-06,
 'technology': 0.9803263,
 'music': 8.165938e-06,
 'lifestyle': 0.00016041716,
 'travel': 0.019502576}