In [None]:
# Install gdown, which the following cells use to download files from Google Drive
!pip install gdown



In [None]:
# Download the model pre-trained on 1,000 reviews from 8 apps
# https://drive.google.com/file/d/1PQd1a3omFfoSqbYANaCGGiSq32-9Q7QA/view?usp=sharing

!gdown https://drive.google.com/uc?id=1PQd1a3omFfoSqbYANaCGGiSq32-9Q7QA

# Unpack the archive (it extracts a state_dict/ directory holding the model weights)
!tar -vzxf all_app_model.tar.gz 

Downloading...
From: https://drive.google.com/uc?id=1PQd1a3omFfoSqbYANaCGGiSq32-9Q7QA
To: /content/all_app_model.tar.gz
425MB [00:01, 247MB/s]
state_dict/
state_dict/.ipynb_checkpoints/
state_dict/lcf_bert_all_app_val_acc0.9148


In [None]:
# Fetch the ABSA-PyTorch sources and move them into the working directory,
# presumably so that `models` and `data_utils` can be imported directly below
!git clone https://github.com/songyouwei/ABSA-PyTorch
!mv ABSA-PyTorch/* .
# Install the dependencies listed in the repository's requirements file
!pip install -r requirements.txt

Cloning into 'ABSA-PyTorch'...
remote: Enumerating objects: 480, done.[K
remote: Total 480 (delta 0), reused 0 (delta 0), pack-reused 480[K
Receiving objects: 100% (480/480), 3.79 MiB | 22.42 MiB/s, done.
Resolving deltas: 100% (306/306), done.
Collecting transformers<4.0.0,>=3.5.1
[?25l  Downloading https://files.pythonhosted.org/packages/3a/83/e74092e7f24a08d751aa59b37a9fc572b2e4af3918cb66f7766c3affb1b4/transformers-3.5.1-py3-none-any.whl (1.3MB)
[K     |████████████████████████████████| 1.3MB 16.5MB/s 
Collecting tokenizers==0.9.3
[?25l  Downloading https://files.pythonhosted.org/packages/4c/34/b39eb9994bc3c999270b69c9eea40ecc6f0e97991dba28282b9fd32d44ee/tokenizers-0.9.3-cp36-cp36m-manylinux1_x86_64.whl (2.9MB)
[K     |████████████████████████████████| 2.9MB 57.7MB/s 
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)
[K     |█████████████████████

In [None]:
# -*- coding: utf-8 -*-
# file: infer_example_bert_models.py
# author: songyouwei <youwei0314@gmail.com>
# fixed: yangheng <yangheng@m.scnu.edu.cn>
# Copyright (C) 2018. All Rights Reserved.

import numpy as np
import torch
import torch.nn.functional as F
from models.lcf_bert import LCF_BERT
from models.aen import AEN_BERT
from models.bert_spc import BERT_SPC
from transformers import BertModel
from data_utils import Tokenizer4Bert
import argparse


def pad_and_truncate(sequence, maxlen, dtype='int64', padding='post', truncating='post', value=0):
    """Return ``sequence`` as a 1-D NumPy array of exactly ``maxlen`` entries.

    Args:
        sequence: Sized, sliceable sequence of numbers (list, tuple or array).
        maxlen: Length of the returned array.
        dtype: NumPy dtype of the returned array.
        padding: ``'post'`` puts the data at the start and pads the tail; any
            other value puts the data at the end and pads the head.
        truncating: ``'pre'`` keeps the last ``maxlen`` items of an over-long
            sequence; any other value keeps the first ``maxlen`` items.
        value: Fill value used for the padded positions.

    Returns:
        numpy.ndarray of shape ``(maxlen,)`` and dtype ``dtype``.
    """
    x = (np.ones(maxlen) * value).astype(dtype)
    if truncating == 'pre':
        # Use an explicit start index: ``sequence[-maxlen:]`` would return the
        # whole sequence when maxlen == 0.
        trunc = sequence[max(len(sequence) - maxlen, 0):]
    else:
        trunc = sequence[:maxlen]
    trunc = np.asarray(trunc, dtype=dtype)
    n = len(trunc)
    if padding == 'post':
        x[:n] = trunc
    else:
        # ``x[-n:]`` selects the whole array when n == 0, which made an empty
        # sequence fail with a shape mismatch; ``x[maxlen - n:]`` is then empty.
        x[maxlen - n:] = trunc
    return x

def prepare_data(text_left, aspect, text_right, tokenizer):
    """Encode one (left context, aspect, right context) triple for the BERT models.

    The three text pieces are lower-cased and stripped, then encoded with
    ``tokenizer.text_to_sequence`` in several layouts.

    Args:
        text_left: Text preceding the aspect term.
        aspect: The aspect term itself.
        text_right: Text following the aspect term.
        tokenizer: Object exposing ``text_to_sequence(str)`` and ``max_seq_len``.

    Returns:
        Tuple ``(text_bert_indices, bert_segments_ids, text_raw_bert_indices,
        aspect_bert_indices)``:
        the encoding of ``[CLS] sentence [SEP] aspect [SEP]``, the matching
        segment ids padded to ``tokenizer.max_seq_len``, the encoding of
        ``[CLS] sentence [SEP]`` and the encoding of ``[CLS] aspect [SEP]``.
    """
    left, right, target = (part.lower().strip() for part in (text_left, text_right, aspect))
    sentence = left + " " + target + " " + right
    encode = tokenizer.text_to_sequence

    sentence_ids = encode(sentence)
    target_ids = encode(target)
    text_bert_indices = encode('[CLS] ' + sentence + ' [SEP] ' + target + " [SEP]")
    text_raw_bert_indices = encode("[CLS] " + sentence + " [SEP]")

    # Non-zero entries are counted as real tokens (zero is treated as padding).
    sentence_len = np.sum(sentence_ids != 0)
    target_len = np.sum(target_ids != 0)
    # Segment 0 spans the sentence tokens plus two extra positions (presumably
    # [CLS] and the first [SEP]); segment 1 spans the aspect tokens plus one.
    segment_ids = np.asarray([0] * (sentence_len + 2) + [1] * (target_len + 1))
    bert_segments_ids = pad_and_truncate(segment_ids, tokenizer.max_seq_len)

    aspect_bert_indices = encode("[CLS] " + target + " [SEP]")

    return text_bert_indices, bert_segments_ids, text_raw_bert_indices, aspect_bert_indices


def get_classifier_from_file(arguments):
    """Build a ready-to-use classifier from command-line style arguments.

    Args:
        arguments: List of strings parsed the way ``sys.argv[1:]`` would be,
            e.g. ``['--model_name', 'lcf_bert', '--best_model_path', 'path']``.
            ``--best_model_path`` names the state-dict file loaded into the
            model and must be supplied.

    Returns:
        dict with the keys ``'tokenizer'``, ``'bert'``, ``'model'`` (weights
        loaded, in eval mode) and ``'opt'`` (the parsed options, with
        ``opt.device`` set to a ``torch.device``).

    Raises:
        ValueError: If ``--best_model_path`` is missing.

    Side effects:
        Disables autograd globally via ``torch.autograd.set_grad_enabled(False)``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_name', default='lcf_bert', type=str)
    parser.add_argument('--best_model_path', type=str)
    parser.add_argument('--dataset', default='laptop', type=str, help='twitter, restaurant, laptop')
    parser.add_argument('--train_file', type=str)
    parser.add_argument('--test_file', type=str)
    parser.add_argument('--optimizer', default='adam', type=str)
    parser.add_argument('--initializer', default='xavier_uniform_', type=str)
    parser.add_argument('--learning_rate', default=2e-5, type=float, help='try 5e-5, 2e-5 for BERT, 1e-3 for others')
    parser.add_argument('--dropout', default=0.1, type=float)
    parser.add_argument('--l2reg', default=0.01, type=float)
    parser.add_argument('--num_epoch', default=1, type=int, help='try larger number for non-BERT models')
    parser.add_argument('--batch_size', default=16, type=int, help='try 16, 32, 64 for BERT models')
    parser.add_argument('--log_step', default=5, type=int)
    parser.add_argument('--embed_dim', default=300, type=int)
    parser.add_argument('--hidden_dim', default=300, type=int)
    parser.add_argument('--bert_dim', default=768, type=int)
    parser.add_argument('--pretrained_bert_name', default='bert-base-uncased', type=str)
    parser.add_argument('--max_seq_len', default=80, type=int)
    parser.add_argument('--polarities_dim', default=3, type=int)
    parser.add_argument('--hops', default=3, type=int)
    parser.add_argument('--device', default=None, type=str, help='e.g. cuda:0')
    parser.add_argument('--seed', default=None, type=int, help='set seed for reproducibility')
    parser.add_argument('--valset_ratio', default=0, type=float, help='set ratio between 0 and 1 for validation support')
    # The following parameters are only valid for the lcf-bert model
    parser.add_argument('--local_context_focus', default='cdm', type=str, help='local context focus mode, cdw or cdm')
    parser.add_argument('--SRD', default=3, type=int, help='semantic-relative-distance, see the paper of LCF-BERT model')
    opt = parser.parse_args(arguments)

    if not opt.best_model_path:
        # Fail early, before the pretrained BERT weights are fetched and built.
        raise ValueError('--best_model_path is required to load the trained model')

    model_classes = {
        'bert_spc': BERT_SPC,
        'aen_bert': AEN_BERT,
        'lcf_bert': LCF_BERT
    }

    # Honour an explicit --device; otherwise prefer CUDA when it is available.
    if opt.device is None:
        opt.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    else:
        opt.device = torch.device(opt.device)

    tokenizer = Tokenizer4Bert(opt.max_seq_len, opt.pretrained_bert_name)
    bert = BertModel.from_pretrained(opt.pretrained_bert_name)
    model = model_classes[opt.model_name](bert, opt).to(opt.device)

    print('loading model {0} ...'.format(opt.model_name))
    print('Loading model:',opt.best_model_path,'...')
    # map_location lets a checkpoint saved on a GPU be loaded on a CPU-only host.
    model.load_state_dict(torch.load(opt.best_model_path, map_location=opt.device))
    model.eval()
    torch.autograd.set_grad_enabled(False)
    print('Loading model:',opt.best_model_path,'... OK')

    # Example of how a sentence is later split for classification:
    #   input:      This little place has a cute interior decor and affordable city prices.
    #   text_left:  This little place has a cute
    #   aspect:     interior decor
    #   text_right: and affordable city prices.
    classifier = {
        'tokenizer': tokenizer,
        'bert': bert,
        'model': model,
        'opt': opt,
    }

    return classifier


def get_classifier(model_data):
    """Build a ready-to-use classifier from an object describing a trained model.

    Args:
        model_data: Object exposing ``opt`` (options with at least
            ``max_seq_len``, ``pretrained_bert_name`` and ``model_name``) and
            ``best_model_path`` (path of the saved state dict).
            ``model_data.opt.device`` is overwritten by this function.

    Returns:
        dict with the keys ``'tokenizer'``, ``'bert'``, ``'model'`` (weights
        loaded, in eval mode) and ``'opt'``.

    Side effects:
        Disables autograd globally via ``torch.autograd.set_grad_enabled(False)``.
    """
    model_classes = {
        'bert_spc': BERT_SPC,
        'aen_bert': AEN_BERT,
        'lcf_bert': LCF_BERT
    }

    opt = model_data.opt
    opt.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    tokenizer = Tokenizer4Bert(opt.max_seq_len, opt.pretrained_bert_name)
    bert = BertModel.from_pretrained(opt.pretrained_bert_name)
    model = model_classes[opt.model_name](bert, opt).to(opt.device)

    print('loading model {0} ...'.format(opt.model_name))
    print('Loading model:',model_data.best_model_path,'...')
    # map_location lets a checkpoint saved on a GPU be loaded on a CPU-only host.
    model.load_state_dict(torch.load(model_data.best_model_path, map_location=opt.device))
    model.eval()
    torch.autograd.set_grad_enabled(False)
    print('Loading model:',model_data.best_model_path,'... OK')

    # Example of how a sentence is later split for classification:
    #   input:      This little place has a cute interior decor and affordable city prices.
    #   text_left:  This little place has a cute
    #   aspect:     interior decor
    #   text_right: and affordable city prices.
    classifier = {
        'tokenizer': tokenizer,
        'bert': bert,
        'model': model,
        'opt': opt,
    }

    return classifier


def fit(classifier, review=['This little place has a cute', 'interior decor', 'and affordable city prices.']):
    """Classify one aspect candidate with an already loaded classifier.

    Despite its name, this function does not train anything: it encodes the
    input and runs a single forward pass of the model.

    Args:
        classifier: dict with the keys ``'tokenizer'``, ``'model'`` and
            ``'opt'``; ``opt`` must expose ``device`` and ``model_name``.
        review: Sequence ``[text_left, aspect, text_right]``. The default is
            only read, never modified.

    Returns:
        dict with ``'confidences'`` (list of softmax probabilities for the
        single input) and ``'iob'`` (index of the most probable class minus
        one, so the first class maps to ``-1``).

    Raises:
        ValueError: If ``opt.model_name`` contains none of ``'lcf'``,
            ``'aen'`` or ``'spc'``.
    """
    opt = classifier['opt']
    encoded = prepare_data(review[0], review[1], review[2], classifier['tokenizer'])

    # Convert through a single ndarray (building a tensor from a list of
    # ndarrays is slow) and add the batch dimension of size one.
    text_bert_indices, bert_segments_ids, text_raw_bert_indices, aspect_bert_indices = (
        torch.tensor(np.asarray(ids), dtype=torch.int64).unsqueeze(0).to(opt.device)
        for ids in encoded
    )

    if 'lcf' in opt.model_name:
        inputs = [text_bert_indices, bert_segments_ids, text_raw_bert_indices, aspect_bert_indices]
    elif 'aen' in opt.model_name:
        inputs = [text_raw_bert_indices, aspect_bert_indices]
    elif 'spc' in opt.model_name:
        inputs = [text_bert_indices, bert_segments_ids]
    else:
        raise ValueError('unsupported model_name: {0!r}'.format(opt.model_name))

    # Run without autograd so that .numpy() works regardless of the global
    # gradient mode.
    with torch.no_grad():
        outputs = classifier['model'](inputs)
    t_probs = F.softmax(outputs, dim=-1).cpu().numpy()

    return {
        'confidences': list(t_probs[0]),
        'iob': (t_probs.argmax(axis=-1) - 1)[0],
    }


In [None]:
# Load the model
parameters = ['--model_name','lcf_bert','--best_model_path','./state_dict/lcf_bert_all_app_val_acc0.9148']  # these parameters simulate the command-line arguments
classifier = get_classifier_from_file(parameters)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=433.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=440473133.0, style=ProgressStyle(descri…


loading model lcf_bert ...
Loading model: ./state_dict/lcf_bert_all_app_val_acc0.9148 ...
Loading model: ./state_dict/lcf_bert_all_app_val_acc0.9148 ... OK


In [None]:
# Download the review dump (saved as ubereats-final-extract-bysentence.txt)
!gdown https://drive.google.com/uc?id=1palUtrBuQyclun3kEpjadkO77mgzRldZ

Downloading...
From: https://drive.google.com/uc?id=1palUtrBuQyclun3kEpjadkO77mgzRldZ
To: /content/ubereats-final-extract-bysentence.txt
0.00B [00:00, ?B/s]16.3MB [00:00, 161MB/s]27.3MB [00:00, 164MB/s]


In [None]:
# Count the lines of the downloaded review file
!wc -l ubereats-final-extract-bysentence.txt

183195 ubereats-final-extract-bysentence.txt


In [None]:
!split -l 6000 -d ubereats-final-extract-bysentence.txt ubereats/ubereats-

In [None]:
# Use chunk 13 as the input file read by the following cells
!cp ubereats/ubereats-13 teste.csv 

In [None]:
# Read the test file into a list of raw lines; the context manager closes the
# file even if reading fails.
with open('teste.csv', 'r') as f:
  test_data = f.readlines()
len(test_data)

6000

In [None]:
# Build the list of sentences (one stripped line per review)
sentences = [line.strip() for line in test_data]

# Show only a short preview: printing every line floods the cell output.
PREVIEW_SIZE = 5
for s in sentences[:PREVIEW_SIZE]:
  print(s)
print('... {0} sentences in total'.format(len(sentences)))

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
04/14/19 - Worst Food delivery ever!! Very late delivery. For the last 2orders i had to wait about 2 and a half hours for my food. Atleast it should show proper estimated time of food delivery. I wont recommend anyone to use Uber eats .
10/28/18 - *Very poor* No customer service. Order took extremely long more than 2 hours for 20 minutes delivery. No point of contact or order cancelation option. No correct order status in the app. No estimated time of arrival.
01/29/20 - Absolutely awful, tried ordering and they just would not let my order go through and I had honestly believed that this would be an app I could count on
03/28/19 - Worst service experience I've ever had. u don't have option to cancel the order, with in 2mins of order it shows as order us in the way, Even the poor customer executive can't help me dey keep saying same thing again and again.........
11/23/19 - Extremely poor service. They don't have riders, a

In [None]:
import nltk
# Fetch the Punkt tokenizer data, presumably required by the nltk tokenizers
# (word_tokenize / sent_tokenize) called in later cells
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
def get_lcf_iob(review_sentence):
  """Classify every token of a sentence as a potential aspect term.

  The sentence is split with ``nltk.word_tokenize``; each token in turn is
  treated as the aspect candidate, with the tokens before and after it as
  left and right context, and passed to ``fit`` together with the
  module-level ``classifier``.

  Args:
    review_sentence: The sentence to analyse.

  Returns:
    List with one ``[token, prediction]`` pair per token, in sentence order,
    where ``prediction`` is the value returned by ``fit``.
  """
  tokens = [str(tok) for tok in nltk.word_tokenize(review_sentence)]

  results = []
  for idx, aspect_candidate in enumerate(tokens):
    # Every context token is followed by a single space, so a non-empty
    # context ends with a trailing space.
    left = ''.join(tok + ' ' for tok in tokens[:idx])
    right = ''.join(tok + ' ' for tok in tokens[idx + 1:])

    prediction = fit(classifier, [left, aspect_candidate, right])
    results.append([aspect_candidate, prediction])

  return results

In [None]:
# Smoke test: tag the first review only and show what the model finds.
output = []
for r in sentences[:1]:
  print(r + "\n")
  results = get_lcf_iob(r)
  print(results)
  print(r + "\n")
  # Keep the tokens whose predicted label is anything other than -1.
  aspects = [item[0] for item in results if item[1]['iob'] != -1]
  print('aspects = ',aspects)
  print('---------')
  output.append([r,aspects,results])

03/31/19 - worst app there is no option to cancel order whenever you place it by mistake.

[['03/31/19', {'confidences': [0.99417984, 0.0022933583, 0.0035267377], 'iob': -1}], ['-', {'confidences': [0.9965379, 0.0010691968, 0.0023929125], 'iob': -1}], ['worst', {'confidences': [0.9954058, 0.0015137915, 0.00308044], 'iob': -1}], ['app', {'confidences': [0.996747, 0.0008766969, 0.0023763282], 'iob': -1}], ['there', {'confidences': [0.99663025, 0.00063609227, 0.0027336956], 'iob': -1}], ['is', {'confidences': [0.99463695, 0.0011914244, 0.0041716015], 'iob': -1}], ['no', {'confidences': [0.99178267, 0.0011572195, 0.0070600947], 'iob': -1}], ['option', {'confidences': [0.012218287, 0.087345585, 0.90043604], 'iob': 1}], ['to', {'confidences': [0.960215, 0.0042738654, 0.035511147], 'iob': -1}], ['cancel', {'confidences': [0.09229382, 0.8677904, 0.03991573], 'iob': 0}], ['order', {'confidences': [0.014951816, 0.19970618, 0.78534204], 'iob': 1}], ['whenever', {'confidences': [0.9729591, 0.00204

In [None]:
# Quick check on a hand-written sentence; the cell displays the per-token predictions
get_lcf_iob('there is a glitch when you open it')

[['there',
  {'confidences': [0.99605846, 0.00069986517, 0.0032417593], 'iob': -1}],
 ['is', {'confidences': [0.9957712, 0.00091370125, 0.003315104], 'iob': -1}],
 ['a', {'confidences': [0.9949556, 0.0012171566, 0.0038272056], 'iob': -1}],
 ['glitch', {'confidences': [0.61788833, 0.33006707, 0.05204457], 'iob': -1}],
 ['when', {'confidences': [0.9884594, 0.0008530393, 0.0106876455], 'iob': -1}],
 ['you', {'confidences': [0.97343576, 0.0023519252, 0.024212297], 'iob': -1}],
 ['open', {'confidences': [0.11004845, 0.8650229, 0.024928685], 'iob': 0}],
 ['it', {'confidences': [0.96795714, 0.0021353127, 0.02990755], 'iob': -1}]]

In [None]:
#from google.colab import drive
#drive.mount('/content/drive')

In [None]:
from nltk.tokenize import sent_tokenize


# Tag every sentence of every review and collect one row per sentence.
output = []
for review_line in sentences:
  # Input lines look like "<date> - <review text>". Drop only that leading
  # prefix; a ' - ' occurring later in the text is left untouched.
  prefix, separator, remainder = review_line.partition(' - ')
  review_text = remainder if separator else review_line

  for sentence in sent_tokenize(review_text):
    results = get_lcf_iob(sentence)

    # Keep the tokens whose predicted label is anything other than -1.
    aspects = [item[0] for item in results if item[1]['iob'] != -1]

    # NOTE(review): each row stores the whole review text, not the individual
    # sentence the aspects came from -- confirm this is what consumers expect.
    output.append([review_text, aspects, results])


str_out = "\n".join(str(e) for e in output)

with open('ubereats/ubereats-parcial-result-13.txt', 'w') as f:
  f.write(str_out)

In [None]:
#%cp -av "ubereats/ubereats-parcial-result-13.txt" "/content/drive/MyDrive/Experimentos/ubereats" 

In [None]:
 import smtplib

import os

# Credentials come from the environment so that no secret is stored in the
# notebook. Never hard-code the password here; any password that has ever been
# saved in this notebook should be considered exposed and rotated.
smtp_user = os.environ.get('SMTP_USER')
smtp_password = os.environ.get('SMTP_PASSWORD')

msg = "COLAB WORK FINISH ALERT!"

if smtp_user and smtp_password:
  try:
    # The context manager closes the connection even if login or sending fails.
    with smtplib.SMTP('smtp.gmail.com', 587) as server:
      server.starttls()
      server.login(smtp_user, smtp_password)
      server.sendmail(smtp_user, smtp_user, msg)
  except (smtplib.SMTPException, OSError) as exc:
    # The alert is best-effort: report the problem instead of ending the
    # notebook run with an exception.
    print('Could not send the completion e-mail:', exc)
else:
  print('SMTP_USER / SMTP_PASSWORD not set; skipping the completion e-mail.')

SMTPAuthenticationError: ignored