# The Pessimistic Machine

The Pessimistic is a proof of concept for style adaptation. The machine takes a sentence as input and returns a sentence addressing a similar object but conveying a negative sentiment.

The machine needs to be fed with the latent representations of the sentences from the dataset which can be obtained using the script *compute_latent_representations.py*.

In [141]:
import pandas as pd
import numpy as np
import time
import datetime 
import json
from tqdm import tqdm
import os
import tensorflow as tf
import seaborn as sns
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline 
from bokeh.io import output_notebook
from bokeh.plotting import figure, output_file, show, ColumnDataSource
from bokeh.models import HoverTool
output_notebook()

from data_utils_LMR import prepare_data,read_data, EncoderDecoder
from model import Vrae as Vrae_model
from batch import Generator

prepare_data(1000)

training_dir = 'logs/'
training_dir += 'no_char2word'

# sentiment analyzer
from nltk.sentiment.vader import SentimentIntensityAnalyzer
sentimentAnalyzer = SentimentIntensityAnalyzer()
def getSentimentScore(sentence):
    scores = sentimentAnalyzer.polarity_scores(sentence)
    return (scores['neg'], scores['neu'] ,scores['pos'])

class dotdict(dict):
    """dot.notation access to dictionary attributes"""
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__
    
def string2bool(st):
    if st.lower() == "true":
        return True
    else:
        return False
    
with open(training_dir +'/flags.json', 'r') as fp:
    FLAGS = dotdict(json.loads( fp.read() ) )
    
for k,v in FLAGS.iteritems():
    print k,':',v
      
n_samples = 5000#int(FLAGS.batch_size)

Downloading data from data_LMR/..
Data already downloaded.
Creating Vocabulary..
Vocabulary already created.
Converting sentences to sequences of ids..
Moving some line from test set to train set..
latent_dim : 16
sequence_max : 30
decoder_num_layers : 2
beta_period : 500
training_dir : logs/no_char2word
epoches : 10000
batch_size : 600
char2word_num_layers : 2
initialize : False
teacher_forcing : True
learning_rate_change_rate : 3000
beta_offset : 15
encoder_num_layers : 2
char2word_state_size : 256
sequence_min : 8
use_char2word : False
dtype_precision : 32
cell : GRU
use_sentiment_feature : False
initial_learning_rate : 0.001
decoder_state_size : 1024
output_keep_prob : 0.5
acceptable_accuracy : 0.4
peephole : True
input_keep_prob : 0.9
encoder_state_size : 1024
latent_loss_weight : 0.01


## k-NN Decoder

In [142]:
labels = []
zs = []
with tf.gfile.GFile(training_dir + "/latent_representations.txt" , mode="r") as source_file:
    source = source_file.readline()
    counter = 0
    while source:
        source = source_file.readline()
        if len(source.split('|')) > 1:
            z_ =  [ float(u) for u in source.split('|')[1].split(',')]
            if len(z_) == 16:
                labels.append(source.split('|')[0])
                zs.append(z_ )
            counter += 1
print len(zs), 'points'

77976 points


In [143]:
from sklearn.neighbors import KDTree
kdt = KDTree(np.array(zs), leaf_size=1,metric='euclidean')
def getNeighbor(zz, n_similar = 5):
    """ 
    take a z value and returns the neighrest neighbor in the latent space from the training set
    """
    dist, ind = kdt.query( zz, k=n_similar)
    return [ labels[k] for k in list(ind[0]) ], dist

## The VAE model

In [144]:
with open(training_dir +'/training_parameters.json', 'r') as fp:
    training_parameters = json.loads( fp.read() )
# vocabulary encoder-decoder
encoderDecoder = EncoderDecoder()
num_symbols = encoderDecoder.vocabularySize()
# prepare data
sentences, ratings = read_data( max_size=None, 
                               max_sentence_size=training_parameters['seq_max'],
                               min_sentence_size=int(FLAGS.sequence_min), 
                               test=False) 
print len(sentences), " sentences"

encoderDecoder = EncoderDecoder()

config = tf.ConfigProto(
        device_count = {'GPU': 0}, # do not use GPU for testing
    )

FLAGS.peephole = False
# load model
vrae_model = Vrae_model(char2word_state_size = int(FLAGS.char2word_state_size), 
                     char2word_num_layers = int(FLAGS.char2word_num_layers), 
                     encoder_state_size = int(FLAGS.encoder_state_size), 
                     encoder_num_layers = int(FLAGS.encoder_num_layers), 
                     decoder_state_size = int(FLAGS.decoder_state_size), 
                     decoder_num_layers = int(FLAGS.decoder_num_layers), 
                          latent_dim=int(FLAGS.latent_dim),
                         batch_size=n_samples,
                         num_symbols=num_symbols,
                        latent_loss_weight=float(FLAGS.latent_loss_weight),
                         dtype_precision=FLAGS.dtype_precision,
                        cell_type=FLAGS.cell, 
                        peephole=FLAGS.peephole,
                        input_keep_prob=float(FLAGS.input_keep_prob),
                        output_keep_prob=float(FLAGS.output_keep_prob),
                      sentiment_feature = string2bool(FLAGS.use_sentiment_feature),
                      use_char2word = string2bool(FLAGS.use_char2word) 
                       )
def zToXdecoded(session,z_sample,s_length):
    x_reconstruct = vrae_model.zToX(session,z_sample,s_length)
    return encoderDecoder.prettyDecode( np.argmax(x_reconstruct[0], axis= 1) ) 

  reading data line 10000
  reading data line 20000
  reading data line 30000
  reading data line 40000
  reading data line 50000
  reading data line 60000
  reading data line 70000
77977  sentences


## The Pessimist Machine

In [153]:
def MachineSays(sess,u,n_sample = 20):
    #print train_dir
    sent = getSentimentScore(u)
    sent_index = 0 # grumpy  
    zz = vrae_model.XToz(sess, *encoderDecoder.encodeForTraining(u),sentiment=getSentimentScore(u))[0]
    res, dist = getNeighbor( [list(zz)] ,n_sample)
    if u.lower() in res:
        res.remove(u.lower())
    out = []
    for uu in sorted(zip(res,list(dist[0])), key=lambda x : getSentimentScore(x[0])[sent_index] , reverse=True):
        out.append(uu[0])
    return out
    #print "\n",dist

In [154]:
answers = []
saver = tf.train.Saver()
with tf.Session(config=config) as sess:
    saver.restore(sess, "./"+training_dir+'/model.ckp')
    for uu in MachineSays(sess,"I like this movie.", 50):
        print uu

INFO:tensorflow:Restoring parameters from ./logs/no_char2word/model.ckp
i hate this movie.
i hate this movie.
i like this movie.
i liked this movies.
i love this movie.
i love this movie.
i love this movie.
i love this movie.
i love this movie.
i love this movie.
i love this movie.
i love this movie.
i love this movie.
i love this movie.
i love this movie.
i love this movie.
i love this movie.
i love this movie.
i love this movie.
i love this movie.
i love this movie.
i love this movie.
i love this movie.
i love this movie.
i love this movie.
i love this movie.
i love this movie.
i love this movie.
i love this movie.
i love this movie.
i love this movie.
i liked this movie.
i liked this movie.
i liked this movie.
i liked this movie.
i liked this movie.
i liked this movie.
i liked this movie.
i liked this movie.
i liked this movie.
i liked this movie.
i like the movie.
i like it.
i like it.
like this one.
i love this.
i saw this movie on t.
i love this movie a lot.
i liked those movies.

In [152]:
us = [ "I totally loved it.",
"I was really bad.",
"it was terrible.",
"the acting was mostly good.",
"I liked this movie.",
"it was a nice movie.",
"the story was amazing.",
"it was not bad.",
"The acting was good.",
"The music was good.",
]

answers = []
saver = tf.train.Saver()
with tf.Session(config=config) as sess:
    saver.restore(sess, "./"+training_dir+'/model.ckp')
    for u in us:
        answers.append( MachineSays(sess,u)[0] )
df = pd.DataFrame()
df["input"] = us
df["answer"] = answers
df

INFO:tensorflow:Restoring parameters from ./logs/no_char2word/model.ckp


Unnamed: 0,input,answer
0,I totally loved it.,i totally hated the movie.
1,I was really bad.,it' s really bad.
2,it was terrible.,thats terrible.
3,the acting was mostly good.,the acting is mostly horrendously bad.
4,I liked this movie.,i liked this movie.
5,it was a nice movie.,this is a waste of time and money.
6,the story was amazing.,the story was bad.
7,it was not bad.,it was bad.
8,The acting was good.,the irony is horrible.
9,The music was good.,the music is horrendous.


In [122]:
print df.to_latex()

\begin{tabular}{lll}
\toprule
{} &                        input &                                  answer \\
\midrule
0 &          I totally loved it. &              i totally hated the movie. \\
1 &            I was really bad. &                       it' s really bad. \\
2 &             it was terrible. &                         thats terrible. \\
3 &  the acting was mostly good. &  the acting is mostly horrendously bad. \\
4 &          I liked this movie. &                     i liked this movie. \\
5 &         it was a nice movie. &      this is a waste of time and money. \\
6 &       the story was amazing. &                      the story was bad. \\
7 &              it was not bad. &                             it was bad. \\
8 &         The acting was good. &                  the irony is horrible. \\
9 &          The music was good. &                the music is horrendous. \\
\bottomrule
\end{tabular}



## neighbordhood

In [125]:
us = [ "I totally loved it.",
"I was really bad.",
"it was terrible.",
"the acting was mostly good.",
"I liked this movie.",
"it was a nice movie.",
"the story was amazing.",
"it was not bad.",
"The acting was good.",
"The music was good.",
]

saver = tf.train.Saver()
df = pd.DataFrame()
with tf.Session(config=config) as sess:
    saver.restore(sess, "./"+training_dir+'/model.ckp')
    #for _ in range(20):
    for u in us:
        k = int(np.random.random() * len(sentences))
        u = encoderDecoder.prettyDecode(sentences[k])
        l = MachineSays(sess,u,20)
        while len(l) < 20:
            l.append("")
        df[u] = l
df

INFO:tensorflow:Restoring parameters from ./logs/no_char2word/model.ckp


Unnamed: 0,she could blow at any moment!,and their chemistry is impeccable.,a positive message from a positive film.,i was 0 when i saw it.,the girl is dying!,did they ask the fans?,i' m only a child.,"this film however, is very amusing.",it' s like water.,the hillermans were a family again.
0,"sadly, somebody has lied to frank.",were the vietnamese all suicidal?,a virus killed 0 billion people.,what is a thief?,terrible i know.,know what else is torture?,i' m only a child.,the girl' s reactions just seem dumb.,it' s lame!,the killers are dark with real menace.
1,"so well made, no cgi crap.",and the violence changes them.,impossible to improve or ignore.,i cried when i saw this sequence.,the gritty' license to kill',did they not watch this mess?,i' m only a child.,the worse film i have every seen.,it' s really a shame.,the world is a miserable and hard place.
2,how could a film so bad be made?,and the sad thing isinternally she dies.,i prefer more vulnerable heroes.,it was hot and i was tired.,the dialogue is bad.,did they inhabit the rv?,i' m only a child.,this is a horrible film in every way.,it' s like hell.,the kids are all a mess.
3,the colour is dull and grey.,and the car chase is simply amazing.,iturbi provides some fine piano playing.,i was 0 when i first saw this on tv.,this guy is a loser.,need that cult cred!,i' m only a child.,"he' s tragic, but also elegant.",it' s like a guilty pleasure.,the lead actress here is awful.
4,should i be amazed at how bad it is?,and the direction?,a masterpiece in romance filmmaking.,it was a night that i enjoyed.,there is no dialog.,did they edit out the water?,i' m only a child.,this film had feeling.,it' s like a paperweight.,the dialouge was embarrassing at times.
5,the guy looks halfmad anyway.,watch the little mermaid instead.,a vehicle for michael caine.,i was 00 then.,this film has no action.,and save the day.,i' m only a child.,this film is forever lodged in my brain.,he' s like a child.,the shyster lawyer is naturally a man.
6,she could tackle any part.,and the dedication page.,my family truly enjoyed this movie.,i saw ray when it first came out.,the dialog was inane.,can i get them back?,i' m only a child.,this movie will be a favorite of mine.,it' s like a fetish!,the leading actors are a nuisance.
7,should have been a 0!,nearly the entire cast is incredible.,my favorite country star is from kansas.,i gave it 0 stars.,the gore is all cgi.,do they care if we care?,i' m only a child.,this film however was mesmerizing.,it' s laughable.,"the wolfgod again, apparently."
8,she could be smart.,and dig the lesbian cid agents!,a fine piece of captured femininity.,i gave it 0 stars.,the girl did an okay job.,and send her to paris!,i' m only a child.,this would' ve been a great silent film.,it' s like a wireless ad help me!,the lead actress flubbed a line?
9,should children be able to enjoy a show?,are they speaking spanish?,jamie foxx transformed himself into ray.,i gave it a 00 rating.,the dialog.,dance with me?,i' m only a child.,this film deserves a wide audience.,i like' the big screen'.,there is hardly any musical score.


In [120]:
print df.to_latex()

\begin{tabular}{lllllllllll}
\toprule
\midrule
0  &                 cheated on all fronts! &         his fear was because of his guilt. &      this is one of them. &       guess what it took to kill killjoy? &                plain stupid! &      this film is so bad that its painful. &     yikes! &       disappointed. &        i hate this film with a vengeance. &                 he' s flat wrong. \\
1  &          charming film but naive film. &     he is neither a good man or a bad man. &      this is one of them. &    watch this movie forget your troubles. &             plain stupidity? &       this movie was horrible, simply put. &     yikes! &         disappoint. &       although i missed the last episode. &             he should be ashamed. \\
2  &  well and culmination fight full orgy. &  he is driven to find and stop the killer. &      this is one of them. &           what is wrong with some of you? &         the plot was stupid. &      this movie is also very sad at times. &     