<a href="https://colab.research.google.com/github/zuzannazak/MgrSarcasm/blob/master/mgr_sie%C4%87_neuronowa.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import json
import tensorflow as tf
import requests
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import LeakyReLU
import csv

# Sources that I don't want to keep looking for:
#  https://towardsdatascience.com/tensorflow-sarcasm-detection-in-20-mins-b549311b9e91
#  https://medium.com/analytics-vidhya/sarcasm-detection-with-neural-networks-1509578bb17b 
#  https://towardsdatascience.com/7-popular-activation-functions-you-should-know-in-deep-learning-and-how-to-use-them-with-keras-and-27b4d838dfe6
#  https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dense

In [None]:
# Load the tab-separated corpus: column 0 is stored as the label and
# column 1 as the sentence text; any further columns are ignored.
sentences = []
labels = []

# newline='' hands line-ending handling to the csv module (as the csv docs
# require), and the explicit encoding keeps the read independent of the
# runtime's locale default.
# NOTE(review): assumes the file is UTF-8 encoded - confirm.
with open('/content/drive/MyDrive/STUDIA/train-balanced.csv', 'r', newline='', encoding='utf-8') as file:
    reader = csv.reader(file, delimiter='\t')
    for row in reader:
        # Blank or truncated lines yield fewer than two fields; skip them
        # instead of aborting the whole load with an IndexError.
        if len(row) < 2:
            continue
        labels.append(row[0])
        sentences.append(row[1])

In [None]:
# Sanity check: show the first ten sentence/label pairs side by side.
print(
    pd.DataFrame({
        'sentence': sentences[:10],
        'label': labels[:10],
    })
)

                                            sentence label
0                                   Dang dog, thanks     0
1  to summon the powers of the flying spaghetti m...     0
2       i did that 3rd last 1 by accident last night     0
3  He's insane, used him in DC, better than Blake...     0
4  Forgot about him, he's a pretty pointless card...     0
5                                                hey     0
6                                              yeah?     0
7                                               okay     0
8                                      Condensation?     0
9                                What type of juice?     0


In [None]:
# Sequential train/test split: the first 75% of the rows are used for
# training and the remaining rows for testing (no shuffling is applied).
training_size = round(0.75 * len(sentences))
training_sentences, testing_sentences = sentences[:training_size], sentences[training_size:]
training_labels, testing_labels = labels[:training_size], labels[training_size:]

In [None]:
# Tokenizer configuration
oov_tok = "<oov>"    # token substituted for out-of-vocabulary words
vocab_size = 50_000  # vocabulary cap; also the input size of the Embedding layer

In [None]:
# Build the vocabulary from the training split only; the test split is
# not passed to fit_on_texts.
tokenizer = Tokenizer(
    num_words=vocab_size,
    oov_token=oov_tok,
)
tokenizer.fit_on_texts(training_sentences)

# Keep the word -> index mapping around for later inspection.
word_index = tokenizer.word_index

In [None]:
# Padding configuration, shared by every pad_sequences call below
max_length = 200       # every sequence is padded/truncated to this length
padding_type = 'post'  # short sequences are padded at the end
trunc_type = 'post'    # long sequences are cut at the end

In [None]:
# Encode both splits as integer sequences with the fitted tokenizer, then
# pad/truncate them with one shared set of padding options.
pad_options = dict(maxlen=max_length, padding=padding_type, truncating=trunc_type)

training_sequences = tokenizer.texts_to_sequences(training_sentences)
testing_sequences = tokenizer.texts_to_sequences(testing_sentences)

training_padded = pad_sequences(training_sequences, **pad_options)
testing_padded = pad_sequences(testing_sequences, **pad_options)

In [None]:
# Model definition:
#   Embedding -> average over the sequence axis -> Dense(50) + LeakyReLU
#   -> single sigmoid unit for the binary label.
embedding_dim = 16
leaky_relu = LeakyReLU(alpha=0.01)
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
    tf.keras.layers.GlobalAveragePooling1D(),
    # LeakyReLU is a Keras *layer*, not a registered activation name, so it is
    # stacked after a linear Dense layer instead of being passed through
    # `activation=`. The computation is the same, but the model no longer
    # carries a layer object inside a Dense config, which is a known source
    # of errors when a model is saved and reloaded.
    tf.keras.layers.Dense(50),
    leaky_relu,
    tf.keras.layers.Dense(1, activation='sigmoid')
])
# Binary cross-entropy matches the single sigmoid output.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 200, 16)           800000    
_________________________________________________________________
global_average_pooling1d_3 ( (None, 16)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 50)                850       
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 51        
Total params: 800,901
Trainable params: 800,901
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Hand NumPy arrays to TensorFlow 2.x: the padded features are wrapped as-is
# and the labels are cast to 32-bit integers.
training_padded, testing_padded = np.array(training_padded), np.array(testing_padded)
training_labels = np.array(training_labels, dtype=np.int32)
testing_labels = np.array(testing_labels, dtype=np.int32)

# Report the dtype of the feature matrix.
print(training_padded.dtype)

int32


In [None]:
# Training the model
num_epochs = 40  # upper bound; early stopping normally ends training sooner

# Stop once validation loss has failed to improve for `patience` consecutive
# epochs and roll the model back to its best weights, instead of always
# running all `num_epochs` epochs.
# NOTE(review): the test split doubles as validation data here, so the
# reported validation metrics are not an untouched hold-out estimate.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

history = model.fit(
    training_padded,
    training_labels,
    epochs=num_epochs,
    validation_data=(testing_padded, testing_labels),
    callbacks=[early_stopping],
    verbose=2,
)




Epoch 1/40
26384/26384 - 249s - loss: 0.6032 - accuracy: 0.6704 - val_loss: 0.5738 - val_accuracy: 0.6942
Epoch 2/40
26384/26384 - 239s - loss: 0.5705 - accuracy: 0.7031 - val_loss: 0.5717 - val_accuracy: 0.7006
Epoch 3/40
26384/26384 - 249s - loss: 0.5590 - accuracy: 0.7123 - val_loss: 0.5608 - val_accuracy: 0.7093
Epoch 4/40
26384/26384 - 240s - loss: 0.5498 - accuracy: 0.7185 - val_loss: 0.5632 - val_accuracy: 0.7053
Epoch 5/40
26384/26384 - 238s - loss: 0.5431 - accuracy: 0.7234 - val_loss: 0.5608 - val_accuracy: 0.7099
Epoch 6/40
26384/26384 - 250s - loss: 0.5377 - accuracy: 0.7270 - val_loss: 0.5886 - val_accuracy: 0.6906
Epoch 7/40
26384/26384 - 244s - loss: 0.5328 - accuracy: 0.7303 - val_loss: 0.5656 - val_accuracy: 0.7082
Epoch 8/40
26384/26384 - 243s - loss: 0.5282 - accuracy: 0.7338 - val_loss: 0.5698 - val_accuracy: 0.7046
Epoch 9/40
26384/26384 - 243s - loss: 0.5240 - accuracy: 0.7365 - val_loss: 0.5710 - val_accuracy: 0.7065
Epoch 10/40
26384/26384 - 242s - loss: 0.5196 

In [None]:
# Score a couple of hand-written examples, encoding them with the same
# tokenizer and padding settings used for the training data.
sentence = [
    "Yes, definitely",
    "I like cats.",
]
sequences = tokenizer.texts_to_sequences(sentence)
padded = pad_sequences(
    sequences,
    maxlen=max_length,
    padding=padding_type,
    truncating=trunc_type,
)
print(model.predict(padded))

NameError: ignored