In [145]:
from keras.layers import LSTM, Bidirectional, GlobalMaxPool1D, Dropout
from nltk.stem import PorterStemmer, WordNetLemmatizer
from keras.layers import Dense, Activation, Dropout
from keras.layers import Dense, Embedding, Input
from tensorflow.keras.optimizers import RMSprop
from keras.preprocessing import text, sequence
from keras.utils.vis_utils import plot_model
from keras.utils.data_utils import get_file
from keras.callbacks import ModelCheckpoint
from keras.callbacks import LambdaCallback
from nltk.tokenize import TweetTokenizer
from geopy.exc import GeocoderTimedOut
from geopy.geocoders import Nominatim
from twython import TwythonStreamer
from keras.models import Sequential
from keras.models import load_model
from nltk.corpus import stopwords
from keras.models import Model
from textblob import TextBlob 
from itertools import product
from twython import Twython


from keras.layers import LSTM
from datetime import datetime

import pandas as pd
import numpy as np
import random
import pickle

import time
import nltk
import sys
import re
import io


In [146]:
pip install twython




In [147]:
# Make sure the NLTK stopword corpus (read by cleaningData.removeStopwords) is
# available. `nltk` is already imported in the imports cell; only go to the
# network when the corpus is not on disk yet, so re-running the notebook is
# cheap and works offline once the data has been fetched.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [148]:
class cleaningData:
    """Clean raw tweet text before it is vectorized or used as a seed.

    ``process`` chains the individual steps: noise removal, stopword
    filtering, per-token spell correction, stemming, alphabetic tokenisation
    and a final noise/stopword pass.

    Parameters
    ----------
    noise_chars : iterable of str
        Each item is deleted from the text (via ``str.replace``) at the end of
        ``removeNoise``.
    """

    # Tokens dropped in addition to NLTK's English stopword list.
    EXTRA_STOPWORDS = (
        'that', 'thats',
        'oh', 'aww', 'mr',
        'r', 'what', 'etc',
        'hey', 'within', 'foi',
        'yeah', 'www', 'wa',
        'em', 'am', 'i', 'me',
        'dialmformurderjpg',
    )

    # Ordered (compiled pattern, replacement) pairs applied to the LOWERCASED
    # text by removeNoise. Raw strings avoid invalid-escape warnings.
    _NOISE_SUBSTITUTIONS = (
        # @handles
        (re.compile(r'@[A-Za-z0-9]+'), ''),
        # Restore the Porter stem "pleas" to "please". The word boundaries
        # matter: without them "please" itself became "pleasee".
        (re.compile(r'\bpleas\b'), 'please'),
        # Mojibake followed by an escaped "<". This must run before the
        # generic entity pattern below, otherwise "&lt;" is already gone and
        # this pattern can never match.
        (re.compile(r'â€\x9d&lt;'), ''),
        # HTML entities such as &amp; / &lt;
        (re.compile(r'&[a-zA-Z]+;'), ''),
        (re.compile(r'dont'), 'do not'),
        # URLs
        (re.compile(r'http\S+'), ''),
        # non-breaking space followed by a middle dot
        (re.compile(r'\xa0·'), ' '),
        (re.compile(r'www\S+'), ''),
        # Literal "(UTC)" marker. The text is lowercase at this point and the
        # parentheses must be escaped; the previous pattern "(UTC)" was an
        # uppercase regex group and therefore never matched anything.
        (re.compile(r'\(utc\)'), ' '),
        # leading "b " (looks like the residue of a bytes repr)
        (re.compile(r'^b\s+'), ''),
        (re.compile(r'â€œ:'), ''),
        # HTML/XML tags
        (re.compile(r'<.*?>'), ''),
    )

    def __init__(self, noise_chars):
        self.noise_chars = noise_chars
        # Stopword set is built lazily on first use (it needs the NLTK corpus).
        self._stopwords = None

    def removeNoise(self, s):
        """Lowercase ``s`` and strip handles, URLs, entities, tags and noise chars.

        Returns the cleaned string. Whitespace left behind by removed
        fragments is not collapsed.
        """
        s = s.lower()
        for pattern, replacement in self._NOISE_SUBSTITUTIONS:
            s = pattern.sub(replacement, s)
        for c in self.noise_chars:
            s = s.replace(c, '')
        return s

    def _stopword_set(self):
        """Return the cached stopword set (NLTK English + EXTRA_STOPWORDS)."""
        cached = getattr(self, '_stopwords', None)
        if cached is None:
            cached = set(stopwords.words('english'))
            cached.update(self.EXTRA_STOPWORDS)
            self._stopwords = cached
        return cached

    def removeStopwords(self, s):
        """Drop stopwords from the single-space-separated string ``s``.

        The text is split on single spaces (not arbitrary whitespace), exactly
        as before, and the surviving tokens are re-joined with single spaces.
        """
        stop_set = self._stopword_set()  # set membership instead of list scan
        return " ".join(w for w in s.split(" ") if w not in stop_set)

    @staticmethod
    def _spell_correct(word):
        """Return TextBlob's spelling correction of a single token as ``str``."""
        return str(TextBlob(word).correct())

    def textNormalization(self, string):
        """Tokenize with NLTK's TweetTokenizer and spell-correct every token.

        Returns the corrected tokens joined by single spaces.
        """
        tokens = nltk.tokenize.TweetTokenizer().tokenize(string)
        return " ".join(self._spell_correct(tok) for tok in tokens)

    def stemWords(self, string):
        """Porter-stem each space-separated word, then spell-correct the stems.

        Returns the resulting words joined by single spaces.
        """
        stemmer = PorterStemmer()
        stems = [stemmer.stem(w) for w in string.split(" ")]
        return " ".join(self._spell_correct(stem) for stem in stems)

    def wordTokenize(self, string):
        """Return the list of maximal ASCII-letter runs found in ``string``."""
        return re.findall(r"[a-zA-Z]+", string)

    def process(self, s):
        """Run the full cleaning pipeline on one text and return the result."""
        cleaned = self.removeNoise(s)
        without_stopwords = self.removeStopwords(cleaned)
        normalized = self.textNormalization(without_stopwords)
        stemmed = self.stemWords(normalized)

        # Keep alphabetic tokens only, then run noise/stopword removal once
        # more because stemming/correction can re-introduce such tokens.
        tokenized = " ".join(self.wordTokenize(stemmed))
        text_ready = self.removeNoise(tokenized)
        return self.removeStopwords(text_ready)

In [149]:
class preData:
    """Pre-process, vectorize and classify text with the saved Bi-LSTM model.

    Parameters
    ----------
    pre_processor : object, optional
        Anything with a ``process(str) -> str`` method. Defaults to a
        ``cleaningData`` instance (the previous default referenced a
        ``PreProcessor`` name that is not defined in this notebook).
    max_features : int
        Vocabulary size used by the tokenizer and the embedding layer.
    maxlen : int
        Length every sequence is padded/truncated to.
    """

    def __init__(self, pre_processor = None, max_features=20000, maxlen=100):
        if pre_processor is None:
            pre_processor = cleaningData("#@,.?!¬-\''=()")
        self.preProcessor = pre_processor
        self.max_features = max_features
        self.maxlen = maxlen
        # Built lazily by _restored_model so the graph is constructed only once.
        self.model = None

    def prepare_data(self, X):
        """Pre-process and vectorize ``X``.

        ``X`` may be a single string or an iterable of strings. A single
        string is handled explicitly: iterating a ``str`` never raises, so the
        old try/except fallback silently processed it one character at a time.
        For an iterable, the pre-processed texts are also pickled to
        ``preprocessed_data.pickle`` in the working directory.

        Returns whatever ``vectorize`` returns for the pre-processed texts.
        """
        if isinstance(X, str):
            return self.vectorize([self.preProcessor.process(X)])

        preprocessed = [self.preProcessor.process(comment) for comment in X]
        with open('preprocessed_data.pickle', 'wb') as fh:  # close the handle
            pickle.dump(preprocessed, fh)
        return self.vectorize(preprocessed)

    def vectorize(self, X):
        """Turn pre-processed text(s) into a padded integer matrix.

        A bare string is treated as ONE document (previously it was iterated
        character by character, yielding one row per character).

        NOTE(review): a fresh Tokenizer is fitted on the texts passed in, so
        the word indices depend on this input alone. If the weights loaded in
        ``predict``/``evaluate`` were trained with a different word index the
        inputs will not line up - confirm, and persist/reuse the training
        tokenizer if so.
        """
        texts = [X] if isinstance(X, str) else list(X)
        tokenizer = text.Tokenizer(num_words=self.max_features)
        tokenizer.fit_on_texts(texts)
        sequences = tokenizer.texts_to_sequences(texts)
        return sequence.pad_sequences(sequences, maxlen=self.maxlen)

    def get_model(self):
        """Build and compile the classifier (Embedding -> Bi-LSTM -> max-pool -> dense)."""
        embed_size = 128
        inp = Input(shape=(self.maxlen, ))
        x = Embedding(self.max_features, embed_size)(inp)
        x = Bidirectional(LSTM(50, return_sequences=True))(x)
        x = GlobalMaxPool1D()(x)
        x = Dropout(0.1)(x)
        x = Dense(50, activation="relu")(x)
        x = Dropout(0.1)(x)
        x = Dense(1, activation="sigmoid")(x)
        model = Model(inputs=inp, outputs=x)
        model.compile(loss='binary_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])
        return model

    def _restored_model(self):
        """Return ``self.model`` with the weights from disk loaded.

        The graph is built once and reused; the weights file is re-read on
        every call so a checkpoint overwritten in the meantime is picked up.
        """
        if getattr(self, 'model', None) is None:
            self.model = self.get_model()
        self.model.load_weights('weights_base.best.hdf5')
        return self.model

    def predict(self, X):
        """Return the model's predictions for the already-vectorized ``X``."""
        return self._restored_model().predict(X)

    def evaluate(self, X, y):
        """Evaluate on vectorized ``X`` / labels ``y`` and print the accuracy."""
        loss, acc = self._restored_model().evaluate(X, y, verbose=2)
        print('Restored model, accuracy: {:5.2f}%'.format(100 * acc))



In [150]:
class Generative:
    """Character-level tweet generator backed by a saved Keras model."""

    # Number of characters generated after the seed for each tweet.
    GENERATED_CHARS = 100

    def __init__(self):
        # Loaded lazily by _restored_model so the model is read from disk once.
        self.model = None

    @staticmethod
    def _load_pickle(path):
        """Unpickle ``path``, closing the file afterwards.

        NOTE: unpickling can execute arbitrary code - only load files that
        were produced by this project.
        """
        with open(path, 'rb') as fh:
            return pickle.load(fh)

    def get_info(self, text):
        """Return the seed corpus plus the character vocabulary from disk.

        Returns
        -------
        tuple
            ``(text, chars, char_indices, indices_char, maxlen, sentences)``
            where ``sentences`` are the ``maxlen``-character windows of
            ``text`` taken every 3 characters, and the vocabulary objects are
            read from ``chars.pickle``, ``char_indices.pickle`` and
            ``indices_char.pickle``.
        """
        maxlen = 40
        step = 3
        sentences = [text[i: i + maxlen]
                     for i in range(0, len(text) - maxlen, step)]
        chars = self._load_pickle('chars.pickle')
        char_indices = self._load_pickle('char_indices.pickle')
        indices_char = self._load_pickle('indices_char.pickle')
        return text, chars, char_indices, indices_char, maxlen, sentences

    def sample(self, preds, temperature=0.2):
        """Draw one index from ``preds`` after temperature re-scaling.

        The log-probabilities are divided by ``temperature`` and
        re-normalised, then a single multinomial draw picks the index.
        """
        preds = np.asarray(preds).astype('float64')
        preds = np.log(preds) / temperature
        exp_preds = np.exp(preds)
        preds = exp_preds / np.sum(exp_preds)
        probas = np.random.multinomial(1, preds, 1)
        return np.argmax(probas)

    def _restored_model(self):
        """Return the generative model with the weights from disk loaded.

        The model is loaded once and cached on ``self.model``; the weights
        file is re-read on every call.
        """
        if getattr(self, 'model', None) is None:
            self.model = load_model('GenerativeModel_compiled_v2')
        self.model.load_weights('weights_E_v2.hdf5')
        return self.model

    def generate_tweets(self, corpus, char_to_idx, idx_to_char, chars, maxlen,
                        n_tweets=10, verbose=1, client=None):
        """Generate ``n_tweets`` texts seeded from ``corpus`` and post one.

        Parameters
        ----------
        corpus : str
            Text the random ``maxlen``-character seeds are cut from.
        char_to_idx, idx_to_char : mapping
            Character <-> index lookups matching the model's one-hot layout.
        chars : sequence
            Vocabulary; its length is the one-hot width.
        maxlen : int
            Seed / model window length.
        n_tweets : int
            Number of tweets to generate.
        verbose : int or bool
            When truthy, print the seed and the generated text.
        client : object, optional
            Object with ``update_status(status=...)``. Defaults to the
            module-level ``twitter`` client, as before.

        Returns
        -------
        list of str
            The generated tweets (seed + generated characters).

        Raises
        ------
        ValueError
            If ``corpus`` is not longer than ``maxlen`` (no seed can be cut).
        """
        if len(corpus) <= maxlen:
            # random.randint would fail with an opaque "empty range" error.
            raise ValueError(
                'corpus must be longer than maxlen (%d) to cut a seed, got %d characters'
                % (maxlen, len(corpus)))

        model = self._restored_model()

        # Kept as a module-level name for backward compatibility with code
        # that reads the global `tweets` after a run.
        global tweets
        tweets = []

        for i in range(1, n_tweets + 1):
            begin = random.randint(0, len(corpus) - maxlen - 1)
            window = corpus[begin:begin + maxlen]
            tweet = window
            if verbose:
                print('Tweet no. %03d' % i)
                print('=' * 13)
                print('Generating with seed:')
                print(window)
                print('_' * len(window))
            for _ in range(self.GENERATED_CHARS):
                # One-hot encode the current window.
                x = np.zeros((1, maxlen, len(chars)))
                for t, char in enumerate(window):
                    x[0, t, char_to_idx[char]] = 1.0

                preds = model.predict(x, verbose=0)[0]
                # Use this instance's sampler, not the global `generative`.
                next_char = idx_to_char[self.sample(preds)]

                tweet += next_char
                window = window[1:] + next_char  # slide the window by one char
            if verbose:
                print(tweet)
                print()
            tweets.append(tweet)

        if tweets:
            # random.choice works for any n_tweets; the previous
            # randrange(10) raised IndexError for fewer than 10 tweets and
            # ignored everything past the tenth.
            poster = client if client is not None else twitter
            poster.update_status(status=random.choice(tweets))

        return tweets

In [151]:
import os

# Twitter API credentials are read from the environment instead of being
# written into the notebook. The values that used to be hardcoded here were
# stored in plain text and must be treated as compromised: revoke/regenerate
# them in the Twitter developer portal.
APIKey = os.environ.get("TWITTER_API_KEY")

APIKeySecret = os.environ.get("TWITTER_API_KEY_SECRET")

AccesToken = os.environ.get("TWITTER_ACCESS_TOKEN")

AccessTokenSecret = os.environ.get("TWITTER_ACCESS_TOKEN_SECRET")

# Surface a missing variable here rather than as an opaque auth error later.
_missing_credentials = [
    env_name
    for env_name, value in (
        ("TWITTER_API_KEY", APIKey),
        ("TWITTER_API_KEY_SECRET", APIKeySecret),
        ("TWITTER_ACCESS_TOKEN", AccesToken),
        ("TWITTER_ACCESS_TOKEN_SECRET", AccessTokenSecret),
    )
    if not value
]
if _missing_credentials:
    print("WARNING: missing Twitter credentials in environment: "
          + ", ".join(_missing_credentials))


In [152]:
# Shared pipeline objects used by the streaming callback.
cleanData = cleaningData("#@,.?!¬-\''=()")

# Pass the cleaner explicitly so the classifier wrapper shares this instance
# and construction does not depend on preData's default pre-processor.
pre_data = preData(pre_processor=cleanData)

generative = Generative()

In [153]:
class MyStreamer(TwythonStreamer):
    """Stream handler: classify each extended tweet and, on a positive
    prediction, generate and post a reply-style tweet seeded from its text.

    Relies on the module-level ``cleanData``, ``pre_data`` and ``generative``
    objects created earlier in the notebook.
    """

    def on_success(self, data):
        """Handle one streamed payload.

        Payloads without an ``extended_tweet`` entry are ignored.
        """
        if 'extended_tweet' not in data:
            return

        text = data['extended_tweet']['full_text']
        clean_source = data['source']
        print(clean_source)

        # `cleanData` / `pre_data` are the instances this notebook defines;
        # the names previously used here (`preprocessor`, `classifier`) are
        # not defined anywhere in it.
        text_prep = cleanData.process(text)
        # Wrap in a list so the tweet is vectorized as ONE document instead
        # of being iterated character by character.
        text_vectorized = pre_data.vectorize([text_prep])
        prediction = pre_data.predict(text_vectorized)
        class_prediction = (prediction > 0.5).astype("int32")
        if not class_prediction.any():
            return

        corpus, chars, char_indices, indices_char, maxlen, _ = generative.get_info(text_prep)
        if len(corpus) <= maxlen:
            # Too short to cut a maxlen-character seed; generate_tweets would
            # raise and take the whole stream down with it.
            return
        generative.generate_tweets(corpus, char_indices, indices_char, chars, maxlen)

        #stream.disconnect()
        #time.sleep(4)

    # This used to be indented inside on_success, which made it a local
    # function that was never called; it is now a real method override.
    # `headers` is optional so the handler works whether or not the installed
    # Twython passes response headers.
    def on_error(self, status_code, data, headers=None):
        """Print the HTTP status code of a failed stream request."""
        print(status_code)
        return False

In [154]:
# REST client used to post the generated tweet (Generative.generate_tweets
# looks this name up at call time).
twitter = Twython(APIKey, APIKeySecret, AccesToken, AccessTokenSecret)

In [155]:
# Defensive re-check before the stream starts: the stopword corpus is needed
# by the cleaning step. `nltk` is already imported in the imports cell, and
# the download only happens when the corpus is not on disk.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [156]:
# Authenticate the streaming client with the same credentials as the REST
# client, then start filtering English statuses for the hashtag. Incoming
# payloads are handled by MyStreamer.
stream = MyStreamer(APIKey, APIKeySecret, AccesToken, AccessTokenSecret)
stream.statuses.filter(track='#covid19', language="en", mode="extended")

<a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>
Tweet no. 001
Generating with seed:
an promote million network direct messag
________________________________________




an promote million network direct messag south health astim covid variant vaccin covid quebar call geat support ectin covid vaccin work trin

Tweet no. 002
Generating with seed:
b onlyfan promote million network direct
________________________________________
b onlyfan promote million network direct alch alde covid vaccin work tring
850,trank covid eteermon pay quercit incl covid covidvari covid c

Tweet no. 003
Generating with seed:
e want sob onlyfan promote million netwo
________________________________________
e want sob onlyfan promote million networ covid vaccin covid
293,seen get kit etherf boyth pandem sife covid vaccin work tring
850,trank plo

Tweet no. 004
Generating with seed:
n promote million network direct message
________________________________________
n promote million network direct message shit health aspirit realli conter covid variant vaccin covid
292,covid provid test mote covid eteer

Tweet no. 005
Generating with seed:
etwork direct message us safe could happ
_

TwythonAuthError: ignored