In [1]:
import scraper

In [13]:
from __future__ import absolute_import, division, print_function

import tensorflow as tf
tf.enable_eager_execution()

import numpy as np
import os
import time

In [3]:
# Subreddits to process; the later cells build one vocabulary, dataset and
# model per entry, in this order.
subreddits = ['AskReddit', 'AskWomen', 'AskMen']
# NOTE(review): presumably writes './top<sub>.txt' for each subreddit, since a
# later cell reads files with that name -- confirm in the scraper module.
scraper.scrape(subreddits)

Fetching from  AskReddit
Fetching from  AskWomen
Fetching from  AskMen


In [14]:
# Encode each subreddit's corpus as an array of character indices.
text_list = []   # one encoded array per subreddit, in `subreddits` order
vocab_dict = {}  # subreddit name -> sorted list of its unique characters
for sub in subreddits:
    # Read bytes, then decode for py2 compat. The `with` block closes the
    # file handle as soon as the read finishes instead of leaking it.
    with open('./top'+sub+'.txt', 'rb') as corpus_file:
        text = corpus_file.read().decode(encoding='utf-8')

    # The unique characters in the file
    vocab = sorted(set(text))
    vocab_dict[sub] = vocab

    # Creating a mapping from unique characters to indices.
    # These two names are overwritten on every iteration, so after the loop
    # they describe only the last subreddit; use `vocab_dict[sub]` to rebuild
    # the mapping for a specific subreddit.
    char2idx = {u:i for i, u in enumerate(vocab)}
    idx2char = np.array(vocab)

    text_as_int = np.array([char2idx[c] for c in text])

    text_list.append(text_as_int)
    

In [5]:
def split_input_target(chunk):
    """Pair a chunk with a copy of itself shifted by one position.

    Args:
        chunk: any sliceable sequence (string, list, array, tensor).

    Returns:
        A tuple `(input_text, target_text)` where `input_text` is `chunk`
        without its last element and `target_text` is `chunk` without its
        first element.
    """
    return chunk[:-1], chunk[1:]

In [6]:
# Size of the buffer used when shuffling sequences
BUFFER_SIZE = 10000

# Number of sequences per training batch
BATCH_SIZE = 64

# Length, in characters, of a single input sequence
seq_length = 100

# One batched tf.data dataset per entry of `text_list`, in the same order
datasets = []

for text in text_list:
    # Epoch bookkeeping. Both names are rebound on every iteration, so once
    # the loop ends they hold the values for the last entry of `text_list`.
    examples_per_epoch = len(text) // seq_length
    steps_per_epoch = examples_per_epoch // BATCH_SIZE

    # Stream of individual character indices
    char_dataset = tf.data.Dataset.from_tensor_slices(text)

    # Group into runs of seq_length+1 so that input and target, which are
    # offset by one position, each end up seq_length long
    sequences = char_dataset.batch(seq_length + 1, drop_remainder=True)

    # (input, target) pairs, shuffled, then grouped into full batches only
    dataset = sequences.map(split_input_target)
    dataset = dataset.shuffle(BUFFER_SIZE)
    dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)

    datasets.append(dataset)


In [7]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    """Assemble the embedding -> recurrent -> dense character model.

    The recurrent layer is created through the module-level name `rnn`,
    which is looked up when this function is called, so `rnn` must have been
    assigned before the first call.

    Args:
        vocab_size: number of distinct characters; used as the embedding
            input size and as the width of the final Dense layer.
        embedding_dim: size of each embedding vector.
        rnn_units: number of units passed to the recurrent layer.
        batch_size: fixed batch dimension of the input; the recurrent layer
            is stateful, so the batch size is baked into the model.

    Returns:
        An uncompiled `tf.keras.Sequential` model.
    """
    embedding_layer = tf.keras.layers.Embedding(
        vocab_size, embedding_dim,
        batch_input_shape=[batch_size, None])
    recurrent_layer = rnn(
        rnn_units,
        return_sequences=True,
        recurrent_initializer='glorot_uniform',
        stateful=True)
    output_layer = tf.keras.layers.Dense(vocab_size)
    return tf.keras.Sequential([embedding_layer, recurrent_layer, output_layer])

In [8]:
# Size of each character embedding vector
embedding_dim = 256

# Width of the recurrent layer
rnn_units = 1024

# Choose the recurrent layer factory that build_model() reads as `rnn`
if not tf.test.is_gpu_available():
    print("no gpu :(")
    import functools
    # Plain GRU with its recurrent activation set to sigmoid
    rnn = functools.partial(
        tf.keras.layers.GRU, recurrent_activation='sigmoid')
else:
    print("GPU!")
    rnn = tf.keras.layers.CuDNNGRU

# One untrained model per subreddit, in `subreddits` order
models = []
for sub in subreddits:
    # Each subreddit has its own vocabulary, hence its own layer sizes
    vocab_size = len(vocab_dict[sub])

    model = build_model(
        vocab_size=vocab_size,
        embedding_dim=embedding_dim,
        rnn_units=rnn_units,
        batch_size=BATCH_SIZE)

    models.append(model)


GPU!


In [9]:
def loss(labels, logits):
    """Sparse softmax cross-entropy between class-index labels and raw logits.

    Thin wrapper around `tf.nn.sparse_softmax_cross_entropy_with_logits`, so
    `logits` must be unnormalised scores rather than probabilities.
    """
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits)
    return cross_entropy

# Number of epochs to train each subreddit's model for
EPOCHS = 100

# Return value of model.fit() for each subreddit, in `subreddits` order
histories = []
for model, sub, dataset, encoded_text in zip(models, subreddits, datasets, text_list):
    # Derive the step count from THIS subreddit's corpus. The value left in
    # `steps_per_epoch` by the dataset-building loop belongs to the last
    # corpus only, so reusing it would give every model that corpus's epoch
    # length regardless of how much data it actually has.
    steps_per_epoch = (len(encoded_text) // seq_length) // BATCH_SIZE

    model.compile(
        optimizer = tf.train.AdamOptimizer(),
        loss = loss)

    # Directory where the checkpoints will be saved
    checkpoint_dir = './'+sub+'100checkpoint'
    # Name of the checkpoint files ({epoch} is filled in by the callback)
    checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

    checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_prefix,
        save_weights_only=True)

    print('\nTraining for '+sub)

    # The dataset is repeated indefinitely, so steps_per_epoch is what
    # delimits one epoch.
    history = model.fit(dataset.repeat(), epochs=EPOCHS, steps_per_epoch=steps_per_epoch, callbacks=[checkpoint_callback])

    histories.append(history)



Training for AskReddit
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/1

Epoch 99/100
Epoch 100/100

Training for AskMen
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100

In [10]:
def generate_text(model, start_string, vocab, num_generate=10000, temperature=1.0):
    """Sample text one character at a time from a trained character model.

    Args:
        model: callable model that maps a [1, sequence] tensor of character
            indices to logits and exposes `reset_states()`. It is used with
            a batch size of 1.
        start_string: non-empty seed text; every character must be in `vocab`.
        vocab: sequence of characters; a character's position is its index.
        num_generate: number of characters to sample after the seed.
        temperature: divisor applied to the logits before sampling. Low
            values give more predictable text, high values more surprising
            text. Must be positive.

    Returns:
        `start_string` followed by the `num_generate` sampled characters.

    Raises:
        ValueError: if `start_string` is empty or `temperature` is not
            positive.
        KeyError: if `start_string` contains a character missing from `vocab`.
    """
    # Validate before touching the model: an empty seed would otherwise fail
    # later with an obscure indexing error on an empty prediction tensor.
    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    # Converting our start string to numbers (vectorizing)
    char2idx = {u:i for i, u in enumerate(vocab)}
    idx2char = np.array(vocab)
    input_eval = [char2idx[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval, 0)

    # Sampled characters, joined once at the end
    text_generated = []

    # Here batch size == 1
    model.reset_states()
    for _ in range(num_generate):
        predictions = model(input_eval)
        # remove the batch dimension
        predictions = tf.squeeze(predictions, 0)

        # Scale the logits, then sample the next character id from the
        # distribution at the last position of the sequence
        predictions = predictions / temperature
        predicted_id = tf.multinomial(predictions, num_samples=1)[-1,0].numpy()

        # Feed only the sampled character back in; the model is called again
        # without resetting its state, so earlier context is carried forward
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(idx2char[predicted_id])

    return (start_string + ''.join(text_generated))

In [12]:

# Rebuild each subreddit's model with batch size 1, restore its latest
# checkpoint and print a sample. Only the checkpoints on disk are needed, so
# the loop no longer zips in `models`, whose items were immediately
# overwritten.
for sub in subreddits:
    checkpoint_dir = './'+sub+'100checkpoint'

    print(len(vocab_dict[sub]))

    # latest_checkpoint() returns None when the directory holds no
    # checkpoint; fail here with a clear message instead of inside
    # load_weights().
    latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_checkpoint is None:
        raise IOError('No checkpoint found in ' + checkpoint_dir +
                      '; run the training cell first')

    # The recurrent layer is stateful, so a separate batch-size-1 model is
    # needed for generation.
    model = build_model(len(vocab_dict[sub]), embedding_dim, rnn_units, batch_size=1)
    model.load_weights(latest_checkpoint)
    model.build(tf.TensorShape([1, None]))

    print('\nGenerating for '+sub)
    print(generate_text(model, start_string=u"What", num_generate=10000, vocab=vocab_dict[sub]))



89

Generating for AskReddit
What's something embarrassing you're willing to admit?
What's your "It we like to hoG the What other skills have you inconappen during sex that nobody warn or stranger with our pet staff help?
Daughters of reddit, what is something that is a 'minat do you feel like you're missing out on?
What professionals of Reddit, what is the biggest holy-shit-red-fle most to ese 18 hingrace you were definitely not sund al well ret $10,000. Do you take it, and why or why not?
Redditors who rarely child you know you were staged in you’ve always wanted to tell on r/arass adult?
Whing that has hon Spictureakes people notical internet that most people don’t know about?
What’s o strew did you face on a day to doneak, have wants to scamity in 10d without ragious, in the same interviel de all of the year 2019?
[Serious] Depressed people wants to have all the reddit?
What's the fastest way you've seen someone improve their life?
Einstein said “If you can't explain it to a sign y

What is something you own that is overkill for the purpose you bought it?
Why is it so others something you've genuinely not couple you've seen?
What’s your go-to volumizing problem?
What parther then they're not really doing it because they want to"?
Is anyone else sick-to-death of getting relationship zoned?
Women who didn’t do particularly well in school but found later success her?
Why do you think a friend of yours will be single forever?
What don't you enjyour mind and/or frustrated you today, An?
Have you ever been helped out by a guy who noticed another policy?
How often do you have trouble telling apart someone just being friendly wedding on a first date?
What's your go-to "comfort food" tv show?
What beauth other women enforcing toxic masculinity? I'm a baby by thems and elected to have thoughts ones from reddit. All your questions, cross your mind when you saw a ere some "fecred up bleakup?
How do you prevent yourself from melting into a lazy puddle of goo the moment you wal

What’s the most important realization you've ever dee women need to start asian day commonly a blowing condects of it?
I just found out my subreddit is still addicted to the ndwatching too much Seinfeld?
We always hear the negative aspects of being an adula but don’t dane on my wife eaten a lost for words.
Too can I the advice and what are the drawbacks to your lifesty clach?
My fivery died 6 had tigs. What ir the gime I make the first thing that backet byou were ment you experienced?
What's your friendship breaker" you've seen from a coworker?
What messagetlenter the us GFandg a job intervie?
Men who have driven fire trucks, how cool was that?
What's your favorite NSFW subs, your SO does to make you feel immensely loved?
What is the point felt that they were terribly failing in life, how did you turnars from her?
Somiticans gobalance college, work, social life, from himins, what are good places to a board meeting with politicians and executives in the field. I have never been in a sta