run.py
import os
# explicit imports for names used below (they may also arrive via `from model import *`)
import numpy as np
import pandas
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import utils as ku
from model import *
tokenizer, total_words, tweets = tokenize("clean_trump_15k.csv", 10000)
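# tokenize() comes from model.py (not shown here); it presumably fits a Keras
# Tokenizer on the tweet column of clean_trump_15k.csv with a 10,000-word
# vocabulary cap, returning the fitted tokenizer, the vocabulary size, and the
# tweets DataFrame.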
# create input sequences using list of tokens
input_sequences = []
for tweet in tweets.tweet:
    token_list = tokenizer.texts_to_sequences([tweet])[0]
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[max(0, i+1-10):i+1]
        input_sequences.append(n_gram_sequence)
print(len(input_sequences))
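# Example of the windowing above: a tweet tokenized as [12, 7, 95, 4] produces
# the sequences [12, 7], [12, 7, 95], [12, 7, 95, 4]; the max(0, i+1-10) slice
# keeps only the last 10 tokens of each window, so long tweets do not create
# arbitrarily long contexts.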
np.random.shuffle(input_sequences)
# pad sequences
max_sequence_len = max([len(x) for x in input_sequences])
min_sequence_len = min([len(x) for x in input_sequences])
input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))
print("sequence length max: ", max_sequence_len, ", min: ", min_sequence_len, "\n")
# create predictors and label
predictors, labels = input_sequences[:,:-1],input_sequences[:,-1]
labels = ku.to_categorical(labels, num_classes=total_words)
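# Each padded row is split so that everything but the last token is the input
# context and the final token is the word to predict; to_categorical turns the
# integer targets into one-hot vectors over the total_words-sized vocabulary.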
# Create a basic model instance
model = create_model(total_words, max_sequence_len)
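# create_model() is also defined in model.py; all that is assumed here is that
# it returns a compiled Keras model whose softmax output has total_words units
# and whose input shape matches the padded sequences built above.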
folder = "training_8/"
checkpoint_path = folder+"cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
# Create a callback that saves the model's weights
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)
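# The callback writes the model weights to training_8/cp.ckpt after every
# epoch; they can later be restored with model.load_weights(checkpoint_path).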
history = model.fit(predictors, labels, batch_size=128, epochs=100, verbose=1,
                    validation_split=0.05,
                    # validation_data=(val_predictors, val_labels),
                    callbacks=[cp_callback])
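# validation_split=0.05 holds out the last 5% of the (already shuffled)
# sequences for validation; the commented-out validation_data argument would
# instead evaluate against a fixed, separately prepared validation set.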
hist_df = pandas.DataFrame(history.history)
hist_csv_file = folder+'history.csv'
with open(hist_csv_file, mode='w') as f:
    hist_df.to_csv(f)
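# history.history holds the per-epoch metrics recorded by fit() (loss and any
# compiled metrics, plus their val_ counterparts), so training_8/history.csv
# can be inspected or plotted after the run.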
f = open(folder+'sample_sequences.txt', 'w+')
for i in range(10):
    f.write(generate_seq(tokenizer, model))
    f.write("\n")
f.close()
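# generate_seq() is the third helper from model.py; it presumably seeds the
# trained model and repeatedly samples the next word to produce one generated
# tweet per call, which is what gets written to sample_sequences.txt.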