<a href="https://colab.research.google.com/github/yarkalm/machinelearning/blob/main/Generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
import numpy as np

In [None]:
# Location of the training corpus on the mounted Google Drive.
filename = "/content/drive/MyDrive/Colab Notebooks/МИИиНС/Generator/вдудь.txt"
# Read the whole corpus. The context manager closes the file handle, and the
# explicit encoding keeps the Cyrillic text independent of the platform default.
with open(filename, encoding="utf-8") as corpus_file:
    raw_text = corpus_file.read()
# Fold everything to lower case so upper/lower variants map to the same character.
raw_text = raw_text.lower()

In [None]:
# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(raw_text))
# Lookup table from a character to its position in `chars`.
char_to_int = {symbol: position for position, symbol in enumerate(chars)}

In [None]:
# Corpus size in characters and number of distinct characters.
n_chars, n_vocab = len(raw_text), len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)

Total Characters:  426304
Total Vocab:  101


In [None]:
# Slide a window of `seq_length` characters over the corpus one step at a time.
# Each window (as integer codes) is an input; the character right after it is
# the target.
seq_length = 100
window_starts = range(n_chars - seq_length)
dataX = [
    [char_to_int[symbol] for symbol in raw_text[start:start + seq_length]]
    for start in window_starts
]
dataY = [char_to_int[raw_text[start + seq_length]] for start in window_starts]
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

Total Patterns:  426204


In [None]:
# Reshape the integer sequences to (samples, time steps, features).
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
# Normalize: character indices 0..n_vocab-1 are scaled into [0, 1).
X = X / float(n_vocab)
# One-hot encode the output variable. num_classes is pinned to the vocabulary
# size: without it the width is inferred as max(dataY) + 1, which is smaller
# than n_vocab whenever the highest-indexed characters never occur as a target.
y = np_utils.to_categorical(dataY, num_classes=n_vocab)

In [None]:
# Display the dimensions of the input array as a quick sanity check.
X.shape

(426204, 100, 1)

In [None]:
# Two stacked 256-unit LSTM layers, each followed by 20% dropout, then a softmax
# layer with one output per column of the one-hot targets.
time_steps, n_features = X.shape[1], X.shape[2]
n_classes = y.shape[1]
model = Sequential([
    # return_sequences=True hands the full sequence to the second LSTM.
    LSTM(256, input_shape=(time_steps, n_features), return_sequences=True),
    Dropout(0.2),
    LSTM(256),
    Dropout(0.2),
    Dense(n_classes, activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [None]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 100, 256)          264192    
                                                                 
 dropout_2 (Dropout)         (None, 100, 256)          0         
                                                                 
 lstm_3 (LSTM)               (None, 256)               525312    
                                                                 
 dropout_3 (Dropout)         (None, 256)               0         
                                                                 
 dense_1 (Dense)             (None, 86)                22102     
                                                                 
Total params: 811,606
Trainable params: 811,606
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Checkpoint file name template; the training loss (4 decimals) is embedded in
# the name.
filepath = "/content/drive/MyDrive/Colab Notebooks/МИИиНС/Generator/weights/weights--{loss:.4f}.hdf5"
# Save only when the monitored training loss reaches a new minimum.
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    verbose=1,
    save_best_only=True,
    mode='min',
)
callbacks_list = [checkpoint]

In [None]:
# Initial training run: 15 epochs in mini-batches of 128, with the checkpoint
# callback attached. The returned history is kept for later inspection.
history = model.fit(
    X,
    y,
    epochs=15,
    batch_size=128,
    callbacks=callbacks_list,
)

In [None]:
# Keep training one epoch at a time (batch size 64) for as long as the most
# recently recorded loss is above 1.5, printing a running round counter.
i = 1
while True:
    latest_loss = history.history['loss'][-1]
    if not latest_loss > 1.5:
        break
    print(i)
    history = model.fit(X, y, epochs=1, batch_size=64, callbacks=callbacks_list)
    i += 1

1
Epoch 1: loss improved from 1.67186 to 1.67041, saving model to /content/drive/MyDrive/Colab Notebooks/МИИиНС/Generator/weights/weights--1.6704.hdf5
2
Epoch 1: loss improved from 1.67041 to 1.66658, saving model to /content/drive/MyDrive/Colab Notebooks/МИИиНС/Generator/weights/weights--1.6666.hdf5
3
Epoch 1: loss improved from 1.66658 to 1.66516, saving model to /content/drive/MyDrive/Colab Notebooks/МИИиНС/Generator/weights/weights--1.6652.hdf5
4
Epoch 1: loss improved from 1.66516 to 1.66459, saving model to /content/drive/MyDrive/Colab Notebooks/МИИиНС/Generator/weights/weights--1.6646.hdf5
5
Epoch 1: loss improved from 1.66459 to 1.66231, saving model to /content/drive/MyDrive/Colab Notebooks/МИИиНС/Generator/weights/weights--1.6623.hdf5
6
Epoch 1: loss improved from 1.66231 to 1.66106, saving model to /content/drive/MyDrive/Colab Notebooks/МИИиНС/Generator/weights/weights--1.6611.hdf5
7
Epoch 1: loss improved from 1.66106 to 1.65826, saving model to /content/drive/MyDrive/Colab

In [None]:
import os
import re
from pathlib import Path

# Keep only the checkpoint with the lowest loss and delete all other checkpoints.
p = Path("/content/drive/MyDrive/Colab Notebooks/МИИиНС/Generator/weights")
# Checkpoints are named "weights--<loss>.hdf5". The loss is parsed from the
# file name itself rather than from fixed character offsets of the full path,
# so the directory can change without breaking the parse.
checkpoint_name = re.compile(r"weights--(\d+(?:\.\d+)?)\.hdf5")

# Map every checkpoint file to its loss. Directories and files that do not
# follow the naming scheme are ignored (and never deleted).
losses = {}
for file in p.rglob("*"):
    match = checkpoint_name.fullmatch(file.name)
    if match and file.is_file():
        losses[file] = float(match.group(1))

if not losses:
    raise FileNotFoundError(f"No checkpoint files found in {p}")

# The survivor is tracked by path, not rebuilt from the float: formatting a
# float drops trailing zeros (1.6700 -> "1.67") and would no longer match the
# file on disk.
best_file = min(losses, key=losses.get)
# `best` stays a float so later cells that read it keep working.
best = losses[best_file]
for file in losses:
    if file != best_file:
        print(file)
        os.remove(file)
print(best_file)

/content/drive/MyDrive/Colab Notebooks/МИИиНС/Generator/weights/weights--1.6719.hdf5


In [None]:
# Rebuild the best checkpoint's path. The loss is formatted with four decimals
# to match the "{loss:.4f}" template the checkpoints were written with; a plain
# "{best}" would drop trailing zeros (1.6700 -> "1.67") and miss the file.
filename = f"/content/drive/MyDrive/Colab Notebooks/МИИиНС/Generator/weights/weights--{best:.4f}.hdf5"
model.load_weights(filename)
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [None]:
# Reverse lookup: integer code -> character (position in `chars` to the symbol).
int_to_char = dict(enumerate(chars))

In [None]:
import sys

# Build the seed window from user input: the text is right-aligned and
# left-padded to the window length the model was built for (seq_length).
# Alternative seed, a random training pattern:
#   pattern = list(dataX[numpy.random.randint(0, len(dataX) - 1)])

# Pad with the index of the space character; fall back to 1 (the value that
# was previously hard-coded) if the corpus contains no space.
pad_index = char_to_int.get(' ', 1)
# Characters that are not in the training vocabulary have no index, so they
# are dropped instead of raising a KeyError.
string = [ch for ch in input("Input: ").lower() if ch in char_to_int][0:seq_length]
pattern = [pad_index] * (seq_length - len(string)) + [char_to_int[ch] for ch in string]
print(pattern)
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# generate characters
for i in range(200):
    # Shape and scale the window the same way as the training inputs.
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: always take the most probable next character.
    index = int(numpy.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Slide the window: append the prediction and drop the oldest character.
    pattern.append(index)
    pattern = pattern[1:]
print("\nDone.")

Input: У меня был одногруппник рэпер — Славик.
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 81, 1, 74, 67, 75, 93, 1, 63, 89, 73, 1, 76, 66, 75, 76, 65, 78, 81, 77, 77, 75, 70, 72, 1, 78, 91, 77, 67, 78, 1, 97, 1, 79, 73, 62, 64, 70, 72, 14]
Seed:
"                                                              у меня был одногруппник рэпер — славик. "


— ну это было в том числе и тебе не понимаете, что ты просто сейчас не просто сейчас не просто сейчас не просто самое классных меняши.

— а так в какой-то меня он выложил самое классно.

— ну это бы
Done.
