In [20]:

import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense
import keras.utils

# Dosyadan metni okuma
with open("roman2.txt", "r", encoding="utf-8") as file:
    text = file.read()

# Metni kelimelere ayırma
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])
total_words = len(tokenizer.word_index) + 1

# Metni sayılara çevirme
sequences = tokenizer.texts_to_sequences([text])[0]
print(sequences)

# Giriş ve çıkış oluşturma
input_sequences = []
for i in range(1, len(sequences)):
    n_gram_sequence = sequences[:i+1]
    input_sequences.append(n_gram_sequence)

maxlen = max([len(seq) for seq in input_sequences])

input_sequences = pad_sequences(input_sequences, maxlen=maxlen, padding='pre')  
x, y = input_sequences[:, :-1], input_sequences[:, -1]
y = keras.utils.to_categorical(y, num_classes=total_words)

# LSTM modeli oluşturma
model = Sequential()
model.add(Embedding(total_words, 50, input_length=maxlen-1))
model.add(LSTM(100))
model.add(Dense(total_words, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy')
model.fit(x, y, epochs=50, verbose=1)



[4, 7, 30, 31, 4, 7, 32, 33, 34, 35, 36, 12, 37, 38, 39, 12, 40, 41, 42, 43, 44, 45, 46, 47, 48, 2, 49, 5, 50, 51, 8, 52, 53, 54, 55, 56, 57, 6, 58, 59, 60, 61, 62, 63, 64, 65, 66, 13, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 14, 3, 77, 14, 78, 79, 80, 81, 3, 82, 83, 84, 85, 86, 9, 87, 88, 1, 89, 90, 91, 92, 2, 93, 94, 95, 15, 1, 16, 96, 17, 97, 98, 6, 99, 5, 100, 101, 5, 102, 103, 104, 105, 106, 107, 108, 109, 18, 8, 110, 111, 112, 113, 2, 114, 115, 13, 116, 117, 118, 6, 119, 120, 121, 122, 123, 10, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 6, 135, 136, 137, 138, 11, 139, 140, 141, 3, 142, 143, 18, 8, 19, 144, 145, 10, 1, 146, 16, 147, 148, 149, 150, 151, 2, 152, 153, 1, 154, 155, 156, 17, 20, 157, 158, 21, 159, 160, 161, 162, 163, 22, 23, 164, 165, 22, 23, 166, 167, 168, 3, 9, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 9, 179, 180, 181, 11, 182, 183, 184, 21, 185, 186, 187, 188, 189, 3, 190, 191, 11, 1, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 1, 202, 203,

<keras.src.callbacks.History at 0x1c653819c90>

In [23]:
# Örnek metin üretimi
def generate_text(seed_text, next_words, model, max_sequence_len):
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        # token_list'e bir kelime daha ekledik
        token_list = pad_sequences([token_list], maxlen=max_sequence_len, padding='pre')
        predicted_probs = model.predict(token_list, verbose=0)[0]
        predicted = np.argmax(predicted_probs)
        output_word = ""
        for word, index in tokenizer.word_index.items():
            if index == predicted:
                output_word = word
                break
        seed_text += " " + output_word
    return seed_text

# Örnek metin üretimi
generated_text = generate_text("Üstündeki ", 15, model, max_sequence_len=maxlen-1)
print(generated_text)


Üstündeki  kemal kemal kemal kemal kemal kemal kemal kemal kemal kemal kemal kemal kemal köyünde köyünde


[[  0   0   0 ...   0   4   7]
 [  0   0   0 ...   4   7  30]
 [  0   0   0 ...   7  30  31]
 ...
 [  0   0   4 ... 304 305 306]
 [  0   4   7 ... 305 306 307]
 [  4   7  30 ... 306 307 308]]
