In [3]:
pip install torch torchvision flask flask-cors

^C
Note: you may need to restart the kernel to use updated packages.


In [1]:
import os
import torch
import numpy as np
import pandas as pd
import tensorflow as tf
from flask import Flask, request, jsonify
from datasets import Dataset
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, Attention
from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments, DataCollatorForLanguageModeling

ModuleNotFoundError: No module named 'torch'

In [None]:

# Define paths
# Each location may be overridden with an environment variable so the notebook
# can run on another machine; when a variable is unset the original hard-coded
# location is used, so existing behavior is unchanged.
DATASET_PATH = os.environ.get(
    "STORYGEN_DATASET_PATH",
    "C:\\Users\\91807\\OneDrive\\Desktop\\TOC\\writingPrompts",
)
MODEL_PATH = os.environ.get(
    "STORYGEN_MODEL_PATH",
    "C:\\Users\\91807\\OneDrive\\Desktop\\TOC\\gpt2-tokenizer",
)
OUTPUT_PATH = os.environ.get(
    "STORYGEN_OUTPUT_PATH",
    "C:\\Users\\91807\\OneDrive\\Desktop\\TOC\\output",
)

# Ensure output directory exists (no error if it is already there)
os.makedirs(OUTPUT_PATH, exist_ok=True)

In [None]:
# Load dataset function
def load_dataset(source_file, target_file, dataset_dir=None):
    """Read a line-aligned prompt file and story file into one DataFrame.

    Line i of ``source_file`` is paired with line i of ``target_file``.

    Args:
        source_file: File name of the prompts file, relative to the dataset directory.
        target_file: File name of the stories file, relative to the dataset directory.
        dataset_dir: Directory containing both files. Defaults to the
            module-level ``DATASET_PATH`` when omitted.

    Returns:
        pandas.DataFrame with the columns ``"prompt"`` and ``"story"``,
        one row per line, without trailing line terminators.

    Raises:
        ValueError: If the two files do not contain the same number of lines.
        OSError: If either file cannot be opened.
    """
    base_dir = DATASET_PATH if dataset_dir is None else dataset_dir
    source_path = os.path.join(base_dir, source_file)
    target_path = os.path.join(base_dir, target_file)

    with open(source_path, 'r', encoding='utf-8') as src, open(target_path, 'r', encoding='utf-8') as tgt:
        # Iterate the file objects (not str.splitlines) so records are split only
        # on real line endings, then drop the terminator that readlines() kept.
        prompts = [line.rstrip("\n") for line in src]
        stories = [line.rstrip("\n") for line in tgt]

    # Fail with an explicit message instead of pandas' generic length error.
    if len(prompts) != len(stories):
        raise ValueError(
            f"Line count mismatch: {source_file} has {len(prompts)} lines "
            f"but {target_file} has {len(stories)}"
        )

    return pd.DataFrame({"prompt": prompts, "story": stories})

# Read the train / validation / test splits into DataFrames
train_df = load_dataset("train.wp_source", "train.wp_target")
valid_df = load_dataset("valid.wp_source", "valid.wp_target")
test_df = load_dataset("test.wp_source", "test.wp_target")

# Write each split to a CSV file (without the index column) under OUTPUT_PATH
for _frame, _csv_name in (
    (train_df, "train.csv"),
    (valid_df, "valid.csv"),
    (test_df, "test.csv"),
):
    _frame.to_csv(os.path.join(OUTPUT_PATH, _csv_name), index=False)

# Load the tokenizer from MODEL_PATH and reuse its end-of-sequence token for padding
tokenizer = GPT2Tokenizer.from_pretrained(MODEL_PATH)
tokenizer.pad_token = tokenizer.eos_token

def tokenize_function(examples):
    """Tokenize a batch of examples for causal language-model fine-tuning.

    When the batch has a ``"story"`` column, each prompt is joined with its
    story so the model is trained on prompt-plus-story text rather than on the
    prompts alone. Batches without a ``"story"`` column are tokenized from the
    prompts only.

    Args:
        examples: Mapping of column name to a list of values, as passed by
            ``Dataset.map(..., batched=True)``. Must contain ``"prompt"``.

    Returns:
        The output of the module-level ``tokenizer``, padded and truncated to
        512 tokens per example.
    """
    prompts = examples["prompt"]
    if "story" in examples:
        # strip() removes stray surrounding whitespace (e.g. line terminators
        # left over from file reading) before the two parts are joined.
        texts = [
            f"{prompt.strip()} {story.strip()}"
            for prompt, story in zip(prompts, examples["story"])
        ]
    else:
        texts = prompts
    return tokenizer(texts, padding="max_length", truncation=True, max_length=512)

In [None]:
# Wrap the training frame as a HuggingFace dataset, tokenize it in batches,
# then hold out 10% of the rows for evaluation
dataset = Dataset.from_pandas(train_df)
tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.train_test_split(test_size=0.1)

# Training configuration: evaluate and checkpoint once per epoch,
# keeping at most two checkpoints on disk
training_args = TrainingArguments(
    output_dir=os.path.join(OUTPUT_PATH, "gpt2_finetuned"),
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
)

# Collator configured with masked-language-modeling turned off
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Load the GPT-2 weights from MODEL_PATH and wire everything into a Trainer
model = GPT2LMHeadModel.from_pretrained(MODEL_PATH)
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    tokenizer=tokenizer,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
)

In [None]:
# Train model
trainer.train()

# Save the fine-tuned model and its tokenizer side by side so both can be
# reloaded later with from_pretrained() from the same directory.
finetuned_dir = os.path.join(OUTPUT_PATH, "gpt2_finetuned")
model.save_pretrained(finetuned_dir)
tokenizer.save_pretrained(finetuned_dir)

# NOTE: the former `model.save(<...>.h5)` call was removed. `.save()` is a Keras
# method; `model` here is a GPT2LMHeadModel, which does not provide it, so the
# call raised AttributeError after training. save_pretrained() above already
# persists the weights.

# Load and generate story
def generate_story(prompt, model, tokenizer, max_seq_length=100):
    """Generate a story continuation for ``prompt`` with a causal language model.

    The previous implementation called Keras tokenizer/model methods
    (``texts_to_sequences``, ``word_index``, ``predict``) that the GPT-2
    tokenizer and model used in this notebook do not provide. This version uses
    the ``generate`` API instead and keeps the greedy (arg-max) decoding of the
    original loop.

    Args:
        prompt: Text the story should continue from. Must be a non-empty string.
        model: Model exposing ``device`` and ``generate(...)`` (e.g. the
            fine-tuned ``GPT2LMHeadModel``).
        tokenizer: Tokenizer matching ``model``; it is called with
            ``return_tensors="pt"`` and must expose ``decode`` and
            ``eos_token_id``.
        max_seq_length: Maximum number of new tokens to generate.

    Returns:
        The generated text only (the prompt itself is not repeated).

    Raises:
        ValueError: If ``prompt`` is not a non-empty string.
    """
    if not isinstance(prompt, str) or not prompt.strip():
        raise ValueError("prompt must be a non-empty string")

    encoded = tokenizer(prompt, return_tensors="pt")
    # Inputs must live on the same device as the model weights.
    input_ids = encoded["input_ids"].to(model.device)
    attention_mask = encoded["attention_mask"].to(model.device)

    output_ids = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_new_tokens=max_seq_length,
        do_sample=False,  # greedy decoding, like the original arg-max loop
        pad_token_id=tokenizer.eos_token_id,  # the tokenizer pads with its EOS token
    )

    # generate() returns prompt + continuation; keep only the continuation.
    prompt_length = input_ids.shape[1]
    return tokenizer.decode(output_ids[0][prompt_length:], skip_special_tokens=True)


In [None]:
# Flask API
app = Flask(__name__)

@app.route('/generate', methods=['POST'])
def generate():
    """Handle POST /generate: return a generated story for the given prompt.

    Expects a JSON object body with a non-empty string field ``"prompt"``.

    Returns:
        A JSON response ``{"story": <text>}`` on success, or
        ``{"error": <message>}`` with HTTP status 400 when the body is not a
        JSON object or the prompt is missing or empty.
    """
    # silent=True yields None instead of raising when the body is absent or not valid JSON.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    prompt = data.get('prompt')
    if not isinstance(prompt, str) or not prompt.strip():
        return jsonify({"error": "Field 'prompt' must be a non-empty string."}), 400

    story = generate_story(prompt, model, tokenizer)
    return jsonify({"story": story})

if __name__ == '__main__':
    # NOTE(review): host 0.0.0.0 listens on all network interfaces — confirm this is intended.
    app.run(host='0.0.0.0', port=5000)