In [1]:
pip install pandas numpy tensorflow scikit-learn flask



In [2]:
import pandas as pd
import numpy as np

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Load the parallel-sentence dataset. The Keras Tokenizer lower-cases every
# entry, so a missing value (NaN, a float) in either text column would raise;
# drop incomplete rows before tokenizing.
df = (
    pd.read_csv("/content/lstm_translator_dataset_10000.csv")
    .dropna(subset=["source_text", "target_text"])
    .reset_index(drop=True)
)

# Plain aliases for the two text columns. No language tag is merged into the
# input here: the model only ever sees the source sentence.
df["input_text"] = df["source_text"].astype(str)
df["output_text"] = df["target_text"].astype(str)

X = df["input_text"]
y = df["output_text"]

# One shared vocabulary fitted over both the source and the target sentences.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X.tolist() + y.tolist())

X_seq = tokenizer.texts_to_sequences(X)
y_seq = tokenizer.texts_to_sequences(y)

max_len = 10

# truncating='post' keeps the *start* of sentences longer than max_len. The
# default ('pre') cuts tokens from the front, which would make column 0 of
# y_pad something other than the first target word (see the label step below).
X_pad = pad_sequences(X_seq, maxlen=max_len, padding='post', truncating='post')
y_pad = pad_sequences(y_seq, maxlen=max_len, padding='post', truncating='post')

# +1 because index 0 is reserved for padding and never assigned to a word.
vocab_size = len(tokenizer.word_index) + 1

# Simple LSTM classifier: source token ids -> distribution over the vocabulary.
# `input_length` is omitted on purpose: it is deprecated in Keras 3 and the
# sequence length is inferred from the data on the first call to fit().
model = Sequential()
model.add(Embedding(vocab_size, 64))
model.add(LSTM(64))
model.add(Dense(vocab_size, activation='softmax'))

model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

# The network emits a single softmax per sample, so the training label is only
# the FIRST token id of each target sentence, reshaped to (n_samples, 1).
y_pad = np.expand_dims(y_pad[:, 0], axis=1)

model.fit(X_pad, y_pad, epochs=5, batch_size=64)

# Legacy HDF5 file name kept so that anything loading "translator_lstm.h5"
# keeps working. Only the model is saved; the fitted tokenizer is not persisted.
model.save("translator_lstm.h5")

print("Model Saved")



Epoch 1/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - accuracy: 0.0431 - loss: 3.8494
Epoch 2/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.0653 - loss: 3.3687
Epoch 3/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.0651 - loss: 3.3684
Epoch 4/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.0599 - loss: 3.3720
Epoch 5/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.0694 - loss: 3.3641




Model Saved


In [3]:
pip install gradio==3.50.2 tensorflow



In [4]:
import gradio as gr
import pandas as pd

# Load the dataset and discard every row that has a missing value in any column.
original_df = pd.read_csv("/content/lstm_translator_dataset_10000 (1).csv").dropna()

# Build the lookup table used by the Gradio callback: one row per
# English -> Hindi or English -> Punjabi pair. Language names and the English
# source text are trimmed and lower-cased so lookups are case-insensitive;
# the target text is kept exactly as stored.
source_lang = original_df["source_language"].astype(str).str.strip().str.lower()
target_lang = original_df["target_language"].astype(str).str.strip().str.lower()
is_supported_pair = (source_lang == "english") & target_lang.isin(["hindi", "punjabi"])

# Built column-wise so the three columns exist even when no row qualifies
# (a frame built from an empty record list would have no columns at all).
df = pd.DataFrame({
    "source_text": original_df.loc[is_supported_pair, "source_text"]
    .astype(str).str.strip().str.lower(),
    "target_language": target_lang[is_supported_pair],
    "target_text": original_df.loc[is_supported_pair, "target_text"].astype(str),
}).reset_index(drop=True)

# Same data as a list of dicts, kept under the name this cell has always defined.
translation_records = df.to_dict("records")

def translate(text, target_lang):
    """Return the stored translation of ``text`` from the module-level ``df`` table.

    Both arguments are trimmed and lower-cased before matching, so
    ``" Hello "`` / ``"Hindi"`` match rows stored as ``"hello"`` / ``"hindi"``.

    Args:
        text: Source sentence to look up; converted with ``str()`` first.
        target_lang: Name of the target language; ``None`` is treated as
            "no language selected".

    Returns:
        The ``target_text`` of the first matching row, or one of two fixed
        messages: ``"Sentence not found in dataset"`` when no row has this
        source text, ``"Translation not available for selected language"``
        when the sentence exists but not for ``target_lang``.
    """
    text = str(text).strip().lower()
    # Normalise the language the same way as the text so the comparison
    # agrees with the lower-cased values stored in the lookup table.
    target_lang = "" if target_lang is None else str(target_lang).strip().lower()

    # Narrow by sentence first so the two failure cases can be told apart.
    sentence_rows = df[df["source_text"] == text]
    if sentence_rows.empty:
        return "Sentence not found in dataset"

    language_rows = sentence_rows[sentence_rows["target_language"] == target_lang]
    if language_rows.empty:
        return "Translation not available for selected language"

    return language_rows.iloc[0]["target_text"]

# Two-input form wired to translate(); the result is rendered as plain text.
app = gr.Interface(
    fn=translate,
    inputs=[
        gr.Textbox(label="Enter Text"),
        # Preselect a language so submitting the form without touching the
        # dropdown still sends a valid target language to translate().
        gr.Dropdown(["hindi", "punjabi"], value="hindi", label="Target Language"),
    ],
    outputs="text",
    title="Language Translator (Demo Dataset)"
)

app.launch()


IMPORTANT: You are using gradio version 3.50.2, however version 4.44.1 is available, please upgrade.
--------
Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://0c0bf4375a82267a7a.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


