# 🚀 Remote Training with Colab

This notebook is automatically launched from a Streamlit UI with URL parameters:
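- `model`: Hugging Face model name (e.g., distilbert-base-uncased); used by the NLP task only — image classification always fine-tunes MobileNetV2
- `task`: `NLP` for text classification; any other value runs image classification
- `lr`, `batch_size`, `epochs`, `gpu`: learning rate, batch size, number of training epochs, and GPU type (e.g., T4)
- `dataset_url`: URL of a `.csv` file with `text` and `label` columns (NLP), or of a `.zip` of images arranged in one sub-folder per class (image classification)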

In [None]:
# Install dependencies
!pip install transformers datasets scikit-learn tensorflow

In [None]:
# 🔧 Training parameters
# NOTE(review): nothing in this cell reads the launch URL; these are hard-coded
# defaults. Per the notebook intro the Streamlit launcher supplies the real
# values — TODO confirm how they are injected into this dict.
import html

from IPython.display import display, HTML

params = {
    'model': 'distilbert-base-uncased',
    'task': 'NLP',
    'lr': 2e-5,
    'batch_size': 8,
    'epochs': 3,
    'gpu': 'T4',
    'dataset_url': 'https://drive.google.com/your-uploaded-dataset.csv'
}

# The values are meant to come from outside the notebook, so escape them before
# embedding them in HTML; otherwise a crafted model/task string could inject markup.
model_label = html.escape(str(params['model']))
task_label = html.escape(str(params['task']))
display(HTML(f"<b>Training Model:</b> {model_label}<br><b>Task:</b> {task_label}"))

In [None]:
# 🗂️ Download dataset
# Fetches params['dataset_url'] and prepares either `df` (CSV) or ./images (zip)
# for the training cell. Raises ValueError for any other kind of URL and
# subprocess.CalledProcessError when the download itself fails.
import os
import subprocess
import zipfile
from urllib.parse import urlparse

from IPython.display import display

dataset_url = params['dataset_url']

# Inspect only the URL path so a query string (e.g. "?download=1") or an
# upper-case extension does not hide the file type.
url_path = urlparse(dataset_url).path.lower()

if url_path.endswith('.csv'):
    import pandas as pd

    # Argument list (no shell) so a crafted URL cannot inject shell commands;
    # check=True turns a failed download into an error instead of a stale/empty file.
    subprocess.run(["wget", "-q", "-O", "dataset.csv", "--", dataset_url], check=True)
    df = pd.read_csv("dataset.csv")
    # A bare `df.head()` inside an `if` body is not rendered; display it explicitly.
    display(df.head())

elif url_path.endswith('.zip'):
    subprocess.run(["wget", "-q", "-O", "images.zip", "--", dataset_url], check=True)
    # Pure-Python extraction: overwrites on re-run instead of prompting.
    with zipfile.ZipFile("images.zip") as archive:
        archive.extractall("./images")
    print(sorted(os.listdir("./images")))

else:
    # Fail here with a clear message instead of leaving `df` / ./images missing
    # for the training cell to trip over.
    raise ValueError(
        f"Unsupported dataset_url {dataset_url!r}: expected a path ending in .csv or .zip"
    )

In [None]:
# ✅ Train model (NLP or Image)
# Reads `params` (and `df` or ./images prepared by the download cell), fine-tunes
# the selected model and writes it to disk.

# Values supplied from outside the notebook may arrive as strings; coerce once here.
learning_rate = float(params['lr'])
batch_size = int(params['batch_size'])

if params['task'] == 'NLP':
    from transformers import (
        AutoModelForSequenceClassification,
        AutoTokenizer,
        DataCollatorWithPadding,
        Trainer,
        TrainingArguments,
    )
    from datasets import Dataset

    tokenizer = AutoTokenizer.from_pretrained(params['model'])
    # NOTE(review): num_labels is fixed at 2, so `label` is presumably expected
    # to hold class ids 0/1 — confirm against the datasets the UI allows.
    model = AutoModelForSequenceClassification.from_pretrained(params['model'], num_labels=2)

    dataset = Dataset.from_pandas(df[['text', 'label']])
    # Tokenize without padding. Padding inside a batched map() pads every map
    # batch to its own longest row, so rows from different map batches end up
    # with different lengths and cannot be stacked into one training batch.
    dataset = dataset.map(lambda e: tokenizer(e['text'], truncation=True), batched=True)

    args = TrainingArguments(
        output_dir="./model",
        per_device_train_batch_size=batch_size,
        learning_rate=learning_rate,
        num_train_epochs=float(params['epochs']),
        logging_dir='./logs',
    )

    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=dataset,
        # Pads each training batch dynamically to its longest sequence.
        data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    )
    trainer.train()
    model.save_pretrained("./model")
    tokenizer.save_pretrained("./model")

else:
    # Any task value other than 'NLP' is treated as image classification.
    from tensorflow.keras.preprocessing.image import ImageDataGenerator
    from tensorflow.keras.applications import MobileNetV2
    from tensorflow.keras.models import Model
    from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
    import tensorflow as tf

    # NOTE(review): MobileNetV2's ImageNet weights expect inputs scaled to
    # [-1, 1] (mobilenet_v2.preprocess_input); rescale=1/255 feeds [0, 1].
    # Left unchanged because inference code must apply the same scaling.
    datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
    train_gen = datagen.flow_from_directory('./images', target_size=(224, 224), batch_size=batch_size, subset='training')
    val_gen = datagen.flow_from_directory('./images', target_size=(224, 224), batch_size=batch_size, subset='validation')

    base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(128, activation='relu')(x)
    output = Dense(train_gen.num_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=output)

    # Freeze the pretrained backbone; only the new head is trained.
    for layer in base_model.layers:
        layer.trainable = False

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    model.fit(train_gen, validation_data=val_gen, epochs=int(params['epochs']))
    # Keras 3 (bundled with current TensorFlow releases) rejects save paths
    # without a `.keras` or `.h5` extension, so the original "./vision_model"
    # raises. Consumers of the saved model must load "./vision_model.keras".
    model.save("./vision_model.keras")