# 👗 DemandAI - Enterprise Multimodal Training Notebook (Production V3.1)
**Version 3.1 (Senior Systems Engineer Edition)**

This notebook trains the **EXACT** architecture used in production.
It uses:
- **DistilBERT** for Text (Real Pretrained Weights, loaded efficiently)
- **MobileNetV2** for Images (Real ImageNet Weights, correct preprocessing)
- **TFT-Style Transformer** for Time Series

### **Instructions**
1.  **Select GPU Runtime**: Runtime > Change runtime type > T4 GPU (Required for BERT).
2.  **Run All Cells**: This will produce `model_weights.h5` and `scaler.pkl`.
3.  **Deploy**: Upload these files to your `ml_service/` folder.

In [None]:
# [STEP 1] Install Dependencies
!pip install tensorflow pandas scikit-learn numpy matplotlib faker joblib transformers
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models, Model, Input
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from transformers import TFDistilBertModel, DistilBertTokenizer
from sklearn.preprocessing import MinMaxScaler
import joblib
from faker import Faker
import random
import os

print(f"‚úÖ TensorFlow Version: {tf.__version__}")

In [None]:
# [STEP 2] Generate Multimodal Dataset
#
# Builds a synthetic table with one row per product: a short text
# description, a 30-day sales history, and the next-day demand target.
BATCH_SIZE = 2000 # Reduced for Demo Speed (Increase to 20,000 for full training)
print(f"üßµ Generating {BATCH_SIZE} Multimodal Samples...")

fake = Faker()
categories = ['Ladieswear', 'Divided', 'Menswear', 'Baby/Children', 'Sport']
product_types = ['Trousers', 'Dress', 'Sweater', 'T-shirt', 'Jacket']


def _make_sample():
    """Draw one synthetic row: [description, sales_history, demand_target]."""
    # Text: random category / product type / colour folded into a sentence.
    category = random.choice(categories)
    product = random.choice(product_types)
    colour = fake.color_name()
    description = f"{colour} {product} in {category} collection. Modern style."

    # Sales: a per-product base volume with +/-20% daily noise and a mild
    # upward drift of 0.1 units per day, truncated to non-negative integers.
    base_volume = random.randint(10, 50)
    daily_sales = []
    for day in range(30):
        noisy_volume = base_volume * random.uniform(0.8, 1.2) + day * 0.1
        daily_sales.append(max(0, int(noisy_volume)))

    # Target: the last observed day perturbed by +/-10%.
    next_day_demand = int(daily_sales[-1] * random.uniform(0.9, 1.1))
    return [description, daily_sales, next_day_demand]


data = [_make_sample() for _ in range(BATCH_SIZE)]

df = pd.DataFrame(data, columns=['description', 'sales_history', 'demand_target'])
print("‚úÖ Dataset Generated.")

In [None]:
# [STEP 3] Preprocessing & Scaling
#
# Fits and persists the sales scaler, then builds the four model inputs
# (input_ids, attention_mask, image_input, ts_input) and the scaled targets.

# Size every array from the data actually present instead of trusting that
# BATCH_SIZE still matches `df`.
num_samples = len(df)

# A. Scale Sales Data (Production Correct)
print("‚öñÔ∏è Fitting Scaler...")
scaler = MinMaxScaler(feature_range=(0, 1))

# One (num_samples, 30) float64 matrix for all histories lets the scaler be
# fitted and applied in a single vectorised call instead of once per row.
history_matrix = np.asarray(df['sales_history'].tolist(), dtype=np.float64)
target_values = df['demand_target'].to_numpy(dtype=np.float64)

# NOTE(review): the scaler sees every row (including the rows that
# validation_split later holds out) and the targets themselves — confirm this
# is acceptable for the reported validation metrics.
all_values = np.concatenate([history_matrix.ravel(), target_values])
scaler.fit(all_values.reshape(-1, 1))

joblib.dump(scaler, "scaler.pkl")
print("‚úÖ Scaler saved to 'scaler.pkl'")

# B. Prepare Inputs
print("üîÑ Preparing Tensors...")

# 1. Text Inputs (Real Tokenization)
try:
    tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
    tokenized = tokenizer(
        df['description'].tolist(),
        padding='max_length',
        truncation=True,
        max_length=128,
        return_tensors='tf'
    )
    input_ids = tokenized['input_ids']
    attention_mask = tokenized['attention_mask']
    print("‚úÖ BERT Tokenization Complete")
except Exception as e:  # deliberately broad: best-effort offline fallback
    print(f"‚ö†Ô∏è Tokenizer Error: {e}")
    # Fallback for offline demo: random token ids, all positions attended.
    # Both arrays are int32 to match the dtype declared on the model inputs.
    input_ids = np.random.randint(0, 30522, (num_samples, 128), dtype=np.int32)
    attention_mask = np.ones((num_samples, 128), dtype=np.int32)

# 2. Image Inputs (Correct MobileNetV2 Preprocessing)
# Generate raw pixel values [0, 255]. randint's upper bound is exclusive, so
# 256 is required to include 255; drawing as uint8 avoids a large int64
# intermediate before the float32 cast.
raw_images = np.random.randint(
    0, 256, (num_samples, 224, 224, 3), dtype=np.uint8
).astype(np.float32)
# Apply standard MobileNet preprocessing (scales to [-1, 1])
image_input = preprocess_input(raw_images)
print("‚úÖ Image Preprocessing Complete ([-1, 1] range)")

# 3. Time Series Inputs (Scaled)
# Only feature channel 0 carries the scaled sales history; channels 1-4 stay
# zero.
ts_input = np.zeros((num_samples, 30, 5), dtype=np.float32)
scaled_histories = scaler.transform(history_matrix.reshape(-1, 1))
ts_input[:, :, 0] = scaled_histories.reshape(history_matrix.shape)

# 4. Targets
targets = scaler.transform(target_values.reshape(-1, 1))

In [None]:
# [STEP 4] Build Real Production Architecture

def sinusoidal_position_table(length, depth):
    """Return a deterministic (length, depth) float32 positional-encoding table.

    Even columns hold sin(pos / 10000**(2i/depth)) and odd columns hold the
    matching cosine, so position 0 is [0, 1, 0, 1, ...]. The table depends only
    on its arguments, so every rebuild of the model gets identical values.
    """
    positions = np.arange(length, dtype=np.float32)[:, np.newaxis]
    dims = np.arange(depth)[np.newaxis, :]
    angle_rates = 1.0 / np.power(10000.0, (2 * (dims // 2)) / depth)
    angles = positions * angle_rates

    table = np.empty((length, depth), dtype=np.float32)
    table[:, 0::2] = np.sin(angles[:, 0::2])
    table[:, 1::2] = np.cos(angles[:, 1::2])
    return table


def build_model(text_len=128, ts_steps=30, ts_features=5):
    """Build and compile the three-branch (text, image, time-series) regressor.

    Args:
        text_len: Token sequence length of `input_ids` / `attention_mask`.
        ts_steps: Number of time steps in `ts_input`.
        ts_features: Number of feature channels per time step in `ts_input`.

    Returns:
        A compiled Keras `Model` taking the inputs `input_ids`,
        `attention_mask`, `image_input` and `ts_input` and producing a single
        linear output named `demand_output` (Adam optimizer, MSE loss, MAE
        metric).
    """
    # --- 1. Text Input (DistilBERT) ---
    input_ids = layers.Input(shape=(text_len,), dtype=tf.int32, name='input_ids')
    attention_mask = layers.Input(shape=(text_len,), dtype=tf.int32, name='attention_mask')

    # LOAD REAL BERT
    distilbert = TFDistilBertModel.from_pretrained('distilbert-base-uncased')
    distilbert.trainable = False # Freeze for speed/stability

    bert_out = distilbert(input_ids=input_ids, attention_mask=attention_mask)[0]
    # NOTE(review): no mask is passed to this pooling, so padded positions are
    # averaged in with real tokens. Left unchanged to stay aligned with the
    # serving definition — confirm before switching to masked pooling.
    text_features = layers.GlobalAveragePooling1D()(bert_out)
    text_features = layers.Dense(64, activation='relu')(text_features)

    # --- 2. Image Input (MobileNetV2) ---
    image_input = layers.Input(shape=(224, 224, 3), name='image_input')

    # LOAD REAL MOBILENET
    mobilenet = MobileNetV2(input_shape=(224, 224, 3), include_top=False, weights='imagenet')
    mobilenet.trainable = False # Freeze

    image_features = mobilenet(image_input)
    image_features = layers.GlobalAveragePooling2D()(image_features)
    image_features = layers.Dense(64, activation='relu')(image_features)

    # --- 3. Time Series (TFT Style) ---
    ts_input = layers.Input(shape=(ts_steps, ts_features), name='ts_input')
    x_ts = layers.Dense(64)(ts_input)

    # Positional signal: a fixed, weight-free table rather than an Embedding
    # applied to a constant tf.range. A layer called on a concrete tensor is
    # not part of the functional graph, so its randomly initialised weights
    # would be neither trained nor written by save_weights, and would differ
    # on every rebuild. The table adds no weights, so the saved weight layout
    # is unaffected.
    # NOTE(review): the serving-side model must add the same table for exact
    # train/serve parity — confirm against ml_service.
    pos_encoding = tf.constant(sinusoidal_position_table(ts_steps, 64))
    x_ts = x_ts + pos_encoding

    # Self-attention block with residual connection and layer norm.
    attention_output = layers.MultiHeadAttention(num_heads=4, key_dim=64)(x_ts, x_ts)
    x_ts = layers.Add()([x_ts, attention_output])
    x_ts = layers.LayerNormalization(epsilon=1e-6)(x_ts)

    # Position-wise feed-forward block, again residual + layer norm.
    ffn = layers.Dense(64, activation="relu")(x_ts)
    x_ts = layers.Add()([x_ts, ffn])
    x_ts = layers.LayerNormalization(epsilon=1e-6)(x_ts)

    ts_features_vec = layers.GlobalAveragePooling1D()(x_ts)

    # --- Fusion ---
    concat = layers.Concatenate()([text_features, image_features, ts_features_vec])
    x = layers.Dense(128, activation='relu')(concat)
    x = layers.Dropout(0.3)(x)

    # Element-wise product of an ELU branch and a linear branch.
    gated_x = layers.Dense(64, activation='elu')(x)
    linear_x = layers.Dense(64)(x)
    x = layers.Multiply()([gated_x, linear_x])

    output = layers.Dense(1, activation='linear', name='demand_output')(x)

    model = Model(inputs=[input_ids, attention_mask, image_input, ts_input], outputs=output)
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

# Instantiate the compiled model once at module level; the training cell
# below calls fit() and save_weights() on this `model` object.
model = build_model()
print("‚úÖ Production Architecture Built.")

In [None]:
# [STEP 5] Train & Save Weights Only
print("üöÄ Starting Training...")

# Inputs are passed by name; each key is the `name=` given to the matching
# Input layer in build_model().
train_inputs = dict(
    input_ids=input_ids,
    attention_mask=attention_mask,
    image_input=image_input,
    ts_input=ts_input,
)

# 20% of the samples are held out by Keras for validation.
history = model.fit(
    x=train_inputs,
    y=targets,
    validation_split=0.2,
    epochs=3,
    batch_size=16,
)

print("‚úÖ Training Complete.")

# IMPORTANT: only the weights are written (not the full model), to be safe
# across environments.
model.save_weights('model_weights.h5')
print("üíæ Saved: model_weights.h5 (Upload this!)")
print("üíæ Saved: scaler.pkl (Upload this!)")