In [65]:
# %%
import os, random
from pathlib import Path
from collections import Counter
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

# reproducibility
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)


# %%
# USER: change this to your dataset folder (same as your PyTorch DATA_DIR)
DATA_DIR = Path("/Users/vaibav/Downloads/Material_Dataset/images/images")

if not DATA_DIR.exists():
    raise FileNotFoundError(f"{DATA_DIR} does not exist — update the path.")

IMG_SIZE = 224
BATCH_SIZE = 32
AUTOTUNE = tf.data.AUTOTUNE

# Build list of filepaths + labels (sorted classes to match ImageFolder behavior)
class_names = sorted([p.name for p in DATA_DIR.iterdir() if p.is_dir()])
print("Classes found:", class_names)
num_classes = len(class_names)


Classes found: ['aerosol_cans', 'aluminum_food_cans', 'aluminum_soda_cans', 'cardboard_boxes', 'cardboard_packaging', 'clothing', 'coffee_grounds', 'disposable_plastic_cutlery', 'eggshells', 'food_waste', 'glass_beverage_bottles', 'glass_cosmetic_containers', 'glass_food_jars', 'magazines', 'newspaper', 'office_paper', 'paper_cups', 'plastic_cup_lids', 'plastic_detergent_bottles', 'plastic_food_containers', 'plastic_shopping_bags', 'plastic_soda_bottles', 'plastic_straws', 'plastic_trash_bags', 'plastic_water_bottles', 'shoes', 'steel_food_cans', 'styrofoam_cups', 'styrofoam_food_containers', 'tea_bags']


In [66]:
print(num_classes, "classes found.")

30 classes found.


In [67]:
image_paths = []
labels = []
valid_suffixes = {".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tif", ".tiff", ".png"}

for idx, cls in enumerate(class_names):
    for sub in ["default", "real_world"]:
        cls_dir = DATA_DIR / cls / sub
        if not cls_dir.exists():
            continue  # skip if missing
        print("Looking into:", cls_dir)
        for p in cls_dir.glob("*"):
            if p.suffix.lower() in valid_suffixes:
                image_paths.append(str(p))
                labels.append(idx)

print("Total images:", len(image_paths))
if len(image_paths) == 0:
    raise RuntimeError("No images found (check file suffixes and DATA_DIR).")



Looking into: /Users/vaibav/Downloads/Material_Dataset/images/images/aerosol_cans/default
Looking into: /Users/vaibav/Downloads/Material_Dataset/images/images/aerosol_cans/real_world
Looking into: /Users/vaibav/Downloads/Material_Dataset/images/images/aluminum_food_cans/default
Looking into: /Users/vaibav/Downloads/Material_Dataset/images/images/aluminum_food_cans/real_world
Looking into: /Users/vaibav/Downloads/Material_Dataset/images/images/aluminum_soda_cans/default
Looking into: /Users/vaibav/Downloads/Material_Dataset/images/images/aluminum_soda_cans/real_world
Looking into: /Users/vaibav/Downloads/Material_Dataset/images/images/cardboard_boxes/default
Looking into: /Users/vaibav/Downloads/Material_Dataset/images/images/cardboard_boxes/real_world
Looking into: /Users/vaibav/Downloads/Material_Dataset/images/images/cardboard_packaging/default
Looking into: /Users/vaibav/Downloads/Material_Dataset/images/images/cardboard_packaging/real_world
Looking into: /Users/vaibav/Downloads/Mat

In [68]:
print(image_paths[:5], labels[:5])

['/Users/vaibav/Downloads/Material_Dataset/images/images/aerosol_cans/default/Image_15.png', '/Users/vaibav/Downloads/Material_Dataset/images/images/aerosol_cans/default/Image_29.png', '/Users/vaibav/Downloads/Material_Dataset/images/images/aerosol_cans/default/Image_178.png', '/Users/vaibav/Downloads/Material_Dataset/images/images/aerosol_cans/default/Image_144.png', '/Users/vaibav/Downloads/Material_Dataset/images/images/aerosol_cans/default/Image_150.png'] [0, 0, 0, 0, 0]


In [69]:
train_paths, temp_paths, train_labels, temp_labels = train_test_split(
    image_paths, labels, test_size=0.3, stratify=labels, random_state=SEED
)
val_paths, test_paths, val_labels, test_labels = train_test_split(
    temp_paths, temp_labels, test_size=0.5, stratify=temp_labels, random_state=SEED
)

# Dataset builder
def process_path(path, label):
    img = tf.io.read_file(path)
    img = tf.image.decode_image(img, channels=3, expand_animations=False)  # force 3 channels
    img = tf.image.resize(img, [IMG_SIZE, IMG_SIZE])
    img = tf.keras.applications.efficientnet.preprocess_input(img)
    return img, label


import tensorflow as tf

IMG_SIZE = 224
BATCH_SIZE = 32

def make_ds(paths, labels, shuffle=False):
    ds = tf.data.Dataset.from_tensor_slices((paths, labels))
    
    def load_and_preprocess(path, label):
        img = tf.io.read_file(path)
        img = tf.image.decode_png(img, channels=3)
        img = tf.image.convert_image_dtype(img, tf.float32)
        img = tf.image.resize(img, [IMG_SIZE, IMG_SIZE])
        
        if shuffle:
            img = tf.image.random_flip_left_right(img)
            img = tf.image.random_brightness(img, 0.2)
            img = tf.image.random_contrast(img, 0.8, 1.2)
        
        return img, label
    
    ds = ds.map(load_and_preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    
    if shuffle:
        ds = ds.shuffle(buffer_size=1024)
    
    ds = ds.batch(BATCH_SIZE)
    ds = ds.prefetch(tf.data.AUTOTUNE)
    
    return ds

# Example usage
train_ds = make_ds(train_paths, train_labels, shuffle=True)
val_ds   = make_ds(val_paths, val_labels)
test_ds  = make_ds(test_paths, test_labels)

In [70]:
for imgs, labs in train_ds.take(1):
    print("Batch image shape:", imgs.shape)   
    print("Batch dtype:", imgs.dtype)


Batch image shape: (32, 224, 224, 3)
Batch dtype: <dtype: 'float32'>


In [71]:
base = tf.keras.applications.MobileNetV2(
    include_top=False,
    weights="imagenet",
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    pooling="avg"
)

base.trainable = True

# Optionally, freeze first few layers and train only the later ones
for layer in base.layers[:100]:
    layer.trainable = False


x = tf.keras.layers.Dropout(0.3)(base.output)
out = tf.keras.layers.Dense(num_classes, activation="softmax")(x)
model = tf.keras.Model(base.input, out)

model.compile(optimizer="adam",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])

# Train
model.fit(train_ds, validation_data=val_ds, epochs=100)

# Evaluate
loss, acc = model.evaluate(test_ds)
print(f"🧪 Test Accuracy: {acc:.4f}")

Epoch 1/100
[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 231ms/step - accuracy: 0.6494 - loss: 1.1882 - val_accuracy: 0.0484 - val_loss: 16.6648
Epoch 2/100
[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m73s[0m 222ms/step - accuracy: 0.7740 - loss: 0.7003 - val_accuracy: 0.2716 - val_loss: 8.1912
Epoch 3/100
[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 218ms/step - accuracy: 0.8266 - loss: 0.5268 - val_accuracy: 0.1369 - val_loss: 12.8110
Epoch 4/100
[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 228ms/step - accuracy: 0.8501 - loss: 0.4430 - val_accuracy: 0.1400 - val_loss: 11.5455
Epoch 5/100
[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 217ms/step - accuracy: 0.8691 - loss: 0.3790 - val_accuracy: 0.2409 - val_loss: 9.0565
Epoch 6/100
[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 224ms/step - accuracy: 0.8853 - loss: 0.3208 - val_accuracy: 0.1947 - val_loss: 8.5210
E

In [72]:
model.save("material_detection.h5")  # saves as HDF5 file


