In [8]:
import os
import pickle
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Set Base Directory
BASE_DIR = "/content/drive/My Drive/MINI_PROJECT"
DATASET_DIR = os.path.join(BASE_DIR, "data")  # FIXED PATH

# Extensions treated as images; matched against the lower-cased file name so
# that files such as "IMG_001.JPG" are not silently skipped.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

# Collect all image file paths and labels
image_paths = []
labels = []
for root, dirs, files in os.walk(DATASET_DIR):
    # os.walk yields entries in arbitrary filesystem order. Sorting the
    # directory list in place (which steers the top-down traversal) and the
    # file list makes the row order -- and therefore the seeded split below --
    # reproducible across runs and machines.
    dirs.sort()
    for file in sorted(files):
        if file.lower().endswith(IMAGE_EXTENSIONS):
            image_paths.append(os.path.join(root, file))
            labels.append(os.path.basename(root))  # Folder name as label

# Fail early with a clear message instead of an opaque error from
# train_test_split when the directory is missing, unmounted, or empty.
if not image_paths:
    raise ValueError(
        f"No image files {IMAGE_EXTENSIONS} found under '{DATASET_DIR}'. "
        "Check that the dataset directory exists and is mounted."
    )

# Convert to DataFrame
data_df = pd.DataFrame({"filename": image_paths, "class": labels})

# Split Data: 80% train, then the remaining 20% is halved into 10% validation
# and 10% test. Both splits are stratified on the class column.
train_df, temp_df = train_test_split(
    data_df, test_size=0.2, stratify=data_df["class"], random_state=42
)
val_df, test_df = train_test_split(
    temp_df, test_size=0.5, stratify=temp_df["class"], random_state=42
)

# Extract class labels
class_labels = sorted(data_df["class"].unique())

# Save Processed Data
# Only file paths are stored; each file's label is the name of its parent
# folder, as assigned in the collection loop above.
processed_data = {
    "class_labels": class_labels,
    "train_files": train_df["filename"].tolist(),
    "val_files": val_df["filename"].tolist(),
    "test_files": test_df["filename"].tolist(),
}

with open(os.path.join(BASE_DIR, "processed_data.pkl"), "wb") as f:
    pickle.dump(processed_data, f)

print("✅ Data preprocessing complete. 'processed_data.pkl' saved successfully!")


✅ Data preprocessing complete. 'processed_data.pkl' saved successfully!
