In [None]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
!pip install moviepy




In [None]:
import cv2
import os

# Path to your dataset in Google Drive
dataset_folder = "/content/drive/MyDrive/eth"

# Where to save extracted frames
output_folder = "/content/drive/MyDrive/eth_frames"
os.makedirs(output_folder, exist_ok=True)

# Number of frames per second to extract
fps_extract = 10  # you can adjust if needed

# Loop through each Amharic-labeled folder
for subfolder in os.listdir(dataset_folder):
    subfolder_path = os.path.join(dataset_folder, subfolder)
    if os.path.isdir(subfolder_path):
        frame_subfolder = os.path.join(output_folder, subfolder)
        os.makedirs(frame_subfolder, exist_ok=True)

        # Loop through each video in the folder
        for file in os.listdir(subfolder_path):
            if file.endswith(".mp4"):
                video_path = os.path.join(subfolder_path, file)
                cap = cv2.VideoCapture(video_path)
                video_fps = cap.get(cv2.CAP_PROP_FPS)
                frame_interval = max(int(video_fps / fps_extract), 1)  # skip frames

                count = 0
                saved_count = 0
                while True:
                    ret, frame = cap.read()
                    if not ret:
                        break
                    if count % frame_interval == 0:
                        frame_name = f"{file[:-4]}_{saved_count}.jpg"
                        cv2.imwrite(os.path.join(frame_subfolder, frame_name), frame)
                        saved_count += 1
                    count += 1

                cap.release()
        print(f"Frames extracted for folder: {subfolder}")

print("All frames extracted successfully!")


Frames extracted for folder: ውሃ
Frames extracted for folder: ድምፅ
Frames extracted for folder: እገዛ
Frames extracted for folder: ጥሩ
Frames extracted for folder: ግራ
Frames extracted for folder: እግር
Frames extracted for folder: ድንጋይ
Frames extracted for folder: እንደገና
Frames extracted for folder: ይቅርታ
Frames extracted for folder: ጨምር
Frames extracted for folder: ቀለም
Frames extracted for folder: ሂድ
Frames extracted for folder: እባክህ
Frames extracted for folder: አመሰግናለሁ
Frames extracted for folder: መንደር
Frames extracted for folder: አዎን
Frames extracted for folder: አቁም
Frames extracted for folder: ህመም


In [None]:
dataset_folder = "/content/drive/MyDrive/eth_frames"
print("Using dataset:", dataset_folder)


Using dataset: /content/drive/MyDrive/eth_frames


In [None]:
import os

empty_folders = []

for cls in os.listdir(dataset_folder):
    cls_path = os.path.join(dataset_folder, cls)

    if not os.path.isdir(cls_path):
        continue

    images = [img for img in os.listdir(cls_path)
              if img.lower().endswith(('jpg','jpeg','png'))]

    if len(images) == 0:
        empty_folders.append(cls)

print("❗ Empty folders:", empty_folders)


❗ Empty folders: ['ምግብ', 'split']


In [None]:
import cv2
import os

source_folder = "/content/drive/MyDrive/eth/ምግብ"     # videos
dest_folder   = "/content/drive/MyDrive/eth_frames/ምግብ"  # extracted frames

os.makedirs(dest_folder, exist_ok=True)

video_files = [f for f in os.listdir(source_folder) if f.lower().endswith(('.mp4', '.mov', '.avi'))]

print("Found videos:", video_files)

for video in video_files:
    video_path = os.path.join(source_folder, video)
    cap = cv2.VideoCapture(video_path)

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if total_frames == 0:
        print("⚠ Skipping empty video:", video)
        continue

    frame_interval = max(total_frames // 30, 1)  # extract 30 frames
    frame_count = 0
    saved = 0

    while True:
        ret = cap.grab()
        if not ret:
            break

        if frame_count % frame_interval == 0:
            ret, frame = cap.retrieve()
            if ret:
                frame_name = f"{video}_frame_{saved}.jpg"
                cv2.imwrite(os.path.join(dest_folder, frame_name), frame)
                saved += 1

        frame_count += 1

    cap.release()
    print(f"Extracted {saved} frames from:", video)

print("✔ Done extracting frames for ምግብ class.")


Found videos: ['20251202_090533.mp4', '20251202_093040.mp4', '20251202_101500.mp4']
Extracted 31 frames from: 20251202_090533.mp4
Extracted 31 frames from: 20251202_093040.mp4
Extracted 31 frames from: 20251202_101500.mp4
✔ Done extracting frames for ምግብ class.


In [None]:
!pip install tensorflow
!pip install keras
!pip install opencv-python




In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

dataset_path = "/content/drive/MyDrive/eth_frames"

# Data augmentation and normalization
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    validation_split=0.2  # 80% train, 20% validation
)

# Training data
train_generator = train_datagen.flow_from_directory(
    dataset_path,
    target_size=(224,224),
    batch_size=32,
    class_mode='categorical',
    subset='training'
)

# Validation data
val_generator = train_datagen.flow_from_directory(
    dataset_path,
    target_size=(224,224),
    batch_size=32,
    class_mode='categorical',
    subset='validation'
)


Found 16725 images belonging to 21 classes.
Found 4169 images belonging to 21 classes.


In [None]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout

# Load pre-trained MobileNetV2 without top layer
base_model = MobileNetV2(input_shape=(224,224,3), include_top=False, weights='imagenet')
base_model.trainable = False  # freeze base layers

# Add custom layers
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(20, activation='softmax')  # 20 classes for 20 Amharic words
])

# Compile model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
print(train_generator.class_indices)
print(len(train_generator.class_indices))


{'split': 0, 'ሂድ': 1, 'ህመም': 2, 'መንደር': 3, 'ምግብ': 4, 'ሰላም': 5, 'ቀለም': 6, 'አመሰግናለሁ': 7, 'አቁም': 8, 'አዎን': 9, 'እባክህ': 10, 'እንደገና': 11, 'እገዛ': 12, 'እግር': 13, 'ውሃ': 14, 'ይቅርታ': 15, 'ድምፅ': 16, 'ድንጋይ': 17, 'ግራ': 18, 'ጥሩ': 19, 'ጨምር': 20}
21


In [None]:
import shutil
import os

train_split = "/content/drive/MyDrive/eth_frames/train/split"
val_split = "/content/drive/MyDrive/eth_frames/val/split"

if os.path.exists(train_split):
    shutil.rmtree(train_split)
    print("Removed train/split folder")

if os.path.exists(val_split):
    shutil.rmtree(val_split)
    print("Removed val/split folder")


In [None]:
dataset_root = "/content/drive/MyDrive/eth_frames"

train_dir = dataset_root + "/train"
val_dir = dataset_root + "/val"


In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

dataset_folder = "/content/drive/MyDrive/eth_frames"
train_dir = os.path.join(dataset_folder, "train")
val_dir = os.path.join(dataset_folder, "val")

os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

classes = [c for c in os.listdir(dataset_folder) if os.path.isdir(os.path.join(dataset_folder, c))]

for cls in classes:
    cls_path = os.path.join(dataset_folder, cls)

    if cls in ["train", "val"]:
        continue

    images = [img for img in os.listdir(cls_path) if img.lower().endswith(('.jpg','png','jpeg'))]

    if len(images) < 2:
        print(f"Skipping class (not enough images): {cls}")
        continue

    train_imgs, val_imgs = train_test_split(images, test_size=0.2, random_state=42)

    os.makedirs(os.path.join(train_dir, cls), exist_ok=True)
    os.makedirs(os.path.join(val_dir, cls), exist_ok=True)

    # Move and rename to avoid duplicates
    for idx, img in enumerate(train_imgs):
        src = os.path.join(cls_path, img)
        dst = os.path.join(train_dir, cls, f"train_{idx}_{img}")
        shutil.move(src, dst)

    for idx, img in enumerate(val_imgs):
        src = os.path.join(cls_path, img)
        dst = os.path.join(val_dir, cls, f"val_{idx}_{img}")
        shutil.move(src, dst)

print("✔ Train/Validation split created (moved & renamed)!")


✔ Train/Validation split created (moved & renamed)!


In [None]:
train_generator = train_datagen.flow_from_directory(train_dir, target_size=image_size, batch_size=batch_size, class_mode='categorical')
val_generator = val_datagen.flow_from_directory(val_dir, target_size=image_size, batch_size=batch_size, class_mode='categorical')


Found 16706 images belonging to 20 classes.
Found 4188 images belonging to 20 classes.


In [None]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout

num_classes = train_generator.num_classes

base_model = MobileNetV2(input_shape=(224,224,3), include_top=False, weights='imagenet')
base_model.trainable = False

model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(num_classes, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()


In [None]:
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=15  # you can increase later if needed
)


Epoch 1/15
[1m523/523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4491s[0m 8s/step - accuracy: 0.4319 - loss: 1.9379 - val_accuracy: 0.8968 - val_loss: 0.4639
Epoch 2/15
[1m523/523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1588s[0m 3s/step - accuracy: 0.8331 - loss: 0.5781 - val_accuracy: 0.9028 - val_loss: 0.3307
Epoch 3/15
[1m523/523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1580s[0m 3s/step - accuracy: 0.8873 - loss: 0.3757 - val_accuracy: 0.9234 - val_loss: 0.2525
Epoch 4/15
[1m523/523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1587s[0m 3s/step - accuracy: 0.9151 - loss: 0.2855 - val_accuracy: 0.8799 - val_loss: 0.3877
Epoch 5/15
[1m523/523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1591s[0m 3s/step - accuracy: 0.9243 - loss: 0.2455 - val_accuracy: 0.9491 - val_loss: 0.1485
Epoch 6/15
[1m523/523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1643s[0m 3s/step - accuracy: 0.9346 - loss: 0.2127 - val_accuracy: 0.9358 - val_loss: 0.1955
Epoch 7/15
[1m5

In [None]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
dataset_root = "/content/drive/MyDrive/eth_frames"
train_dir = dataset_root + "/train"
val_dir = dataset_root + "/val"


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

image_size = (224, 224)
batch_size = 32

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True
)

val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical'
)

val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical'
)


Found 16706 images belonging to 20 classes.
Found 4188 images belonging to 20 classes.


In [None]:
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model

base = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224,224,3))
base.trainable = False   # freeze base layer

x = GlobalAveragePooling2D()(base.output)
x = Dropout(0.4)(x)  # slightly stronger dropout
output = Dense(train_generator.num_classes, activation='softmax')(x)

model = Model(inputs=base.input, outputs=output)

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout

num_classes = train_generator.num_classes

base_model = MobileNetV2(input_shape=(224,224,3), include_top=False, weights='imagenet')
base_model.trainable = False

model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(num_classes, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [1]:
print(train_generator.class_indices)


NameError: name 'train_generator' is not defined

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint

checkpoint_path = "/content/drive/MyDrive/eth_model_mobilenet_best.keras"

checkpoint = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_accuracy',
    save_best_only=True,
    mode='max',
    verbose=1
)


In [None]:
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=15,
    callbacks=[checkpoint]   # automatic save
)


  self._warn_if_super_not_called()


Epoch 1/15
[1m523/523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10s/step - accuracy: 0.4549 - loss: 1.8612
Epoch 1: val_accuracy improved from -inf to 0.86915, saving model to /content/drive/MyDrive/eth_model_mobilenet_best.keras
[1m523/523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6328s[0m 12s/step - accuracy: 0.4552 - loss: 1.8600 - val_accuracy: 0.8691 - val_loss: 0.5195
Epoch 2/15
[1m523/523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.8454 - loss: 0.5336
Epoch 2: val_accuracy improved from 0.86915 to 0.88252, saving model to /content/drive/MyDrive/eth_model_mobilenet_best.keras
[1m523/523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1713s[0m 3s/step - accuracy: 0.8455 - loss: 0.5335 - val_accuracy: 0.8825 - val_loss: 0.3750
Epoch 3/15
[1m523/523[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.8923 - loss: 0.3487
Epoch 3: val_accuracy improved from 0.88252 to 0.92144, saving model to /content/drive