In [2]:
import pandas as pd
import numpy as np

In [None]:
import os
from pathlib import Path
import pandas as pd
from typing import Dict
 
ALPHABETS = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")

def count_images_per_split(dataset_dir: str) -> pd.DataFrame:
    """
    Tally the image files stored for every ASL letter in each dataset split.

    A file is counted when it sits directly inside
    ``<dataset_dir>/<split>/<letter>`` and its extension is .jpg, .jpeg or
    .png (compared case-insensitively). A split folder or letter folder that
    does not exist contributes a count of 0.

    Args:
        dataset_dir: Root dataset directory containing 'train' and 'test' subfolders.

    Returns:
        DataFrame with one row per entry of ALPHABETS and the columns
        ['Letter', 'Train', 'Test', 'Total'], where Total = Train + Test.
    """
    image_suffixes = {'.jpg', '.jpeg', '.png'}
    root = Path(dataset_dir)

    def tally_split(split_dir: Path) -> Dict[str, int]:
        # Every letter starts at zero so absent folders still show up in the result.
        tally: Dict[str, int] = dict.fromkeys(ALPHABETS, 0)
        if split_dir.exists():
            for letter in ALPHABETS:
                folder = split_dir / letter
                if folder.is_dir():
                    tally[letter] = len([
                        entry for entry in folder.iterdir()
                        if entry.is_file() and entry.suffix.lower() in image_suffixes
                    ])
        return tally

    train_tally = tally_split(root / 'train')
    test_tally = tally_split(root / 'test')

    records = [
        {
            'Letter': letter,
            'Train': train_tally[letter],
            'Test': test_tally[letter],
            'Total': train_tally[letter] + test_tally[letter],
        }
        for letter in ALPHABETS
    ]
    return pd.DataFrame(records)



In [None]:

# Tabulate the per-letter image counts for the dataset folder 'ASL_Alphabet_Dataset'
# (a path relative to the notebook's working directory) and print the full table.
counts_df = count_images_per_split('ASL_Alphabet_Dataset')
print(counts_df)


   Letter  Train  Test  Total
0       A   8458     0   8458
1       B   8309     0   8309
2       C   8146     0   8146
3       D   7629     0   7629
4       E   7744     0   7744
5       F   8031     0   8031
6       G   7844     0   7844
7       H   7906     0   7906
8       I   7953     0   7953
9       J   7503     0   7503
10      K   7876     0   7876
11      L   7939     0   7939
12      M   7900     0   7900
13      N   7932     0   7932
14      O   8140     0   8140
15      P   7601     0   7601
16      Q   7954     0   7954
17      R   8021     0   8021
18      S   8109     0   8109
19      T   8054     0   8054
20      U   8023     0   8023
21      V   7597     0   7597
22      W   7787     0   7787
23      X   8093     0   8093
24      Y   8178     0   8178
25      Z   7410     0   7410


In [None]:
# Create a DataFrame with the following columns:
#   image_base64 (the base64-encoded image), label
# We pick 30 images from each class, which results in 30 * 26 = 780 images.
# Images are base64-encoded with Python's base64 module so they can be sent to the OpenAI API.


In [45]:
import base64
from pathlib import Path
import random
import pandas as pd
from typing import List

DATASET_DIR = Path('./ASL_Alphabet_Dataset')  # change if needed
LETTERS: List[str] = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
IMAGES_PER_CLASS = 30


def _gather_letter_images(root: Path, letter: str, images_per_class: int = IMAGES_PER_CLASS) -> List[Path]:
    """Randomly pick image files for one letter from ``root/train/<letter>``.

    Only regular files with a .jpg, .jpeg or .png extension (compared
    case-insensitively) are considered.

    Args:
        root: Dataset root directory that contains the 'train' folder.
        letter: Name of the letter sub-folder to read.
        images_per_class: Maximum number of images to return.

    Returns:
        A random selection, without replacement, of at most
        ``images_per_class`` image paths. The list is shorter when the folder
        holds fewer images and empty when the folder is missing or has none.
    """
    candidates: List[Path] = []

    # train/test structure
    split_dir = root / 'train' / letter
    if split_dir.is_dir():
        candidates.extend(
            [p for p in split_dir.iterdir()
                if p.is_file() and p.suffix.lower() in {'.jpg', '.jpeg', '.png'}]
        )
    else:
        print("No directory found",split_dir)

    # random.sample raises ValueError when asked for more items than exist,
    # so clamp the request to what is actually available.
    sample_size = min(images_per_class, len(candidates))
    return random.sample(candidates, sample_size)


def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 text string."""
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    # b64encode yields bytes; decode so the result is a plain str.
    return base64.b64encode(raw_bytes).decode('utf-8')

def build_base64_dataset(dataset_dir: Path = DATASET_DIR,
                         images_per_class: int = IMAGES_PER_CLASS) -> pd.DataFrame:
    """
    Build a DataFrame with columns [image_base64, label] by sampling images
    for every letter in LETTERS and base64-encoding each sampled file.

    Progress and warnings are printed as the letters are processed. A letter
    for which no images are returned is skipped, and a file that fails to
    encode is reported and left out of the result.

    Args:
        dataset_dir: Dataset root passed to _gather_letter_images.
        images_per_class: Number of images requested per letter.

    Returns:
        DataFrame with one row per successfully encoded image.
    """
    rows = []
    for letter in LETTERS:
        # Forward the requested sample size; without it the helper silently
        # falls back to its own default and this function's argument is ignored.
        images = _gather_letter_images(dataset_dir, letter, images_per_class)
        print(f'Found {len(images)} images for letter {letter}')
        if not images:
            print(f'Warning: No images found for letter {letter}')
            continue

        for file_path in images:
            try:
                img_b64 = encode_image(file_path)
                rows.append({'image_base64': img_b64, 'label': letter})
            except Exception as e:
                # Best effort: one unreadable file should not abort the whole build.
                print(f'Failed to encode {file_path}: {e}')

    df = pd.DataFrame(rows, columns=['image_base64', 'label'])
    print(f'Built dataset with {len(df)} rows (target ~ {images_per_class * len(LETTERS)}).')
    return df




In [None]:
# # Test the fixed function
# test_result = _gather_letter_images(Path('ASL_Alphabet_Dataset'), 'A', 5)
# print(test_result)
# for file_path in test_result:
#     try:
#         img_b64 = encode_image(file_path)
#         print(img_b64)
#     except Exception as e:
#         print(f'Failed to encode {file_path}: {e}')

[PosixPath('ASL_Alphabet_Dataset/train/A/A2484.jpg'), PosixPath('ASL_Alphabet_Dataset/train/A/A (3643).jpg'), PosixPath('ASL_Alphabet_Dataset/train/A/a_6_rotate_1.jpeg'), PosixPath('ASL_Alphabet_Dataset/train/A/A (3028).jpg'), PosixPath('ASL_Alphabet_Dataset/train/A/A1677.jpg')]
/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQECAgICAgQDAgICAgUEBAMEBgUGBgYFBgYGBwkIBgcJBwYGCAsICQoKCgoKBggLDAsKDAkKCgr/2wBDAQICAgICAgUDAwUKBwYHCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgr/wAARCADIAMgDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOE

In [46]:
# Build the sampled base64 dataset (IMAGES_PER_CLASS images requested per letter)
# that the following cells inspect and classify.
img_df= build_base64_dataset(DATASET_DIR, IMAGES_PER_CLASS)

Found 30 images for letter A
Found 30 images for letter B
Found 30 images for letter C
Found 30 images for letter D
Found 30 images for letter E
Found 30 images for letter F
Found 30 images for letter G
Found 30 images for letter H
Found 30 images for letter I
Found 30 images for letter J
Found 30 images for letter K
Found 30 images for letter L
Found 30 images for letter M
Found 30 images for letter N
Found 30 images for letter O
Found 30 images for letter P
Found 30 images for letter Q
Found 30 images for letter R
Found 30 images for letter S
Found 30 images for letter T
Found 30 images for letter U
Found 30 images for letter V
Found 30 images for letter W
Found 30 images for letter X
Found 30 images for letter Y
Found 30 images for letter Z
Built dataset with 780 rows (target ~ 780).


In [47]:
# Sanity-check the sampled dataset: print its (rows, columns) shape and the first 10 rows.
print("shape",img_df.shape)
# Optional per-label row counts (left disabled):
# print("counts",img_df['label'].value_counts())
print("Head",img_df.head(10))

shape (780, 2)
Head                                         image_base64 label
0  /9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQ...     A
1  /9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQ...     A
2  /9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQ...     A
3  /9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQ...     A
4  /9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQ...     A
5  /9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQ...     A
6  /9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQ...     A
7  /9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQ...     A
8  /9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQ...     A
9  /9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAEBAQEBAQEBAQ...     A


In [None]:
#call the OpenAI LLM to classify this image
from openai import OpenAI
# The API key is deliberately not written into the notebook. With no api_key
# argument the OpenAI client reads it from the OPENAI_API_KEY environment
# variable; passing an empty string would override that lookup and every
# request would fail to authenticate.
client = OpenAI()



def classification_pipeline(df: pd.DataFrame) -> pd.DataFrame:
    """Classify every image in ``df`` and return a copy with a 'prediction' column.

    The 'standard' prompt is built once via create_prompt. Each value of
    df['image_base64'] is then sent through run_model and the reply is reduced
    to a letter with extract_letter_from_response. Any exception raised for an
    image is printed and recorded as "?" so the remaining images are still
    processed. The input frame itself is not modified.
    """
    instruction = create_prompt('standard')

    def _predict_one(encoded_image):
        # One model call per image; failures become the "?" placeholder.
        try:
            reply = run_model(instruction, encoded_image)
            return extract_letter_from_response(reply)
        except Exception as e:
            print(f"Error processing image: {e}")
            return "?"

    predicted_letters = [_predict_one(encoded) for encoded in df['image_base64']]

    # Attach the predictions to a copy so the caller's frame stays untouched.
    labelled = df.copy()
    labelled['prediction'] = predicted_letters
    return labelled


def run_model(prompt, base64_image):
    """Send one prompt plus one base64 image to the model and return the raw response.

    A single user message is built with two parts: the prompt text and the
    image wrapped in a ``data:image/jpeg;base64,...`` URL. It is submitted via
    ``client.responses.create`` to the "gpt-4.1-mini" model, and the response
    object is returned unchanged.
    """
    text_part = {"type": "input_text", "text": prompt}
    image_part = {
        "type": "input_image",
        "image_url": f"data:image/jpeg;base64,{base64_image}",
    }
    user_message = {"role": "user", "content": [text_part, image_part]}
    return client.responses.create(model="gpt-4.1-mini", input=[user_message])

def extract_letter_from_response(result) -> str:
    """Extract the predicted letter from an LLM response.

    The reply text is read from ``result.output[0].content[0].text`` and
    resolved in this order:

    1. The stripped reply is exactly one ASCII letter: return it upper-cased.
    2. The reply contains a standalone capital letter, i.e. one not attached
       to other word characters: return the first such letter. This makes
       "The letter is B." resolve to "B" rather than the "T" of "The".
    3. Otherwise return the first ASCII letter of the upper-cased reply.
    4. Return "?" when the reply contains no ASCII letter at all.

    Args:
        result: Response object exposing ``output[0].content[0].text``.

    Returns:
        A single upper-case letter, or "?" if none could be found.
    """
    import re  # local import keeps this cell runnable on its own

    raw_response = result.output[0].content[0].text
    response = raw_response.strip().upper()

    # Expected case: the model answered with just the letter.
    if re.fullmatch(r'[A-Z]', response):
        return response

    # Wordy answer: search the original-case text so that the initial letter
    # of an ordinary word is not mistaken for the prediction.
    standalone = re.search(r'\b[A-Z]\b', raw_response)
    if standalone:
        return standalone.group(0)

    # Last resort: first letter anywhere in the upper-cased reply.
    first_letter = re.search(r'[A-Z]', response)
    return first_letter.group(0) if first_letter else "?"  # Unknown if no letter found


In [53]:
# Run the classification pipeline over every row of img_df and preview the first 10 results.
# img_df is reused from the earlier cell; uncomment the next line to rebuild (and re-sample) it.
# img_df= build_base64_dataset(DATASET_DIR, IMAGES_PER_CLASS)
# NOTE: classification_pipeline calls run_model once per row, i.e. one API request per image.
predictions = classification_pipeline(img_df)
print(predictions.head(10))

                                        image_base64 label prediction
0  /9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQ...     A          S
1  /9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQ...     A          A
2  /9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQ...     A          A
3  /9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQ...     A          A
4  /9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQ...     A          A
5  /9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQ...     A          S
6  /9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQ...     A          A
7  /9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQ...     A          A
8  /9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQ...     A          A
9  /9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAEBAQEBAQEBAQ...     A          A


In [30]:
# Error analysis: print the base64 string stored under index label 0 of img_df,
# i.e. the payload that would be sent to the model for that row.
# NOTE(review): this cell carries execution count 30, lower than the cells that build
# img_df, so any saved output may reflect an older img_df — re-run to confirm.
print("encoded image",img_df['image_base64'][0])
# Optional single-image round trip through the model (left disabled):
# response = run_model(create_prompt('standard'), img_df['image_base64'][0])
# print(response.output_text)

encoded image nan


In [54]:
# Persist the predictions frame (all of its columns) to predictions.csv, omitting the index.
predictions.to_csv("predictions.csv", index=False)

In [3]:
# Reload the saved predictions from predictions.csv (needs `pd` from the imports cell).
test_df = pd.read_csv('predictions.csv')

In [None]:
# Quick inspection of the reloaded predictions: first 5 rows, shape and column names.
print(test_df.head(5))
print(test_df.shape)
print(test_df.columns)
print()  # emits a trailing blank line in the cell output

                                        image_base64 label prediction
0  /9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQ...     A          S
1  /9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQ...     A          A
2  /9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQ...     A          A
3  /9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQ...     A          A
4  /9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQ...     A          A
(780, 3)
Index(['image_base64', 'label', 'prediction'], dtype='object')
