**Import libraries**

In [2]:
from glob import glob 
# from google.colab import files
import matplotlib.image as mpimg
import pickle
import random
import matplotlib.pyplot as plt
import csv
import numpy as np
import cv2
import progressbar
import datetime

**Retrieve images from external sources**

In [18]:
# Clone the GitHub repo of mahjong tile images
!git clone https://github.com/camerash/mahjong-dataset

fatal: destination path 'mahjong-dataset' already exists and is not an empty directory.


In [19]:
# Download the Describable Textures Dataset (DTD) archive. Its images are
# loaded further down and used as random backgrounds for the generated pictures.
!wget https://www.robots.ox.ac.uk/~vgg/data/dtd/download/dtd-r1.0.1.tar.gz

--2024-03-11 22:44:08--  https://www.robots.ox.ac.uk/~vgg/data/dtd/download/dtd-r1.0.1.tar.gz
Resolving www.robots.ox.ac.uk (www.robots.ox.ac.uk)... 129.67.94.2
Connecting to www.robots.ox.ac.uk (www.robots.ox.ac.uk)|129.67.94.2|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://thor.robots.ox.ac.uk/datasets/dtd/dtd-r1.0.1.tar.gz [following]
--2024-03-11 22:44:09--  https://thor.robots.ox.ac.uk/datasets/dtd/dtd-r1.0.1.tar.gz
Resolving thor.robots.ox.ac.uk (thor.robots.ox.ac.uk)... 129.67.95.98
Connecting to thor.robots.ox.ac.uk (thor.robots.ox.ac.uk)|129.67.95.98|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 625239812 (596M) [application/octet-stream]
Saving to: ‘dtd-r1.0.1.tar.gz’


2024-03-11 22:44:35 (23.3 MB/s) - ‘dtd-r1.0.1.tar.gz’ saved [625239812/625239812]



In [20]:
# Extract the DTD archive into the working directory
# (the loading cell below expects the images under dtd/images/)
!tar xf dtd-r1.0.1.tar.gz

In [21]:
# Delete the downloaded DTD archive (a .tar.gz file) once it has been extracted
!rm dtd-r1.0.1.tar.gz

In [21]:
# Load every *.jpg found in the DTD sub-directories and save them all in a
# single pickle file, so that the images only have to be decoded once.

backgrounds_pck_fn="backgrounds.pck"
dtd_dir="dtd/images/"
bg_images=[]
print("Loading images ... (It could take several minutes)")
for subdir in glob(dtd_dir+"/*"):
    for f in glob(subdir+"/*.jpg"):
        bg_images.append(mpimg.imread(f))
# Use a context manager so the pickle file is flushed and closed before
# anything tries to read it back.
with open(backgrounds_pck_fn,'wb') as backgrounds_pck_file:
    pickle.dump(bg_images,backgrounds_pck_file)

class Backgrounds():
    """In-memory pool of background images read from a pickle file.

    The pickle is expected to hold a list of images, as written by the
    loading cell above.
    """
    def __init__(self,backgrounds_pck_fn=backgrounds_pck_fn):
        """Load the pickled image list from ``backgrounds_pck_fn``."""
        # NOTE: pickle.load can execute arbitrary code; only load a pickle
        # that this notebook produced itself.
        with open(backgrounds_pck_fn,'rb') as backgrounds_pck_file:
            self._images=pickle.load(backgrounds_pck_file)
        self._nb_images=len(self._images)
        print("Number of images loaded :", self._nb_images)
    def get_random(self, display=False):
        """Return one randomly chosen background image.

        Args:
            display: when true, also show the image with ``plt.imshow``.

        Raises:
            ValueError: if no background image has been loaded.
        """
        if self._nb_images == 0:
            raise ValueError("No background images loaded; cannot pick a random one")
        bg=self._images[random.randint(0,self._nb_images-1)]
        if display: plt.imshow(bg)
        return bg

backgrounds = Backgrounds()

Loading images ... (It could take several minutes)
Number of images loaded : 5640


**Create the output directory**

In [23]:
# Create a directory that will contain the images we generate
data_dir="images"
!mkdir images

mkdir: images: File exists


**Define functions**

In [25]:
# Rotate the image to the angle as specified
def rotate_tile(image, angle):
    """Rotate ``image`` clockwise by ``angle`` degrees without cropping it.

    The output canvas is enlarged to the bounding box of the rotated image.
    The area not covered by the rotated image keeps warpAffine's default
    border value (zeros, i.e. black); the compositing code in this notebook
    treats black pixels as transparent.

    Args:
        image: image array whose first two dimensions are (height, width).
        angle: rotation angle in degrees (clockwise).

    Returns:
        The rotated image on the enlarged canvas.
    """
    # grab the dimensions of the image and then determine the
    # center
    (h, w) = image.shape[:2]
    (cX, cY) = (w // 2, h // 2)

    # grab the rotation matrix (applying the negative of the
    # angle to rotate clockwise), then grab the sine and cosine
    # (i.e. the rotation components of the matrix)
    M = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
    cos = np.abs(M[0, 0])
    sin = np.abs(M[0, 1])

    # compute the new bounding dimensions of the image
    nW = int((h * sin) + (w * cos))
    nH = int((h * cos) + (w * sin))

    # adjust the rotation matrix to take into account translation
    M[0, 2] += (nW / 2) - cX
    M[1, 2] += (nH / 2) - cY

    # perform the actual rotation and return the image
    return cv2.warpAffine(image, M, (nW, nH))

In [26]:
# Resize an image
def image_resize(image, width = None, height = None, inter = cv2.INTER_AREA):
    """Resize ``image`` to a given width or height, keeping its aspect ratio.

    Args:
        image: image array whose first two dimensions are (height, width).
        width: target width in pixels. When given, it takes precedence and
            ``height`` is ignored.
        height: target height in pixels, used only when ``width`` is None.
        inter: OpenCV interpolation flag passed to ``cv2.resize``.

    Returns:
        The resized image, or the original ``image`` object when both
        ``width`` and ``height`` are None.
    """
    # grab the image size
    (h, w) = image.shape[:2]

    # nothing to do when no target dimension was requested
    if width is None and height is None:
        return image

    if width is None:
        # scale by the height ratio; clamp the derived width to 1 pixel so
        # that a very narrow image cannot produce a zero-sized target
        r = height / float(h)
        dim = (max(1, int(w * r)), height)
    else:
        # scale by the width ratio (height, if also given, is ignored);
        # clamp the derived height for the same reason
        r = width / float(w)
        dim = (width, max(1, int(h * r)))

    # resize the image and return it
    return cv2.resize(image, dim, interpolation = inter)

In [27]:
def create_image_in_grid_format(csv_filename, grid_length=4):
  """Generate one synthetic image holding a grid of mahjong tiles.

  Random tiles from genrate_images() are pasted onto a random background on
  a grid_length x grid_length grid. The picture is saved under data_dir and
  one annotation row per tile is appended to csv_filename.

  Args:
    csv_filename: path of the annotation CSV that rows are appended to.
    grid_length: number of tiles per row and per column.

  Returns:
    List of the tile types placed in the image, in placement order
    (row by row).

  Raises:
    IOError: if the generated image could not be written to disk.
  """
  (tile_images, tile_type_of_tile_images) = genrate_images(grid_length)

  # Randomly select a background image
  background_image = backgrounds.get_random()

  # The backgrounds were read with matplotlib (RGB channel order) whereas the
  # tiles come from cv2.imread and the result is saved with cv2.imwrite, both
  # of which use BGR. Convert to BGRA so the saved background keeps its
  # colours. cvtColor returns a new array, so the pooled image is not mutated.
  output_image = cv2.cvtColor(background_image, cv2.COLOR_RGB2BGRA)

  # Resize the background image to standard width and height (i.e. 1024x1024)
  # The aspect ratio is not preserved, so this may distort the background
  resize_specifications = (image_standard_width, image_standard_height)
  output_image = cv2.resize(output_image, resize_specifications,
                            interpolation = cv2.INTER_AREA)
  canvas_height, canvas_width = output_image.shape[:2]

  csv_row_data_placeholder = []

  # Place the tiles on the resized background image,
  # where the black area of each tile is treated as transparent
  grid_cells = [(row_index, column_index)
                for row_index in range(grid_length)
                for column_index in range(grid_length)]
  for (row_index, column_index), tile_img, tile_type in zip(
      grid_cells, tile_images, tile_type_of_tile_images):
    # Opacity mask: 255 wherever the tile is not pure black, 0 elsewhere
    gray_tile = cv2.cvtColor(tile_img, cv2.COLOR_BGR2GRAY)
    _, alpha = cv2.threshold(gray_tile, 0, 255, cv2.THRESH_BINARY)

    # Top-left corner of the grid cell; the bottom-right corner is clipped
    # to the canvas so an oversized tile cannot break the blend below
    x1 = int((column_index / grid_length) * canvas_width)
    y1 = int((row_index / grid_length) * canvas_height)
    x2 = min(x1 + tile_img.shape[1], canvas_width)
    y2 = min(y1 + tile_img.shape[0], canvas_height)

    visible_tile = tile_img[:y2 - y1, :x2 - x1, :3]
    alpha_s = (alpha[:y2 - y1, :x2 - x1] / 255.0)[:, :, np.newaxis]
    alpha_l = 1.0 - alpha_s

    # Alpha-blend the three colour channels of the tile over the background
    output_image[y1:y2, x1:x2, :3] = (
        alpha_s * visible_tile + alpha_l * output_image[y1:y2, x1:x2, :3])

    # Record the tile type with the relative coordinates of the top-left
    # and bottom-right vertices of the tile in the image
    csv_row_data_placeholder.append(
        (tile_type,
         x1 / canvas_width, y1 / canvas_height,
         x2 / canvas_width, y2 / canvas_height))

  # Save the image under a timestamp-based name
  now = datetime.datetime.now()
  string_of_current_moment = (now.strftime('%Y-%m-%d-%H-%M-%S-')
                              + str(now.microsecond))

  filename_of_img_created = data_dir + '/' + string_of_current_moment + '.jpg'
  # cv2.imwrite reports failure through its return value instead of raising
  if not cv2.imwrite(filename_of_img_created, output_image):
    raise IOError("Could not write image: " + filename_of_img_created)

  # Append rows in csv to record labelling information; all tiles of one
  # image belong to the same randomly chosen dataset split
  dataset_types = ['TRAIN', 'TEST', 'VALIDATION']
  set_type = random.choices(dataset_types, weights=(80,10,10), k=1)[0]

  for row_data in csv_row_data_placeholder:
    (target_tile_type,min_x,min_y,max_x,max_y) = row_data
    row_data_to_be_written_in_csv = [set_type,
                                     filename_of_img_created,
                                     target_tile_type,
                                     min_x,min_y,'','',
                                     max_x,max_y,'','']
    append_row_to_csv(csv_filename,row_data_to_be_written_in_csv, 'a')

  return list(tile_type_of_tile_images)

In [28]:
def genrate_images(grid_length=4):
  """Pick, rotate and resize the tile images needed to fill one grid.

  Args:
    grid_length: number of tiles per row and per column; grid_length ** 2
      tiles are produced.

  Returns:
    Tuple (tile_images, tile_type_of_tile_images): the prepared tile images
    and, at the same positions, the tile type of each image.

  Raises:
    FileNotFoundError: if a sampled tile image cannot be read.
  """
  directory_of_github_repo_sampling_images = 'mahjong-dataset/tiles-resized/'

  grid_size = grid_length * grid_length

  # Loop-invariant values: the candidate tile types and the target tile size
  # (two thirds of a grid cell)
  available_tile_types = list(lookup_filename_of_tile_img.keys())
  target_tile_height = int(image_standard_height/(grid_length * 1.5))
  target_tile_width = int(image_standard_width/(grid_length * 1.5))

  tile_images = []
  tile_type_of_tile_images = []

  for _ in range(grid_size):
    selected_tile_type = random.choice(available_tile_types)
    image_samples = lookup_filename_of_tile_img[selected_tile_type]
    sample_image_path = directory_of_github_repo_sampling_images + \
        random.choices(image_samples, k=1)[0]

    # cv2.imread returns None instead of raising when the file is unreadable
    selected_sample_image = cv2.imread(sample_image_path)
    if selected_sample_image is None:
      raise FileNotFoundError(
          "Could not read tile image: " + sample_image_path)

    # Add an alpha channel (the colour channel order is left untouched)
    selected_sample_image = cv2.cvtColor(selected_sample_image,
                                         cv2.COLOR_RGB2RGBA)

    # Rotate the image
    rotated_sample_image = rotate_tile(selected_sample_image,
                                       random.randrange(5,355))

    # The size of the image changes after rotation, so the rotated image is
    # resized: first to the target height, then to the target width (each
    # step keeps the aspect ratio, so the final width is the target width)
    resized_sample_image = image_resize(rotated_sample_image,
                                        height=target_tile_height)
    resized_sample_image = image_resize(resized_sample_image,
                                        width=target_tile_width)
    tile_images.append(resized_sample_image)
    tile_type_of_tile_images.append(selected_tile_type)

  return (tile_images, tile_type_of_tile_images)

In [29]:
# Check if the annotation target is met.  Annotation target is met only when 
# all tile types have the annotation number that is 
# equal or larger than the threshold
def check_if_annotation_target_is_met(tile_type_annotations, 
                                      annotations_target_threshold):
  """Tell whether every tile type has reached the annotation threshold.

  Returns True when no count in tile_type_annotations is below
  annotations_target_threshold (which includes the empty mapping),
  otherwise False.
  """
  return not any(count < annotations_target_threshold
                 for count in tile_type_annotations.values())

In [36]:
# Size (in pixels) of every image in the generated dataset
image_standard_width = 1024
image_standard_height = 1024

# Range of grid sizes to draw from.
# E.g. a 4x4 grid puts 16 mahjong tiles in one image.
min_grid_length, max_grid_length = 2, 8

# Number of annotations required for each tile type
target_number_of_annotations_for_each_tile_types = 1000

In [31]:
# Count the annotation number for determining the progress % in the progress bar
def count_annotations_number_for_updating_progress_bar(tile_type_annotations, 
                                                      annotations_target):
    """Sum the per-type annotation counts, capping each at annotations_target.

    Capping keeps the total from exceeding
    (number of tile types) * annotations_target.
    """
    return sum(min(count, annotations_target)
               for count in tile_type_annotations.values())

In [32]:
# Write data to a csv file
def append_row_to_csv(csv_filename, data, write_mode):
  """Write one row to a CSV file.

  Args:
    csv_filename: path of the CSV file.
    data: iterable of field values forming the row.
    write_mode: mode passed to open(), e.g. 'w' to start a new file or 'a'
      to append to an existing one.
  """
  # newline='' lets the csv module control line endings itself; without it
  # rows can end up separated by blank lines on platforms that translate
  # newlines. The with-block closes the file, so no explicit close() is needed.
  with open(csv_filename, write_mode, encoding='UTF8', newline='') \
  as dataset_csvfile_for_model_training:
    csv.writer(dataset_csvfile_for_model_training).writerow(data)

In [33]:
# Read labelling data from the csv file

# tile type -> list of image filenames showing that tile type
lookup_filename_of_tile_img = {}
# tile type -> number of annotations generated so far (starts at 0)
tile_type_annotations = {}

with open('mahjong-dataset/tiles-data/data.csv', newline='') as csvfile:
  # The file is comma-separated, so let the csv module split the fields
  for row_data in csv.reader(csvfile):
    # Skip blank or truncated lines instead of failing on a missing column
    if len(row_data) < 3:
      continue
    img_file = row_data[0]
    tile_type = row_data[2]
    # Skip the header row and the bonus tiles
    if tile_type == 'label-name' or 'bonus-' in tile_type:
      continue
    tile_type_annotations.setdefault(tile_type, 0)
    lookup_filename_of_tile_img.setdefault(tile_type, []).append(img_file)

In [37]:
# Create a csv file that would contain the annotation information of the dataset images
csv_filename = 'dataset-for-training-AutoML-Vision-model.csv'
header = ['set','path','label','x_min','y_min','x_max',
          'y_min','x_max','y_max','x_min','y_max']
append_row_to_csv(csv_filename, header, 'w')

# The csv file has just been started afresh, so the per-type counters must
# start from zero too; otherwise re-running this cell would stop before the
# new csv holds the requested number of annotations.
for tile_type in tile_type_annotations:
  tile_type_annotations[tile_type] = 0

# Create a progress bar to display progress.
# The number of tile types comes from the loaded label data, so the bar's
# maximum always matches what the counting function can return.
total_number_of_tile_types = len(tile_type_annotations)
progress_bar_max_val = total_number_of_tile_types * \
                        target_number_of_annotations_for_each_tile_types

bar = progressbar.ProgressBar(max_value=progress_bar_max_val, 
                              min_value=0, 
                              widgets=[progressbar.Bar('=', '[', ']'),
                                       ' ', 
                                       progressbar.Percentage()])
annotations_number_for_updaing_progress_bar = 0
bar.start()
bar.update(annotations_number_for_updaing_progress_bar)

# Generate images until the number of annotation target is met
annotation_target_is_met = False
total_number_of_images_generated = 0
while not annotation_target_is_met:  
  # Each image uses a randomly sized grid
  grid_length = random.randint(min_grid_length, max_grid_length)
  tile_types_annotated = create_image_in_grid_format(csv_filename, 
                                                     grid_length)
  total_number_of_images_generated += 1
  for tile_type in tile_types_annotated:
    tile_type_annotations[tile_type] += 1
  annotations_number_for_updaing_progress_bar = \
  count_annotations_number_for_updating_progress_bar(
      tile_type_annotations, 
      target_number_of_annotations_for_each_tile_types)
  bar.update(annotations_number_for_updaing_progress_bar)
  annotation_target_is_met = check_if_annotation_target_is_met(
      tile_type_annotations,
      target_number_of_annotations_for_each_tile_types)

bar.finish()
print("Total number of images generated: " + str(total_number_of_images_generated))

total_number_of_annotations = sum(tile_type_annotations.values())
print("Total number of annotations created: " + str(total_number_of_annotations))

[                                                                        ] [38;2;255;0;0m  0%[39m


Total number of images generated: 993
Total number of annotations created: 35808


## Convert the AutoML annotations to YOLO format

In [41]:
import csv
import os
# from collections import defaultdict

# Define file paths
csv_file_path = 'dataset-for-training-AutoML-Vision-model.csv'
output_directory = 'training/yolo_format/'
class_mapping_file_path = 'yolo_class_mapping.txt'

# The csv labels its splits TRAIN / TEST / VALIDATION, whereas the copy step
# further down reads "<split>_labels" folders named train / val / test.
# Map the lower-cased csv names onto those folder names.
yolo_split_names = {'train': 'train', 'validation': 'val', 'test': 'test'}

# Make sure the output directory exists
os.makedirs(output_directory, exist_ok=True)

# Read the annotation rows once. The csv module already returns empty fields
# as empty strings, so no pre-processing of the lines is needed.
with open(csv_file_path, newline='') as csvfile:
    reader = csv.reader(csvfile)
    next(reader, None)  # Skip header
    annotation_rows = [row for row in reader if row]

# Sorted unique class names give a consistent class index mapping
class_names = sorted({row[2] for row in annotation_rows})

# Create a dictionary to map class names to indices
class_mapping = {name: index for index, name in enumerate(class_names)}

# Convert every annotation to a YOLO line, grouped by the label file it
# belongs to
yolo_lines_by_label_file = {}
for row in annotation_rows:
    set_type = row[0].lower()
    split_name = yolo_split_names.get(set_type, set_type)
    img_file = os.path.basename(row[1])
    class_label = row[2]
    # Columns 3/4 hold the top-left vertex and columns 7/8 the bottom-right
    x_min = float(row[3])
    y_min = float(row[4])
    x_max = float(row[7])
    y_max = float(row[8])

    # YOLO boxes are (x_center, y_center, width, height), all relative
    x_center = (x_min + x_max) / 2
    y_center = (y_min + y_max) / 2
    width = x_max - x_min
    height = y_max - y_min

    class_index = class_mapping[class_label]

    txt_file_path = os.path.join(output_directory, f"{split_name}_labels",
                                 os.path.splitext(img_file)[0] + '.txt')
    yolo_lines_by_label_file.setdefault(txt_file_path, []).append(
        f"{class_index} {x_center} {y_center} {width} {height}\n")

# Write each label file in one go. Mode 'w' (rather than append) keeps the
# cell idempotent: re-running it does not duplicate the boxes.
for txt_file_path, yolo_lines in yolo_lines_by_label_file.items():
    os.makedirs(os.path.dirname(txt_file_path), exist_ok=True)
    with open(txt_file_path, 'w') as yolo_file:
        yolo_file.writelines(yolo_lines)

# Save the class names, one per line in index order, as a reference to which
# indices correspond to which classes
with open(os.path.join(output_directory, class_mapping_file_path), 'w') as class_file:
    for class_name in class_names:
        class_file.write(f"{class_name}\n")

## Clone YOLOv5 and start training

In [None]:
%%bash
# Clone YOLOv5 only when it is not checked out yet, so the cell can be re-run.
[ -d yolov5 ] || git clone https://github.com/ultralytics/yolov5.git
# Stop if the checkout is missing; otherwise pip would look for a
# requirements.txt in the notebook's own directory.
cd yolov5 || exit 1
pip install -r requirements.txt

In [44]:
import os
import shutil

# Where the earlier cells left their results
labels_directory = 'training/yolo_format'
images_directory = 'images'  # Assuming this contains all images (unsorted)

# Location of the YOLOv5 checkout and the name of the dataset inside it
yolov5_base_directory = './yolov5'
dataset_name = 'mahjong'  # Name of your dataset

# Target layout: <yolov5>/datasets/<dataset>/{images,labels}/<split>
new_dataset_base = os.path.join(yolov5_base_directory, 'datasets', dataset_name)
new_images_base = os.path.join(new_dataset_base, 'images')
new_labels_base = os.path.join(new_dataset_base, 'labels')

# Names of the train, val, and test splits
sets = ['train', 'val', 'test']

# Make sure every split has an images folder and a labels folder
for set_name in sets:
    for split_parent in (new_images_base, new_labels_base):
        os.makedirs(os.path.join(split_parent, set_name), exist_ok=True)

# Function to copy labels and images with a progress bar
def copy_labels_and_images_with_progress(label_set_name, src_labels_dir, dest_images_dir, dest_labels_dir, src_images_dir=None):
    """Copy one split's label files and their images, showing a progress bar.

    Args:
        label_set_name: name of the split, used in the messages.
        src_labels_dir: directory holding the split's ``.txt`` label files.
        dest_images_dir: directory the matching images are copied to.
        dest_labels_dir: directory the label files are copied to.
        src_images_dir: directory holding the source images; defaults to the
            module-level ``images_directory``.

    Returns:
        None. A split with no labels directory, or with no label files, is
        reported and skipped.
    """
    if not os.path.exists(src_labels_dir):
        print(f"No labels directory found for {label_set_name}, skipping.")
        return

    if src_images_dir is None:
        src_images_dir = images_directory

    # Only real label files are copied; stray entries such as .DS_Store are
    # ignored. Sorting makes the copy order reproducible.
    label_files = sorted(name for name in os.listdir(src_labels_dir)
                         if name.endswith('.txt'))
    if not label_files:
        print(f"No label files found for {label_set_name}, skipping.")
        return

    widgets = [
        f'Copying {label_set_name}: ', progressbar.Percentage(), ' ',
        progressbar.Bar(marker=progressbar.RotatingMarker()), ' ', progressbar.ETA()
    ]
    bar = progressbar.ProgressBar(widgets=widgets, max_value=len(label_files)).start()

    for copied_count, label_file in enumerate(label_files, start=1):
        # Copy label file
        src_label_path = os.path.join(src_labels_dir, label_file)
        dest_label_path = os.path.join(dest_labels_dir, label_file)
        shutil.copy(src_label_path, dest_label_path)

        # Determine corresponding image file and copy it (images are .jpg);
        # only the extension is swapped, the rest of the name is kept
        image_file_name = os.path.splitext(label_file)[0] + '.jpg'
        src_image_path = os.path.join(src_images_dir, image_file_name)
        dest_image_path = os.path.join(dest_images_dir, image_file_name)
        if os.path.exists(src_image_path):
            shutil.copy(src_image_path, dest_image_path)
        else:
            print(f"Warning: Corresponding image file not found for {src_image_path}")

        bar.update(copied_count)
    bar.finish()

# Fill the YOLOv5 dataset tree one split at a time (with a progress bar each)
for set_name in sets:
    copy_labels_and_images_with_progress(
        set_name,
        os.path.join(labels_directory, set_name + "_labels"),
        os.path.join(new_images_base, set_name),
        os.path.join(new_labels_base, set_name),
    )

Copying train: [38;2;255;0;0m  0%[39m |/                                         | ETA:  --:--:--
Copying train: [38;2;255;104;0m 17%[39m |-                                         | ETA:   0:00:00
Copying train: [38;2;255;154;0m 35%[39m |\                                         | ETA:   0:00:00
Copying train: [38;2;255;239;0m 54%[39m ||                                         | ETA:   0:00:00
Copying train: [38;2;212;255;0m 72%[39m |/                                         | ETA:   0:00:00
Copying train: [38;2;159;255;0m 91%[39m |-                                         | ETA:   0:00:00
Copying train: [38;2;0;255;0m100%[39m ||                                         | Time:  0:00:00
Copying val: [38;2;255;0;0m  0%[39m |/                                           | ETA:  --:--:--
Copying val: [38;2;0;255;0m100%[39m ||                                           | Time:  0:00:00
Copying test: [38;2;255;0;0m  0%[39m |/                                          | ETA: 

In [45]:
# Copy the dataset config into the YOLOv5 checkout, where the training cell
# below runs train.py with `--data mahjong.yaml`.
# NOTE(review): no cell visible here creates mahjong.yaml — it must already
# exist in the working directory.
!cp mahjong.yaml yolov5/

In [47]:
%%bash
cd yolov5/ || exit 1
# train.py has no --include option (that flag belongs to export.py), and its
# argument parser rejects unknown options, so the flag is not passed here.
# To get a CoreML model, run export.py with `--include coreml` on the trained
# weights once training has finished.
python train.py --img 1024 --batch 16 --epochs 100 --data mahjong.yaml --cfg ./models/yolov5s.yaml --weights yolov5s.pt --name mahjong_train

[34m[1mtrain: [0mweights=yolov5s.pt, cfg=./models/yolov5s.yaml, data=mahjong.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epochs=100, batch_size=16, imgsz=1024, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, evolve_population=data/hyps, resume_evolve=None, bucket=, cache=None, image_weights=False, device=mps, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=runs/train, name=mahjong_train, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest, ndjson_console=False, ndjson_file=False
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 🚀 v7.0-294-gdb125a20 Python-3.12.1 torch-2.2.1 MPS

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bia

Process is interrupted.


In [3]:
from ultralytics import YOLO

# Load a pretrained model (recommended for training).
# To train from scratch instead, build the model from its architecture file:
#   model = YOLO("yolov8n.yaml")
# (building it here and then overwriting it would only waste time)
model = YOLO("yolov8n.pt")

# Train, then evaluate on the validation set
model.train(data="mahjong_yolov8.yaml", imgsz=1024, epochs=3, batch=128, device="mps")  # train the model
metrics = model.val()  # evaluate model performance on the validation set
# results = model("https://ultralytics.com/images/bus.jpg")  # predict on an image

# Export the trained model to the deployment formats
onnx_path = model.export(format="onnx")  # export the model to ONNX format
coreml_path = model.export(format="coreml")  # export the model to CoreML format
tflite_path = model.export(format="tflite")  # export the model to TensorFlow Lite format

Ultralytics YOLOv8.1.27 🚀 Python-3.12.1 torch-2.2.1 MPS (Apple M1 Max)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=mahjong_yolov8.yaml, epochs=300, time=None, patience=100, batch=64, imgsz=1024, save=True, save_period=-1, cache=False, device=mps, workers=8, project=None, name=train14, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show_boxes=T

[34m[1mtrain: [0mScanning /Users/xingfanxia/projects/mahjong_hand_classifier/datasets/mahjong/labels/train.cache... 801 images, 0 backgrounds, 0 corrupt: 100%|██████████| 801/801 [00:00<?, ?it/s]
[34m[1mval: [0mScanning /Users/xingfanxia/projects/mahjong_hand_classifier/datasets/mahjong/labels/val.cache... 89 images, 0 backgrounds, 0 corrupt: 100%|██████████| 89/89 [00:00<?, ?it/s]


Plotting labels to /Users/xingfanxia/projects/mahjong_hand_classifier/runs/detect/train14/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000263, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 1024 train, 1024 val
Using 0 dataloader workers
Logging results to [1m/Users/xingfanxia/projects/mahjong_hand_classifier/runs/detect/train14[0m
Starting training for 300 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      1/300         0G          0       1311          0       2615       1024:   8%|▊         | 1/13 [01:23<16:41, 83.47s/it]


KeyboardInterrupt: 