In [1]:
import json
import pandas as pd
import os
from PIL import ImageDraw, Image
import numpy as np
from skimage.measure import label, regionprops

## Read annotations from AnyLabeling

In [2]:
# Collect the shapes of every annotation file found under path_to_read.
#   annotation_verified: imagePath -> list of shapes (with "NV" shapes removed)
#   keys_to_ignore:      imagePaths whose only shapes were "NV" (no visible defect)
path_to_read = "../annotations/"
# Keep only sub-directories: a stray file in path_to_read would make the inner
# os.listdir fail. os.listdir returns entries in arbitrary order, so sort them
# to make the image/annotation ids assigned later reproducible between runs.
dispositions = sorted(
    x for x in os.listdir(path_to_read)
    if os.path.isdir(os.path.join(path_to_read, x))
)
annotation_verified = {}
keys_to_ignore = []
for disposition in dispositions:
    path_to_tile = os.path.join(path_to_read, disposition)
    annotations = sorted(x for x in os.listdir(path_to_tile) if x.endswith(".json"))
    for annotation in annotations:
        path_to_anno = os.path.join(path_to_tile, annotation)
        with open(path_to_anno, "r", encoding="utf-8") as j:
            data = json.load(j)
        # Clean No visible defects from data: drop *every* "NV" shape, not just
        # the first one, so "NV" can never leak into the category list.
        shapes = data["shapes"]
        defects = [shape for shape in shapes if shape["label"] != "NV"]
        had_nv = len(defects) < len(shapes)
        if had_nv and not defects:
            # The file only said "no visible defect": nothing to export.
            keys_to_ignore.append(data["imagePath"])
        else:
            annotation_verified[data["imagePath"]] = defects

In [3]:
# Sanity check: (image paths with shapes kept, image paths skipped because they only held "NV")
len(annotation_verified), len(keys_to_ignore)

(190, 18)

# Convert AnyLabeling annotations to COCO format

In [4]:
# Defining info part
from datetime import date
today = date.today()
# dd/mm/YY -- d1 is reused by the export cell to build the output file name
d1 = today.strftime("%d/%m/%Y")

# Reference: the "info" object of the official COCO 2017 annotations
#"description": "COCO 2017 Dataset",
#"url": "http://cocodataset.org",
#"version": "1.0",
#"year": 2017,
#"contributor": "COCO Consortium",
#"date_created": "2017/09/01"

info_row = ["Qualitex 2024 Dataset", "https://www.aqc.technology/", "1.0", 2024, "AQC Industry", str(d1)]

# The last key must be "date_created" (it was misspelled "data_created"),
# otherwise COCO readers looking for the creation date do not find it.
df_info = pd.DataFrame(
    [info_row],
    columns=["description", "url", "version", "year", "contributor", "date_created"],
)

df_info

Unnamed: 0,description,url,version,year,contributor,data_created
0,Qualitex 2024 Dataset,https://www.aqc.technology/,1.0,2024,AQC Industry,22/03/2024


In [5]:
# Defining License part
# Single license entry; the images table refers to it through its id (1).
license_record = {
    "url": "http://creativecommons.org/licenses/by-nc/2.0/",
    "id": 1,
    "name": "Attribution-NonCommercial License",
}
df_license = pd.DataFrame([license_record], columns=["url", "id", "name"])
df_license

Unnamed: 0,url,id,name
0,http://creativecommons.org/licenses/by-nc/2.0/,1,Attribution-NonCommercial License


In [6]:
# Defining images part
height = 1024
width = 4096
url_subfix = "cqt_qualitex_fino/specola_20/"

# Each annotated tile contributes two entries: the tile named by the annotation
# and its infrared twin, whose name is the tile name with the last "_"-separated
# token replaced by "IR.jpg". Both live under <url_subfix>/<first name token>/.
img_name = []
aqc_url = []
for tile_name in annotation_verified:
    if tile_name in keys_to_ignore:
        continue
    name_tokens = tile_name.split("_")
    machine_counter = name_tokens[0]
    infrared_tile = "_".join(name_tokens[:-1]) + "_IR.jpg"
    for file_name in (tile_name, infrared_tile):
        img_name.append(file_name)
        aqc_url.append(os.path.join(url_subfix, machine_counter, file_name))

n_entries = len(img_name)
df_images = pd.DataFrame([])
df_images["file_name"] = img_name
df_images["coco_url"] = aqc_url
df_images["height"] = [height] * n_entries
df_images["width"] = [width] * n_entries
df_images["license"] = [1] * n_entries

# Erase duplicates, then number the remaining images from 1
df_images = (
    df_images
    .drop_duplicates(subset="file_name", keep="first")
    .reset_index(drop=True)
)
df_images["id"] = list(range(1, len(df_images) + 1))
df_images

Unnamed: 0,file_name,coco_url,height,width,license,id
0,M41_R59_C2_VIS.jpg,cqt_qualitex_fino/specola_20/M41/M41_R59_C2_VI...,1024,4096,1,1
1,M41_R59_C2_IR.jpg,cqt_qualitex_fino/specola_20/M41/M41_R59_C2_IR...,1024,4096,1,2
2,M43_R352_C3_VIS.jpg,cqt_qualitex_fino/specola_20/M43/M43_R352_C3_V...,1024,4096,1,3
3,M43_R352_C3_IR.jpg,cqt_qualitex_fino/specola_20/M43/M43_R352_C3_I...,1024,4096,1,4
4,M42_R83_C0_VIS.jpg,cqt_qualitex_fino/specola_20/M42/M42_R83_C0_VI...,1024,4096,1,5
...,...,...,...,...,...,...
375,M65_R129_C3_IR.jpg,cqt_qualitex_fino/specola_20/M65/M65_R129_C3_I...,1024,4096,1,376
376,M63_R249_C1_VIS.jpg,cqt_qualitex_fino/specola_20/M63/M63_R249_C1_V...,1024,4096,1,377
377,M63_R249_C1_IR.jpg,cqt_qualitex_fino/specola_20/M63/M63_R249_C1_I...,1024,4096,1,378
378,M61_R95_C2_VIS.jpg,cqt_qualitex_fino/specola_20/M61/M61_R95_C2_VI...,1024,4096,1,379


In [7]:
# Load the defect-code table; the categories cell reads its "DEFECT CODE" and
# "category" columns to assign a supercategory to each label.
path = "./qualitex/DifettiCQT_2022.csv"
df_qualitex = pd.read_csv(path)
df_qualitex

Unnamed: 0,DEFECT CODE,Italiano,Inglese,category,Unnamed: 4
0,129,ABRASIONI,ABRASIONS,ABRASIONS,ABRASIONS
1,40,ALONI,FINISHING SPOTS,STAINS,STAINS
2,19,ASOLE,SNAGS,IRREGULAR YARN,IRREGULAR YARN
3,162,BANDING,BANDING,DEFECT OF PRINTING,DEFECT OF PRINTING
4,75,BARRATURA IN TRAMA,WEFT VEAWE BARS,DEFECT OF WEFT,DEFECT OF WEFT
...,...,...,...,...,...
85,73,TRAMA RIENTRANTE,INLAID WEFT,DEFECT OF WEFT,
86,74,TRAMA ROTTA PICCOLA,SMALL BROKEN WEFT,DEFECT OF WEFT,
87,3,TRAMA TESA,TENSE WEFT,DEFECT OF WEFT,
88,6,TRAME ROTTE,BROKEN WEFT,DEFECT OF WEFT,


In [8]:
# Defining Categories
# One category per distinct label found in the verified annotations.
label_names = [
    shape["label"]
    for shapes in annotation_verified.values()
    for shape in shapes
]
df_cat = pd.DataFrame([])
df_cat["name"] = label_names

# Labels are strings, so the sort is lexicographic ("159" comes before "18");
# the category ids below follow that order.
df_cat = (
    df_cat
    .drop_duplicates(subset="name", keep="first")
    .sort_values(by="name")
    .reset_index(drop=True)
)
df_cat["id"] = list(range(1, len(df_cat) + 1))

# DEFECT CODE -> category. Built once; if a code is listed twice the first row
# wins, which matches taking `.values[0]` of a filtered frame.
code_to_group = (
    df_qualitex
    .drop_duplicates(subset="DEFECT CODE", keep="first")
    .set_index("DEFECT CODE")["category"]
    .to_dict()
)

supercategory = []
for name in df_cat["name"].values:
    # Numeric labels are defect codes; anything else (e.g. "defect") is looked
    # up unchanged. Only conversion failures are tolerated here -- a bare
    # `except:` would also hide unrelated errors such as KeyboardInterrupt.
    try:
        code = int(name)
    except (TypeError, ValueError):
        code = name
    # Labels without an entry in the defect table fall back to the generic group.
    supercategory.append(code_to_group.get(code, "DEFECT"))
df_cat["supercategory"] = supercategory
df_cat

Unnamed: 0,name,id,supercategory
0,14,1,KNOTS
1,159,2,CREASES
2,18,3,IRREGULAR YARN
3,24,4,IRREGULAR YARN
4,36,5,ABRASIONS
5,38,6,STAINS
6,75,7,DEFECT OF WEFT
7,8,8,DEFECT OF WEFT
8,defect,9,DEFECT


In [9]:
def convert_rectangle_to_polygon(points):
    """Turn a two-corner rectangle into a five-vertex polygon.

    Args:
        points: sequence whose first two items are the (x, y) pairs of two
            opposite corners of the rectangle.

    Returns:
        A list of five (x, y) tuples: the first corner, the midpoint of the
        vertical edge starting at that corner, and then the three remaining
        corners of the rectangle.
    """
    (x_first, y_first), (x_second, y_second) = points[0], points[1]
    y_half = float((y_first + y_second) / 2)
    return [
        (x_first, y_first),
        (x_first, y_half),
        (x_first, y_second),
        (x_second, y_second),
        (x_second, y_first),
    ]

In [10]:
# Bounding box, label, area and mask generation
# For every kept annotation the shape is rasterised on a blank width x height
# canvas; the area is the number of painted pixels and the bbox is the
# [x, y, width, height] box enclosing all painted regions. Every value is
# stored twice because each annotation is attached to both the VIS and the IR image.
width = 4096
height = 1024
area = []
bbox = []
for key in annotation_verified:
    if key in keys_to_ignore:
        continue
    annotations = annotation_verified[key]
    for annotation in annotations:
        # Vertices of the shape to paint
        if annotation["shape_type"] == "polygon":
            shape = [tuple(x) for x in annotation["points"]]
            if len(shape) < 5:
                # Short polygons get the midpoint of their first edge as an extra vertex
                shape.insert(1, tuple(np.mean(shape[:2], axis=0)))
        else:
            shape = convert_rectangle_to_polygon(annotation["points"])

        # Single-channel mask: 0 = background, 255 = shape
        canvas = Image.new("L", (width, height), (0))
        mask_draw = ImageDraw.Draw(canvas)
        if len(shape) > 1:
            mask_draw.polygon(shape, fill="#FFFFFF")
        mask = np.asarray(canvas)

        # Mask Area (painted pixels), once for VIS and once for IR
        pixel_area = np.sum(mask / 255)
        area.extend([pixel_area, pixel_area])

        # Mask bbox: union of the boxes of all connected regions.
        # regionprops boxes are (min_row, min_col, max_row, max_col).
        regions = regionprops(label(mask))
        if regions:
            min_rows, min_cols, max_rows, max_cols = zip(*(region.bbox for region in regions))
            x_start, y_start = min(min_cols), min(min_rows)
            box = [x_start, y_start, max(max_cols) - x_start, max(max_rows) - y_start]
        else:
            # Nothing was painted (e.g. a shape with a single vertex)
            box = [0, 0, 0, 0]
        bbox.append(list(box))
        bbox.append(list(box))  # It is duplicated for both images VIS and IR

In [11]:
# Both lists receive two entries per annotation (VIS and IR), so the lengths must match
len(area), len(bbox)

(560, 560)

In [14]:
# Flattened polygon coordinates of every kept annotation, in the same order as
# the area/bbox lists, wrapped in an outer list: [[x1, y1, x2, y2, ...]].
coordinates = []
for key in annotation_verified:
    if key in keys_to_ignore:
        continue
    annotations = annotation_verified[key]
    for annotation in annotations:
        if annotation["shape_type"] == "polygon":
            coords = [tuple(point) for point in annotation["points"]]
            if len(coords) < 5:
                # Same padding as in the mask cell: midpoint of the first edge
                coords.insert(1, tuple(np.mean(coords[:2], axis=0)))
        else:
            coords = convert_rectangle_to_polygon(annotation["points"])

        # appended 2 times to account for visible and infrared
        flat_visible = np.array(coords).ravel().tolist()
        flat_infrared = np.array(coords).ravel().tolist()
        coordinates.append([flat_visible])
        coordinates.append([flat_infrared])

In [15]:
# Defining annotation
# Every shape yields two consecutive rows: one attached to the annotated image
# and one attached to its infrared twin.
df_annotation = pd.DataFrame([])
df_annotation["segmentation"] = coordinates

# Lookup tables built once instead of filtering the frames for every shape.
# keep="first" mirrors taking `.values[0]` of a filtered frame.
category_id_by_name = (
    df_cat.drop_duplicates(subset="name", keep="first").set_index("name")["id"].to_dict()
)
image_id_by_file = (
    df_images.drop_duplicates(subset="file_name", keep="first").set_index("file_name")["id"].to_dict()
)

cat_id = []
img_id = []
for key in annotation_verified:
    if key in keys_to_ignore:
        continue
    # Resolve the infrared image by name (same naming rule as the images cell).
    # Assuming its id is "visible id + 1" breaks as soon as drop_duplicates
    # removed a row from df_images and the two images are no longer adjacent.
    infrared_key = "_".join(key.split("_")[:-1]) + "_IR.jpg"
    visible_id = image_id_by_file[key]
    infrared_id = image_id_by_file[infrared_key]
    annotations = annotation_verified[key]
    for annotation in annotations:
        category = category_id_by_name[annotation["label"]]
        cat_id.extend([category, category])
        img_id.append(visible_id)
        img_id.append(infrared_id)  # Add infrared image to same annotation


df_annotation["area"] = area
df_annotation["iscrowd"] = [0] * len(df_annotation)
df_annotation["id"] = list(range(1, len(df_annotation) + 1))
df_annotation["image_id"] = img_id
df_annotation["category_id"] = cat_id
df_annotation

Unnamed: 0,segmentation,area,iscrowd,id,image_id,category_id
0,"[[3531.0, 919.0, 3531.0, 927.0, 3531.0, 935.0,...",765.0,0,1,1,1
1,"[[3531.0, 919.0, 3531.0, 927.0, 3531.0, 935.0,...",765.0,0,2,2,1
2,"[[2860.345679012346, 483.9506172839506, 2871.4...",655.0,0,3,3,6
3,"[[2860.345679012346, 483.9506172839506, 2871.4...",655.0,0,4,4,6
4,"[[2686.036036036036, 268.9189189189189, 2698.1...",499.0,0,5,3,6
...,...,...,...,...,...,...
555,"[[687.8787878787879, 241.09090909090912, 706.0...",1906.0,0,556,376,9
556,"[[3400.8846153846157, 372.0961538461538, 3400....",2478.0,0,557,377,9
557,"[[3400.8846153846157, 372.0961538461538, 3400....",2478.0,0,558,378,9
558,"[[2769.369369369369, 8.558558558558557, 2782.4...",863.0,0,559,379,9


In [16]:
# Attach the bounding boxes as a new column; list(bbox) passes a shallow copy of the list
df_annotation["bbox"] = list(bbox)
df_annotation

Unnamed: 0,segmentation,area,iscrowd,id,image_id,category_id,bbox
0,"[[3531.0, 919.0, 3531.0, 927.0, 3531.0, 935.0,...",765.0,0,1,1,1,"[3531, 919, 45, 17]"
1,"[[3531.0, 919.0, 3531.0, 927.0, 3531.0, 935.0,...",765.0,0,2,2,1,"[3531, 919, 45, 17]"
2,"[[2860.345679012346, 483.9506172839506, 2871.4...",655.0,0,3,3,6,"[2839, 453, 33, 31]"
3,"[[2860.345679012346, 483.9506172839506, 2871.4...",655.0,0,4,4,6,"[2839, 453, 33, 31]"
4,"[[2686.036036036036, 268.9189189189189, 2698.1...",499.0,0,5,3,6,"[2677, 268, 28, 24]"
...,...,...,...,...,...,...,...
555,"[[687.8787878787879, 241.09090909090912, 706.0...",1906.0,0,556,376,9,"[675, 236, 62, 88]"
556,"[[3400.8846153846157, 372.0961538461538, 3400....",2478.0,0,557,377,9,"[3400, 372, 118, 21]"
557,"[[3400.8846153846157, 372.0961538461538, 3400....",2478.0,0,558,378,9,"[3400, 372, 118, 21]"
558,"[[2769.369369369369, 8.558558558558557, 2782.4...",863.0,0,559,379,9,"[2769, 8, 50, 63]"


In [17]:
# Erase wrong segmentations
# A row is dropped when its first polygon holds fewer than 5 coordinate values.
# The annotation ids are not renumbered afterwards; only the index is rebuilt.
too_short = [
    row_label
    for row_label, polygons in df_annotation["segmentation"].items()
    if len(polygons[0]) < 5
]
df_annotation.drop(index=too_short, inplace=True)
df_annotation.reset_index(drop=True, inplace=True)

In [18]:
# Display the annotation table
df_annotation

Unnamed: 0,segmentation,area,iscrowd,id,image_id,category_id,bbox
0,"[[3531.0, 919.0, 3531.0, 927.0, 3531.0, 935.0,...",765.0,0,1,1,1,"[3531, 919, 45, 17]"
1,"[[3531.0, 919.0, 3531.0, 927.0, 3531.0, 935.0,...",765.0,0,2,2,1,"[3531, 919, 45, 17]"
2,"[[2860.345679012346, 483.9506172839506, 2871.4...",655.0,0,3,3,6,"[2839, 453, 33, 31]"
3,"[[2860.345679012346, 483.9506172839506, 2871.4...",655.0,0,4,4,6,"[2839, 453, 33, 31]"
4,"[[2686.036036036036, 268.9189189189189, 2698.1...",499.0,0,5,3,6,"[2677, 268, 28, 24]"
...,...,...,...,...,...,...,...
555,"[[687.8787878787879, 241.09090909090912, 706.0...",1906.0,0,556,376,9,"[675, 236, 62, 88]"
556,"[[3400.8846153846157, 372.0961538461538, 3400....",2478.0,0,557,377,9,"[3400, 372, 118, 21]"
557,"[[3400.8846153846157, 372.0961538461538, 3400....",2478.0,0,558,378,9,"[3400, 372, 118, 21]"
558,"[[2769.369369369369, 8.558558558558557, 2782.4...",863.0,0,559,379,9,"[2769, 8, 50, 63]"


## From DataFrame to JSON

In [19]:
# reordering DF
# Column order used for each section of the exported JSON.
image_columns = ['license', 'file_name', 'coco_url', 'height', 'width', 'id']
annotation_columns = ['segmentation', 'area', 'iscrowd', 'image_id', 'bbox', 'category_id', 'id']
category_columns = ["supercategory", "id", "name"]

df_images = df_images.loc[:, image_columns]
df_annotation = df_annotation.loc[:, annotation_columns]
df_cat = df_cat.loc[:, category_columns]

In [20]:
# Start the output document. Round-tripping through to_json/json.loads turns
# the frame into plain Python values; only the first record is kept, so
# "info" is a dict rather than a list.
file = {}
info_records = json.loads(df_info.to_json(orient="records"))
file["info"] = info_records[0]

In [21]:
# "licenses": list with one dict per row of df_license
file["licenses"] = json.loads(df_license.to_json(orient="records"))

In [22]:
# "images": list with one dict per row of df_images
file["images"] = json.loads(df_images.to_json(orient="records"))

In [23]:
# "categories": list with one dict per row of df_cat
file["categories"] = json.loads(df_cat.to_json(orient="records"))

In [24]:
# "annotations": list with one dict per row of df_annotation
file["annotations"] = json.loads(df_annotation.to_json(orient="records"))

In [25]:
# Save JSON file
# The creation date becomes part of the file name, so its "/" separators are
# replaced by "_" first.
os.makedirs("../output", exist_ok=True)
d1 = str(d1).replace("/", "_")
output_file = f"../output/annotations_qualitex_reviewed_{d1}.json"
with open(output_file, 'w') as out_handle:
    json.dump(file, out_handle)