In [1]:
import os #Import os lib for the file operations
import pandas as pd #For data manipulation and analysis
import numpy as np #For data manipulation and analysis
import cv2 #Import cv2 library for video and image processing
import json #To read JSON files
import plotly.express as px #Visualization lib
from tqdm import tqdm #Process bar to see progress
import warnings
warnings.filterwarnings('ignore')

In [2]:
#Two experts labeled images and upload to Github. Labeled frames are in JSON format.
adress = 'https://raw.githubusercontent.com/kynemre/CellSegmentation/master/DATA/BothJsonFiles.zip'; #JSON files path
!wget -q -O data.zip "{adress}"; # Download data as ZIP
!unzip -q data.zip # Extract ZIP file
!rm data.zip ## Delete ZIP file

DIR1Name = "ann_Emre" #Create folder name - expert1
DIR2Name = "ann_Tarik" #Create folder name - expert2

replace ann_Emre/frame1.jpg.json? [y]es, [n]o, [A]ll, [N]one, [r]ename: A


In [3]:
# Disabled template for comparing a different set of labelled frames (JSON files).
# The statements are wrapped in a string literal, so nothing below is executed.
# Presumably, once enabled, the JSON files of each annotator are uploaded into the
# two folders created here -- confirm the intended workflow before relying on it.
"""
DIR1Name = "Label_1" #Create a name for first folder
DIR2Name = "Label_2" #Create a name for second folder

!mkdir "{DIR1Name}" #Create the first folder
!mkdir "{DIR2Name}" #Create the second folder
""";

In [4]:
# File names of every JSON annotation in each expert's folder, in plain
# lexicographic order. The later cells pair the two lists by position.
paths1 = sorted(os.listdir("/content/" + DIR1Name)) # expert 1
paths2 = sorted(os.listdir("/content/" + DIR2Name)) # expert 2

In [5]:
print("Check For Image Pairs")

# The two file lists are matched purely by position, so they must have the same
# length. Fail early with a readable message instead of an IndexError (second
# list shorter) or silently ignored files (second list longer).
if len(paths1) != len(paths2):
    raise ValueError(
        "Cannot pair annotation files: {} contains {} file(s) but {} contains {} file(s).".format(
            DIR1Name, len(paths1), DIR2Name, len(paths2)))

# Print every pair so the matching can be verified by eye.
for name1, name2 in zip(paths1, paths2):
    print("{} and {} are pairing.".format(name1, name2))

Check For Image Pairs
frame1.jpg.json and Image1.png.json are pairing.
frame13.jpg.json and Image13.png.json are pairing.
frame15.jpg.json and Image15.png.json are pairing.
frame153.jpg.json and Image153.png.json are pairing.
frame213.jpg.json and Image213.png.json are pairing.
frame217.jpg.json and Image217.png.json are pairing.
frame28.jpg.json and Image28.png.json are pairing.
frame3.jpg.json and Image3.png.json are pairing.
frame312.jpg.json and Image312.png.json are pairing.
frame34.jpg.json and Image34.png.json are pairing.
frame358.jpg.json and Image358.png.json are pairing.
frame493.jpg.json and Image493.png.json are pairing.
frame579.jpg.json and Image579.png.json are pairing.
frame75.jpg.json and Image75.png.json are pairing.
frame8.jpg.json and Image8.png.json are pairing.


# Dice Score

In [6]:
print("Dice Similarity Coefficient\n")

# For every frame pair, each cell outlined by expert 1 is compared with the
# cell(s) of expert 2 that carry the same tag name. Both outlines are rasterised
# into 0/1 masks and scored with Dice = 2 * |A n B| / (|A| + |B|).
for i, file1 in enumerate(paths1):
    with open("/content/" + DIR1Name + "/" + file1) as f:  # annotation of expert 1
        seg = json.load(f)
    with open("/content/" + DIR2Name + "/" + paths2[i]) as f:  # annotation of expert 2
        gt = json.load(f)

    # Mask size is taken from expert 1's file and used for both masks.
    HEIGHT, WIDTH = seg["size"]["height"], seg["size"]["width"]

    for j, cell_seg in enumerate(seg["objects"]):
        # Rasterise expert 1's polygon: pixels inside the outline become 1.
        img_seg = np.zeros((HEIGHT, WIDTH))
        contours = np.array(cell_seg["points"]["exterior"])
        cv2.fillPoly(img_seg, pts=[contours], color=(1, 1, 1))
        tag_seg = cell_seg["tags"][0]["name"]  # the first tag names the cell

        found = False  # becomes True once expert 2 has a cell with the same tag
        for k, cell_gt in enumerate(gt["objects"]):
            tag_gt = cell_gt["tags"][0]["name"]
            if tag_seg != tag_gt:
                continue  # different cell, nothing to compare

            found = True
            # Rasterise expert 2's polygon on a canvas of the same size.
            img_gt = np.zeros((HEIGHT, WIDTH))
            contours = np.array(cell_gt["points"]["exterior"])
            cv2.fillPoly(img_gt, pts=[contours], color=(1, 1, 1))

            # Pixels of expert 1's mask that lie inside expert 2's mask.
            overlap = np.sum(img_seg[img_gt == 1])
            dice = 2.0 * overlap / (img_seg.sum() + img_gt.sum())

            # [:-5] strips the trailing ".json" from the file names.
            print('The {cell1} in the {data1} and the {cell2} in the {data2} consistence %{dice_score}'.format(
                cell1=tag_seg,
                data1=paths1[i][:-5],
                cell2=tag_gt,
                data2=paths2[i][:-5],
                dice_score=round(dice * 100, 2)))

        if not found:
            print("The {} in {} is not found".format(tag_seg, paths1[i][:-5]))

Dice Similarity Coefficient

The Cell_6 in the frame1.jpg and the Cell_6 in the Image1.png consistence %95.77
The Cell_5 in the frame1.jpg and the Cell_5 in the Image1.png consistence %93.65
The Cell_2 in the frame1.jpg and the Cell_2 in the Image1.png consistence %95.66
The Cell_8 in the frame1.jpg and the Cell_8 in the Image1.png consistence %94.39
The Cell_7 in the frame1.jpg and the Cell_7 in the Image1.png consistence %97.44
The Cell_1 in the frame1.jpg and the Cell_1 in the Image1.png consistence %96.44
The Cell_3 in the frame1.jpg and the Cell_3 in the Image1.png consistence %97.94
The Cell_4 in the frame1.jpg and the Cell_4 in the Image1.png consistence %95.65
The Cell_9 in the frame1.jpg and the Cell_9 in the Image1.png consistence %96.71
The Cell_6 in the frame13.jpg and the Cell_6 in the Image13.png consistence %95.49
The Cell_5 in the frame13.jpg and the Cell_5 in the Image13.png consistence %96.51
The Cell_7 in the frame13.jpg and the Cell_7 in the Image13.png consistence 

# Jaccard Index

In [7]:
print("Jaccard Similarity Coefficient\n")

# Column layout of the result table. 'Name', 'Frame' and 'Coefficient' are
# declared but never filled by the loop below (they stay NaN); they are kept so
# that code expecting those columns keeps working.
jaccard_columns = ['Name', 'Frame', 'Coefficient', 'Cell Tag', 'Number of Frame', 'Jaccard']

# Rows are gathered in a plain list and converted to a DataFrame once at the end.
# DataFrame.append was removed in pandas 2.0 and copied the whole frame per row.
jaccard_rows = []

# For every frame pair, each cell outlined by expert 1 is compared with the
# cell(s) of expert 2 that carry the same tag name. Both outlines are rasterised
# into 0/1 masks and scored with Jaccard = |A n B| / |A u B|.
for i in range(len(paths1)):
    with open("/content/" + DIR1Name + "/" + paths1[i]) as f:  # annotation of expert 1
        seg = json.load(f)
    with open("/content/" + DIR2Name + "/" + paths2[i]) as f:  # annotation of expert 2
        gt = json.load(f)

    # Mask size is taken from expert 1's file and used for both masks.
    HEIGHT = seg["size"]["height"]
    WIDTH = seg["size"]["width"]

    for j in range(len(seg["objects"])):
        cell_seg = seg["objects"][j]

        # Rasterise expert 1's polygon: pixels inside the outline become 1.
        img_seg = np.zeros((HEIGHT, WIDTH))
        contours = np.array(cell_seg["points"]["exterior"])
        cv2.fillPoly(img_seg, pts=[contours], color=(1, 1, 1))

        found = False  # becomes True once expert 2 has a cell with the same tag
        for k in range(len(gt["objects"])):
            cell_gt = gt["objects"][k]

            if cell_seg["tags"][0]["name"] == cell_gt["tags"][0]["name"]:
                found = True
                # Rasterise expert 2's polygon on a canvas of the same size.
                img_gt = np.zeros((HEIGHT, WIDTH))
                contours = np.array(cell_gt["points"]["exterior"])
                cv2.fillPoly(img_gt, pts=[contours], color=(1, 1, 1))

                # Intersection is computed once; union = |A| + |B| - |A n B|.
                intersection = np.sum(img_seg[img_gt == 1])
                jaccard = intersection / (np.sum(img_seg) + np.sum(img_gt) - intersection)

                # The frame number is read from the first frame-level tag of
                # expert 2's file.
                jaccard_rows.append({
                    'Cell Tag': cell_seg["tags"][0]["name"],
                    'Number of Frame': gt["tags"][0]["value"],
                    'Jaccard': round(jaccard, 2)
                })

                # Print the comparison; [:-5] strips the trailing ".json".
                print('The {cell1} in the {data1} and the {cell2} in the {data2} consistence %{jaccard_score}'.format(
                    data1=paths1[i][:-5],
                    cell1=cell_seg["tags"][0]["name"],
                    data2=paths2[i][:-5],
                    cell2=cell_gt["tags"][0]["name"],
                    jaccard_score=round(jaccard * 100, 2)))

        if not found:
            print("The {} in {} is not found.".format(cell_seg["tags"][0]["name"], paths1[i][:-5]))

# One row per matched cell pair, in the order the pairs were found.
df_jaccard = pd.DataFrame(jaccard_rows, columns=jaccard_columns)

Jaccard Similarity Coefficient

The Cell_6 in the frame1.jpg and the Cell_6 in the Image1.png consistence %91.88
The Cell_5 in the frame1.jpg and the Cell_5 in the Image1.png consistence %88.06
The Cell_2 in the frame1.jpg and the Cell_2 in the Image1.png consistence %91.68
The Cell_8 in the frame1.jpg and the Cell_8 in the Image1.png consistence %89.38
The Cell_7 in the frame1.jpg and the Cell_7 in the Image1.png consistence %95.01
The Cell_1 in the frame1.jpg and the Cell_1 in the Image1.png consistence %93.12
The Cell_3 in the frame1.jpg and the Cell_3 in the Image1.png consistence %95.97
The Cell_4 in the frame1.jpg and the Cell_4 in the Image1.png consistence %91.66
The Cell_9 in the frame1.jpg and the Cell_9 in the Image1.png consistence %93.62
The Cell_6 in the frame13.jpg and the Cell_6 in the Image13.png consistence %91.36
The Cell_5 in the frame13.jpg and the Cell_5 in the Image13.png consistence %93.26
The Cell_7 in the frame13.jpg and the Cell_7 in the Image13.png consisten

In [8]:
# Sort the rows by frame number, then by the numeric part of the cell tag.
# The frame number is stored under 'Number of Frame'; the 'Frame' column is never
# populated, so using it as a sort key had no effect on the order.
# Both sort keys are temporary helper columns and are dropped again afterwards.
df_jaccard = (
    df_jaccard
    .assign(
        # Values that cannot be read as numbers become NaN and sort last.
        _frame_order=pd.to_numeric(df_jaccard['Number of Frame'], errors='coerce'),
        # Raw string: '\d' is not a valid escape in a normal string literal.
        _cell_order=df_jaccard['Cell Tag'].str.extract(r'(\d+)', expand=False).astype(int),
    )
    .sort_values(['_frame_order', '_cell_order'], ascending=True)
    .drop(columns=['_frame_order', '_cell_order'])
)

In [9]:
# Bubble size encodes disagreement: the lower the Jaccard score, the larger the
# marker. Dividing by 1.1 keeps the size above zero even for a perfect score
# (for scores in [0, 1] the size runs from 1 down to about 0.09).
marker_size = 1 - df_jaccard["Jaccard"] / 1.1

# One marker per matched cell: frame number on x, cell tag on y, score as colour.
fig = px.scatter(
    df_jaccard,
    x="Number of Frame",
    y="Cell Tag",
    color="Jaccard",
    size=marker_size,
    size_max=15,
    width=1200,
    height=600,
    title="Comparison of Expert Labelings",
)
fig.show()
# Optional exports, disabled by default:
# fig.write_image("Comparison_of_Expert_Labelings.png", scale=1.5) #Save the static chart
# fig.write_html("Comparison_of_Expert_Labelings.html") #Save the interactive chart