In [218]:
import pandas as pd
import os
import random

import json
import pydicom

In [16]:
df = pd.read_csv("corflow/frame_selection/dataset.csv")
df

Unnamed: 0,tag_id,image_id,frame,segmentation,timestamp,user
0,84f6ac8a-39b8-46a0-9572-432ab85dffb4,131aedfhs6pnf1fvtvp49mjf02x1fl1f22,38,eJztne1is7YSBkmcvE7u/4bb2E5sQEK7Qrta0MyfnmAM6p...,2024-01-24 16:02:32.568104,lekandnow@gmail.com
1,4f573378-1388-46de-951b-95f1e44d390e,131aedfhs6pnf1fvtvp49mjf04wc7y5722,23,eJztnel6o7oSALmZLJ73f+FznWQSQFu31C2EVPVrYrPNV4...,2024-01-24 16:02:32.638365,lekandnow@gmail.com
2,09d328c9-8b75-464e-a68d-f30dd9db2499,131aedfhs6pnf1fvtvp49mjf04y0vtqu22,36,eJztnel2o7gWRulyust5/xe+1xlsBknoTBrM3j96pYMBZe...,2024-01-24 16:02:32.616167,lekandnow@gmail.com
3,b83370d1-4bbb-44f5-ad75-8f8f6b572836,131aedfhs6pnf1fvtvp49mk5l3qcg5hh22,18,eJztne1im7oSAJWe49O67//A9+LEtgAB+tjVrqyZP20dG0...,2024-01-24 16:13:02.512389,lekandnow@gmail.com
4,222454ee-2b93-4854-ad91-5d37147ac04e,131aedfhs6pnf1fvtvp49mk5l3ra0gpr22,21,eJztnVl2pDgQAHGX7Zmu+x94pnZAW0ogKaWM+GvbDfhFlA...,2024-01-24 16:13:02.879827,lekandnow@gmail.com
...,...,...,...,...,...,...
1349,20a157b9-9e37-4c98-be0f-ad8de7500e98,13c2ur549vohc0jat2dvu3xs0c1,20,eJzt3N1um0oAhVGrwX7/Rz7qaZomMPwMBjOevdZNL0IsrG...,2022-12-21 15:57:26.939252,sgurba@gmail.com
1350,ccee2841-9bbf-47ab-b0df-2fb65be95751,13c2ur549vohc0jat2dvu3xs2y1,24,eJzt3dtyGjsARFEXY/P/n3zKPk6wGWmugCL1Ws+hLLKbIW...,2022-12-21 15:57:26.987265,sgurba@gmail.com
1351,8f393fb5-7030-4518-9cd7-8c7ca0eeafce,13c2ur549vohc0jat2dvu3xs7q1,18,eJzt3dty20gSRVEG7enR/3/xDO22whJxKQBZALJyrce2zC...,2022-12-21 15:57:27.042089,sgurba@gmail.com
1352,e50dfcfb-dff2-438c-8305-b532b8dde8fd,13c2ur549vohc0jat2dvu3xs9w1,23,eJzt2wtO40gARdEM2L3/JY9QT2fIv0M+Jdc9ZwGopPuwHQ...,2022-12-21 15:57:27.042659,sgurba@gmail.com


In [17]:
json_files = os.listdir("corflow/frame_selection/metadata")
json_files = [f for f in json_files if f.endswith(".json")]
dcm_files = os.listdir("corflow/frame_selection/dicoms/")
dcm_files = [f for f in dcm_files if f.endswith(".dcm")]

json_image_ids = [f.replace(".json", "") for f in json_files]
dcm_image_ids = [f.replace(".dcm", "") for f in dcm_files]

In [18]:
print(f"Unique tag_id: {len(df.tag_id.unique())}")
print(f"Unique image_id: {len(df.image_id.unique())}")

print(f"DCM files: {len(dcm_files)}")
print(f"Metadata json files: {len(json_files)}")

Unique tag_id: 1354
Unique image_id: 570
DCM files: 541
Metadata json files: 570


In [19]:
parent_dir = "corflow/frame_selection/metadata/"

metadata_dicts = {}
for name in json_files:
    with open(parent_dir + name) as file:
        data = json.load(file)
        metadata_dicts[name.replace(".json", "")] = data


df_metadata = pd.DataFrame(metadata_dicts)

In [28]:
parent_dir = "corflow/frame_selection/dicoms/"

dic = pydicom.dcmread(parent_dir + dcm_files[0]).pixel_array

In [56]:
import plotly.graph_objects as go
from ipywidgets import widgets


# def plot_dcm(dcm):
dcm =pydicom.dcmread(parent_dir + dcm_files[0]) 
px_array=dcm.pixel_array
layer_slider = widgets.IntSlider(min=0, max=px_array.shape[0]-1, step=1, value=0, description='Frame')
widgets.interactive(lambda i: plt.imshow(px_array[i]), i=layer_slider)


interactive(children=(IntSlider(value=0, description='Frame', max=50), Output()), _dom_classes=('widget-intera…

In [243]:
class FrameSelectionDataset:
    def __init__(self, random_state=0):
        self.metadata_df = self.__load_json_metadata("corflow/frame_selection/metadata/")
        self.segmentations_df = pd.read_csv("corflow/frame_selection/dataset.csv")
        self.feedback_df = pd.read_csv("corflow/framerejectionfeedback.csv")
        self.dcm_filenames = self.__index_dicoms("corflow/frame_selection/dicoms/")  

        self.id_map = self.__id_map()
        
    def __load_json_metadata(self, parent_dir):
        json_files = os.listdir(parent_dir)
        json_files = [f for f in json_files if f.endswith(".json")]
        metadata_dicts = {}
        for name in json_files:
            with open(os.path.join(parent_dir, name)) as file:
                data = json.load(file)
                metadata_dicts[name.replace(".json", "")] = data
        
        return pd.DataFrame(metadata_dicts).T

    
    def __index_dicoms(self, parent_dir):
        dcm_files = os.listdir(parent_dir)
        dcm_files = [f for f in dcm_files if f.endswith(".dcm")]
        return {f.replace(".dcm", ""): os.path.join(parent_dir, f) for f in dcm_files}
        

    def get_dcm(self, image_id):
        if image_id not in self.dcm_filenames:
            raise ValueError(f"Image ID {image_id} not found.")

        return pydicom.dcmread(self.dcm_filenames[image_id])

    def get_segmentation(self, image_id, frame_no):
        image_id, frame_no = idx

        raise NotImplementedError

    def __getitem__(self, idx):
        image_id, frame_no = self.id_map[idx]
        print(image_id, frame_no)
        print(type(image_id), type(frame_no))

        dcm_img = pydicom.dcmread(self.dcm_filenames[image_id])
        print(dcm_img.pixel_array)
        frame = dcm_img.pixel_array[frame_no]
        return frame

    def __id_map(self, random_state=0):
        ids = self.good_frame_ids + self.bad_frame_ids
        random.shuffle(ids)
        return ids
        
    @property
    def segmented_frame_ids(self):
        return [(a, int(b)) for a, b in fsd.segmentations_df[["image_id", "frame"]].values]

    @property
    def good_frame_ids(self):
        return [(a, int(b)) for a, b in fsd.feedback_df[["IMAGE_ID", "SELECTEDFRAME"]].values]

    @property
    def bad_frame_ids(self):
        return [(a, int(b)) for a, b in fsd.feedback_df[["IMAGE_ID", "REJECTEDFRAME"]].values]
        

fsd = FrameSelectionDataset()

SyntaxError: invalid syntax (3336831838.py, line 51)

In [242]:
fsd[12]}

SyntaxError: unmatched '}' (2954089246.py, line 1)