In [1]:
import torch

In [2]:
# Check whether PyTorch can see a CUDA GPU (True on the machine that produced this output).
torch.cuda.is_available()

True

In [3]:
# Index of the CUDA device PyTorch is currently using (0 in the recorded output).
torch.cuda.current_device()

0

In [4]:
# Name of the CUDA device; no index is passed, so presumably this is the current device.
torch.cuda.get_device_name()

'NVIDIA RTX A6000'

In [5]:
# GPU memory allocated so far; the recorded output is 0 because nothing has been moved to the GPU yet.
torch.cuda.memory_allocated()

0

In [6]:
# GPU memory reserved so far; also 0 in the recorded output at this point of the notebook.
torch.cuda.memory_reserved()

0

In [7]:
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models # add models to the list
from torchvision.utils import make_grid
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from IPython.display import display
%matplotlib inline

# ignore harmless warnings
import warnings
warnings.filterwarnings("ignore")

In [8]:
# The project root is taken from the kernel's current working directory, so the
# notebook must be started from the folder that contains the dataset directory.
PROJECT_ROOT = os.getcwd()

In [9]:
# Dataset root: the 'brain tumor data' folder directly under the project root.
root = os.path.join(PROJECT_ROOT, 'brain tumor data')

In [10]:
# Show what the dataset root contains (test/, valid/, train/ and a README in the recorded output).
os.listdir(root) 

['test', 'valid', 'README.txt', 'train']

In [11]:
# Folder of each dataset split, built with os.path.join like the other paths in this notebook.
# The trailing empty component keeps the trailing separator that the previous
# string-concatenated versions had, so code that appends a file name directly still works.
test_path = os.path.join(root, 'test', '')
train_path = os.path.join(root, 'train', '')
validation_path = os.path.join(root, 'valid', '')

In [12]:
import json
# COCO annotation file of the training split (joined with os.path.join instead of a hard-coded '/').
annotation_file = os.path.join(root, 'train', '_annotations.coco.json')

In [13]:
# First step: load the raw training annotation file. Every annotation's image_id
# is expected to link to an id in the "images" section of the JSON, so the next
# cells look for mismatches in the "annotations" section.
with open(annotation_file, "r") as annotation_fh:
    coco_data = json.load(annotation_fh)

# Set (not a dictionary) of the image ids declared in the "images" section.
valid_image_ids = set(img['id'] for img in coco_data['images'])



In [14]:
# Collect the annotations whose own "id" differs from their "image_id".
# NOTE(review): in the COCO format an annotation "id" and its "image_id" are
# independent keys, so a difference is not necessarily an error — confirm against
# the "images" section before treating these records as wrong.
mismatched_annotation = []
for annotation in coco_data["annotations"]:
    annotation_id = annotation["id"]
    image_id = annotation["image_id"]
    if annotation_id != image_id:
        mismatched_annotation.append((annotation_id, image_id))

# Report the result. The "else" pairs with the "if" (not with the "for" loop),
# so "all matches" is printed only when no mismatch was found.
if mismatched_annotation:
    print("found annotation where id and image_id are not match:")
    for annotation_id, image_id in mismatched_annotation:
        print(f"annotation id {annotation_id} has image_id {image_id} mismatch")
else:
    print("all matches")

found annotation where id and image_id are not match:
annotation id 1006 has image_id 1005 mismatch
annotation id 1007 has image_id 1006 mismatch
annotation id 1008 has image_id 1007 mismatch
annotation id 1009 has image_id 1008 mismatch
annotation id 1010 has image_id 1009 mismatch
annotation id 1011 has image_id 1010 mismatch
annotation id 1012 has image_id 1011 mismatch
annotation id 1013 has image_id 1012 mismatch
annotation id 1014 has image_id 1013 mismatch
annotation id 1015 has image_id 1014 mismatch
annotation id 1016 has image_id 1015 mismatch
annotation id 1017 has image_id 1016 mismatch
annotation id 1018 has image_id 1017 mismatch
annotation id 1019 has image_id 1018 mismatch
annotation id 1020 has image_id 1019 mismatch
annotation id 1021 has image_id 1020 mismatch
annotation id 1022 has image_id 1021 mismatch
annotation id 1023 has image_id 1022 mismatch
annotation id 1024 has image_id 1023 mismatch
annotation id 1025 has image_id 1024 mismatch
annotation id 1026 has ima

In [15]:
# After inspecting the raw data, image_id values from 1005 onwards were judged to be
# assigned incorrectly, so every annotation's "image_id" is overwritten with its own "id".
# The annotation file is re-read first so this cell always starts from the data on disk.
# NOTE(review): this assumes exactly one annotation per image, stored in image order.
# If an image really has two annotations (or none), every later annotation is re-pointed
# at a different image — confirm against the raw data before training on these labels.
with open(annotation_file, "r") as f:
    coco_data = json.load(f)
for annotation in coco_data["annotations"]:
    annotation["image_id"] = annotation["id"]

# Sanity check (report only, nothing is modified): after the rewrite every image_id
# should refer to an entry of the "images" section, and every image should be referenced.
known_image_ids = {img["id"] for img in coco_data["images"]}
rewritten_image_ids = {ann["image_id"] for ann in coco_data["annotations"]}
dangling_image_ids = sorted(rewritten_image_ids - known_image_ids)
unreferenced_image_ids = sorted(known_image_ids - rewritten_image_ids)
if dangling_image_ids:
    print(f"warning: {len(dangling_image_ids)} rewritten image_id value(s) do not exist "
          f"in the images section, e.g. {dangling_image_ids[:10]}")
if unreferenced_image_ids:
    print(f"warning: {len(unreferenced_image_ids)} image(s) have no annotation after the "
          f"rewrite, e.g. {unreferenced_image_ids[:10]}")

In [16]:
# Write the corrected annotations to a new file inside the training folder,
# leaving the original annotation file untouched.
fixed_annotation_file = os.path.join(train_path, "fixed_annotations.coco.json")
with open(fixed_annotation_file, "w") as fixed_out:
    json.dump(coco_data, fixed_out, indent=4)

In [17]:

import json


In [18]:
# Reload the corrected file from disk so the following cells inspect what was actually saved.
with open(fixed_annotation_file, "r") as fixed_in:
    fixed_coco_data = json.loads(fixed_in.read())

In [19]:
# Top-level sections of the corrected COCO file.
print("keys in coco json file:", fixed_coco_data.keys())

keys in coco json file: dict_keys(['info', 'licenses', 'categories', 'images', 'annotations'])


In [20]:
# Fields of one annotation record (the first entry is used as a representative sample).
print("keys in coco[annotation] json file:", fixed_coco_data['annotations'][0].keys())

keys in coco[annotation] json file: dict_keys(['id', 'image_id', 'category_id', 'bbox', 'area', 'segmentation', 'iscrowd'])


In [21]:
# Fields of one image record (first entry of the "images" section).
print('keys in coco[images] json file:', fixed_coco_data['images'][0].keys())

keys in coco[images] json file: dict_keys(['id', 'license', 'file_name', 'height', 'width', 'date_captured'])


In [22]:
# Fields of one category record (first entry of the "categories" section).
print('keys in coco[category] json file:', fixed_coco_data['categories'][0].keys())

keys in coco[category] json file: dict_keys(['id', 'name', 'supercategory'])


In [23]:
# List every category with its id, name and supercategory.
for category in fixed_coco_data['categories']:
    cat_id, cat_name, cat_super = category['id'], category['name'], category['supercategory']
    print(f"category id: {cat_id}, Name: {cat_name}, supercateory: {cat_super}")


category id: 0, Name: Tumor, supercateory: none
category id: 1, Name: 0, supercateory: Tumor
category id: 2, Name: 1, supercateory: Tumor


In [24]:
# Image id -> file name, taken from the "images" section.
image_id_to_name = dict(
    (img["id"], img["file_name"]) for img in fixed_coco_data["images"]
)
# Image id -> category id, taken from the "annotations" section
# (if an image id occurs more than once, the last annotation wins).
image_id_to_label = dict(
    (ann["image_id"], ann["category_id"]) for ann in fixed_coco_data['annotations']
)
# File names of the images that no annotation refers to.
missing_labels = [
    file_name
    for img_id, file_name in image_id_to_name.items()
    if img_id not in image_id_to_label
]

In [25]:
# Report whether every image received a label; show at most 100 examples otherwise.
if not missing_labels:
    print("all images have labels assigned!")
else:
    print(f"{len(missing_labels)} images are missing labels!")
    print("example missing labels:", missing_labels[:100])

all images have labels assigned!


In [26]:
# Count the .jpg files in the training, test and validation folders.
import os


def _count_jpgs(folder):
    """Return how many entries of ``folder`` have a name ending in '.jpg'."""
    return sum(1 for entry in os.listdir(folder) if entry.endswith('.jpg'))


num_train_images = _count_jpgs(train_path)
num_test_images = _count_jpgs(test_path)
num_validation_images = _count_jpgs(validation_path)

In [27]:
# Print the image count of each split; each message names the folder its value was counted from.
print(f"number of images in training folder: {num_train_images}")
print(f"number of images in test folder: {num_test_images}")
print(f"number of images in validation folder: {num_validation_images}")

number of images in training folder: 1502
number of images in training folder: 215
number of images in training folder: 429


In [28]:
# Peek at the first ten annotations: which image each one refers to and its category.
for annotation in fixed_coco_data["annotations"][:10]:
    img_id, cat_id = annotation['image_id'], annotation['category_id']
    print(f"Image ID: {img_id}, category id: {cat_id}")

# Author's note: category id 1 = non-tumor, category id 2 = tumor.

Image ID: 0, category id: 1
Image ID: 1, category id: 1
Image ID: 2, category id: 1
Image ID: 3, category id: 1
Image ID: 4, category id: 1
Image ID: 5, category id: 1
Image ID: 6, category id: 1
Image ID: 7, category id: 2
Image ID: 8, category id: 2
Image ID: 9, category id: 1


In [29]:
import json
# COCO annotation file of the validation split (joined with os.path.join instead of a hard-coded '/').
annotation_valid_file = os.path.join(root, 'valid', '_annotations.coco.json')

In [30]:
# Load the validation-split annotations.
with open(annotation_valid_file, "r") as valid_fh:
    coco_data_valid = json.loads(valid_fh.read())

# Set (not a dictionary) of the image ids declared in the validation "images" section.
# Note: this rebinds valid_image_ids, which until now held the training-split ids.
valid_image_ids = set(img['id'] for img in coco_data_valid['images'])

In [31]:
# Collect (annotation id, image_id) pairs of validation annotations where the two differ.
mismatched_valid_annotation = [
    (record["id"], record["image_id"])
    for record in coco_data_valid["annotations"]
    if record["id"] != record["image_id"]
]

# Report: either the confirmation line or one line per mismatch.
if not mismatched_valid_annotation:
    print("all matches")
else:
    print("found annotation where id and image_id are not match:")
    for annotation_id, image_id in mismatched_valid_annotation:
        print(f"annotation id {annotation_id} has image_id {image_id} mismatch")

all matches


In [32]:
# check test (unseen) file
import json
# COCO annotation file of the test split (joined with os.path.join instead of a hard-coded '/').
test_annotation_file = os.path.join(root, 'test', '_annotations.coco.json')

In [33]:
# Load the test-split annotations.
with open(test_annotation_file, "r") as test_fh:
    coco_data_test = json.loads(test_fh.read())

# Set (not a dictionary) of the image ids declared in the test "images" section.
test_image_ids = set(img['id'] for img in coco_data_test['images'])

In [34]:
# Collect (annotation id, image_id) pairs of test annotations where the two differ.
mismatched_test_annotation = [
    (record["id"], record["image_id"])
    for record in coco_data_test["annotations"]
    if record["id"] != record["image_id"]
]

# Report: either the confirmation line or one line per mismatch.
if not mismatched_test_annotation:
    print("all matches")
else:
    print("found annotation where id and image_id are not match:")
    for annotation_id, image_id in mismatched_test_annotation:
        print(f"annotation id {annotation_id} has image_id {image_id} mismatch")

all matches
