In [None]:
import os, sys
# Directory put at the front of the import path; presumably the checkout that
# holds the `dataset`, `models` and `utils` modules imported below (confirm).
# The default is the Linux workstation location. Export HANA_MAMMOGRAPH_REPO to
# point at another checkout (for example
# "/Users/isaiah/Github/hana_mammograph/isaiah/") instead of editing this cell.
repo = os.environ.get("HANA_MAMMOGRAPH_REPO", "/home/isaiah/hana_mammograph/isaiah/")
# Keep the checkout first on sys.path, but do not stack another duplicate
# entry each time the cell is re-run.
if sys.path[:1] != [repo]:
    sys.path.insert(0, repo)
from os.path import isdir, abspath, dirname
from collections import defaultdict
import numpy as np
import json
import csv
from addict import Dict
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.optim import SGD, Adam
from torch.optim.lr_scheduler import ExponentialLR, CyclicLR
from dataset import MammoH5Data, GroupSampler
from models import DenseNet
from utils import printProgressBarRatio
import h5py
import pydicom

import matplotlib.pyplot as plt

In [None]:
# Input files for this notebook, all stored in one "preprocessed" directory on
# the Linux workstation. The macOS setup kept its copies under
# /Users/isaiah/datasets/kaggle_mammograph/preprocessed/ as mammodata224_2.h5,
# metadata.json and train_split.json.
preprocessed_dir = "/home/isaiah/kaggle/mammo/preprocessed"
# HDF5 file holding the image arrays, keyed by image id.
datapath = f"{preprocessed_dir}/mammodata224_2.h5"
# Metadata JSON handed to MammoH5Data together with the HDF5 file.
metadatapath = f"{preprocessed_dir}/metadata_2.json"
# JSON with the "train"/"val" id lists used to build the sampler.
dataidspath = f"{preprocessed_dir}/train_split_2.json"

# Metadata-handling options, wrapped in an addict Dict so that the keys are
# also reachable as attributes.
# NOTE(review): no cell visible here reads this object; it is presumably
# consumed by a metadata preprocessing step elsewhere. Confirm before relying
# on the meaning of individual keys.
metadata_params = Dict(dict(
    test_set=False,
    selected_columns=[
        'image_id', 'patient_id', 'laterality', 'view', 'age',
        'cancer', 'implant', 'density', 'machine_id',
        'difficult_negative_case',
    ],
    default_value='na',
    age_nan="mean",
    # Categorical value -> integer code lookups.
    laterality_map={'L': 0, 'R': 1},
    view_map={'CC': 0, 'MLO': 1},
    density_map={'A': 1, 'B': 2, 'C': 3, 'D': 4},
    diff_neg_case_map={"FALSE": 0, "TRUE": 1},
))

# Options passed to MammoH5Data when the dataset is built further down:
# the augmentation names to apply, the label columns to return, and a sampling
# ratio (exact semantics are defined by MammoH5Data, not visible here).
dataset_params = Dict(dict(
    augmentations=["contrast_brightness", "flip", "rotate", "noise"],
    labels=["cancer", "laterality"],
    sample_ratio=0.5,
))
    
# Read the split file; later cells index the result as data_ids[split][class],
# e.g. data_ids["train"]["cancer"].
with open(dataidspath, "r") as split_file:
    split_ids = json.load(split_file)
data_ids = Dict(split_ids)

# Hand-written replacement for quick experiments on a few known images:
# data_ids = {"train": {"cancer": [],
#                      "healthy": ["197998560", "408838480", "490308031", "1287996498",
#                      "1568473810", "1634189725", "1903499763", "1977237482"]},
#            "val": {"cancer": [],
#                    "healthy": []}
#           }

# Class names used as keys into each split of data_ids.
classes = ['cancer', 'healthy']
# Number of samples per DataLoader batch.
batch_size = 4

In [None]:
# Dataset backed by the HDF5 image file and the metadata JSON.
# NOTE(review): the leading "cpu" argument is presumably the target device;
# confirm against MammoH5Data's signature.
data = MammoH5Data("cpu", datapath, metadatapath, dataset_params)

# Draw only from the first class in `classes`, pooling its train and val ids.
positive_class = classes[0]
cancer_ids = data_ids["train"][positive_class] + data_ids["val"][positive_class]
train_sampler = GroupSampler(cancer_ids, shuffle=True)
trainloader = DataLoader(data, batch_size=batch_size, sampler=train_sampler)

In [None]:
# Pull one batch from the loader, print its labels and preview every image.
(img_id, img, gt) = next(iter(trainloader))
print(gt)

# Convert the tensors once instead of once per panel.
batch_ids = img_id.detach().numpy()
batch_imgs = img.detach().numpy()
# Shape of the whole batch tensor; it is appended to every panel title.
batch_shape = str(img.detach().shape)

# Two panels per row, one row per pair of images. squeeze=False keeps `axs`
# two-dimensional even for a single row, so a batch of any size can be drawn.
# The previous hard-coded 2x2 layout raised IndexError for batches with fewer
# than four images; a batch of four still yields the same 2x2, 14x14 figure.
n_images = len(batch_ids)
n_rows = max(1, (n_images + 1) // 2)
fig, axs = plt.subplots(n_rows, 2, figsize=(14, 7 * n_rows), squeeze=False)
for idx, ax in enumerate(axs.flat):
    if idx >= n_images:
        # Spare panel left over when the batch holds an odd number of images.
        ax.set_axis_off()
        continue
    ax.set_title(str(batch_ids[idx]) + " " + batch_shape)
    # squeeze() drops singleton dimensions so imshow receives a 2-D array.
    ax.imshow(batch_imgs[idx].squeeze(), cmap="bone")
plt.show()

In [None]:
# Close the current figure (if any) before drawing the comparison plots.
plt.close()

# Image 197998560 as stored in the preprocessed HDF5 file.
with h5py.File(datapath, "r") as h5_store:
    im = h5_store["197998560"][:]
fig2, ax2 = plt.subplots(figsize=(10, 10))
ax2.imshow(im, cmap="bone")
plt.show()

# The DICOM file with the same image id, shown for side-by-side comparison
# with the preprocessed version above.
image_file = "/home/dataset/kaggle/input/train_images/3768/197998560.dcm"
dcm = pydicom.dcmread(image_file)
ds = dcm.pixel_array
fig3, ax3 = plt.subplots(figsize=(10, 10))
ax3.imshow(ds, cmap="bone")
plt.show()