In [1]:
! pip install --no-index --find-links=/kaggle/input/monai-whl monai
! pip install --no-index --find-links=/kaggle/input/torchio torchio
! pip install --no-index --find-links=/kaggle/input/addict addict

Looking in links: /kaggle/input/monai-whl
Processing /kaggle/input/monai-whl/monai-1.1.0-202212191849-py3-none-any.whl
Installing collected packages: monai
Successfully installed monai-1.1.0
Looking in links: /kaggle/input/torchio
Processing /kaggle/input/torchio/torchio-0.18.86-py2.py3-none-any.whl
Processing /kaggle/input/torchio/Deprecated-1.2.13-py2.py3-none-any.whl
Processing /kaggle/input/torchio/shellingham-1.5.0.post1-py2.py3-none-any.whl
Installing collected packages: shellingham, Deprecated, torchio
Successfully installed Deprecated-1.2.13 shellingham-1.5.0.post1 torchio-0.18.86
Looking in links: /kaggle/input/addict
Processing /kaggle/input/addict/addict-2.4.0-py3-none-any.whl
Installing collected packages: addict
Successfully installed addict-2.4.0
[0m

In [2]:
import os, sys, platform, subprocess
# Directory under /kaggle/input that this notebook treats as the project root;
# the run configuration (config/test_config.yaml) is read from it later on.
repo = "/kaggle/input/rsna-breast-cancer-2023/"
# Put it first on the module search path. Presumably this is what makes the
# `submission`, `preprocessing` and `splitdata` imports at the end of this cell
# resolve, so it has to run before them.
sys.path.insert(0, repo)
from os.path import join, dirname, basename, abspath
import time
import numpy as np
import pandas as pd
import pydicom
import SimpleITK as sitk
import cv2
import torch
import tensorflow
import matplotlib.pyplot as plt
import yaml
import h5py
import json
from glob import glob
# import addict
from addict import Dict
# import monai
import monai
# import torchio
import torchio as tio
# import custom classes

from submission import Submission
from preprocessing import MammoPreprocess, MetadataPreprocess
from splitdata import SplitData

In [3]:
def randresult(md_path, seed=None):
    """Build a placeholder result of random cancer scores.

    Reads the metadata CSV at ``md_path``, forms one ``prediction_id`` per row
    as ``"<patient_id>_<laterality>"``, draws a uniform random score in
    ``[0, 1)`` for every row and averages the scores of rows that share a
    ``prediction_id``.

    Parameters
    ----------
    md_path : str or path-like
        CSV file containing ``patient_id`` and ``laterality`` columns.
        ``laterality`` values must be strings (they are joined with ``"_"``).
    seed : int, optional
        When given, scores are drawn from a private
        ``np.random.RandomState(seed)`` so repeated calls return identical
        values. When ``None`` (the default) the global NumPy random state is
        used, which is the behaviour callers had before this parameter existed.

    Returns
    -------
    pandas.Series
        Series named ``"cancer"`` indexed by ``prediction_id`` (groupby's
        default ordering, i.e. sorted keys).
    """
    test_md = pd.read_csv(md_path)
    pats = test_md["patient_id"].tolist()
    lats = test_md["laterality"].tolist()
    pat_ids = ["_".join(item) for item in zip(map(str, pats), lats)]

    # A dedicated generator keeps a seeded call from disturbing (or being
    # disturbed by) whatever else uses the global NumPy random state.
    draw = np.random.rand if seed is None else np.random.RandomState(seed).rand
    rand_cancer = draw(len(pat_ids))

    raw_results = pd.DataFrame({"prediction_id": pat_ids, "cancer": rand_cancer})
    # Several rows can map to the same patient/laterality pair; collapse them
    # to a single score per prediction_id.
    results = raw_results.groupby("prediction_id")["cancer"].mean()
    return results

def _timed(fn, *args):
    """Call ``fn(*args)`` and return ``(result, elapsed_seconds)``."""
    # perf_counter is monotonic, so the interval cannot be skewed by
    # system clock adjustments the way time.time() differences can.
    start = time.perf_counter()
    result = fn(*args)
    return result, time.perf_counter() - start


def preprocess_loop(cfgs):
    """Run metadata and image preprocessing and record how long each step took.

    Steps, in order:
      1. Construct ``MammoPreprocess`` from ``cfgs.paths`` and
         ``cfgs.preprocess_params``.
      2. Construct ``MetadataPreprocess`` from ``cfgs.paths`` and
         ``cfgs.metadata_params``.
      3. ``GenerateMetadata()`` followed by ``Save()`` on the metadata object.
      4. ``GenerateDataset()`` on the image preprocessing object.

    The four durations (seconds) are written as JSON to
    ``cfgs.paths.timesheet_dest`` under the keys
    ``metadata.{initialization,process}`` and
    ``preprocessing.{initialization,process}``.

    Parameters
    ----------
    cfgs : addict.Dict
        Configuration exposing ``paths`` (``data_src``, ``data_dest``,
        ``metadata_src``, ``metadata_dest``, ``timesheet_dest``),
        ``preprocess_params`` (``file_extension``, ``resolution``,
        ``init_downsample_ratio``, ``normalization``) and ``metadata_params``.

    Returns
    -------
    None
    """
    paths = cfgs.paths
    pcfgs = cfgs.preprocess_params
    mcfgs = cfgs.metadata_params

    data_prep, prep_init_time = _timed(
        MammoPreprocess,
        paths.data_src, paths.data_dest,
        pcfgs.file_extension, pcfgs.resolution,
        pcfgs.init_downsample_ratio,
        pcfgs.normalization,
    )
    mdata_prep, md_init_time = _timed(
        MetadataPreprocess, paths.metadata_src, paths.metadata_dest, mcfgs
    )

    def _generate_and_save_metadata():
        # Timed as one unit: generation plus writing the result out.
        mdata_prep.GenerateMetadata()
        mdata_prep.Save()

    # Metadata is processed before the image dataset, as in the original flow.
    _, md_proc_time = _timed(_generate_and_save_metadata)
    _, prep_proc_time = _timed(data_prep.GenerateDataset)

    timesheet = Dict()
    timesheet.metadata.initialization = md_init_time
    timesheet.metadata.process = md_proc_time
    timesheet.preprocessing.initialization = prep_init_time
    timesheet.preprocessing.process = prep_proc_time

    with open(paths.timesheet_dest, "w") as f:
        json.dump(timesheet, f, indent=4)
    print(f"Timesheet created in {paths.timesheet_dest}.")
    return

def splitdata_loop(cfgs):
    """Run the data split described by ``cfgs``.

    Calls ``SplitData`` with the metadata and data-id destinations from
    ``cfgs.paths`` followed by ``test_set``, ``num_samples`` and ``test_size``
    from ``cfgs.preprocess_params``. Whatever ``SplitData`` returns is
    discarded; this function returns ``None``.
    """
    locations = cfgs.paths
    split_args = (
        locations.metadata_dest,
        locations.data_ids_dest,
        cfgs.preprocess_params.test_set,
        cfgs.preprocess_params.num_samples,
        cfgs.preprocess_params.test_size,
    )
    SplitData(*split_args)

def main(cfile):
    """Load the YAML configuration at ``cfile`` and run the pipeline.

    The parsed configuration is wrapped in an ``addict.Dict`` and passed first
    to ``preprocess_loop`` and then to ``splitdata_loop``.

    Parameters
    ----------
    cfile : str
        Path to the YAML configuration file; resolved with ``abspath``.

    Returns
    -------
    None
    """
    # NOTE(review): yaml.Loader can construct arbitrary Python objects, so this
    # must only ever be pointed at trusted config files. Consider
    # yaml.safe_load if the configs contain plain scalars/lists/mappings only.
    # The context manager closes the file handle, which the bare open() leaked.
    with open(abspath(cfile), "r") as cfg_stream:
        cfgs = Dict(yaml.load(cfg_stream, Loader=yaml.Loader))
    preprocess_loop(cfgs)
    splitdata_loop(cfgs)

In [4]:
# Inputs for this run: the competition's test metadata CSV and the test
# configuration stored under the project root.
test_md_path = "/kaggle/input/rsna-breast-cancer-detection/test.csv"
config_file = join(repo, "config/test_config.yaml")

# Preprocess according to the config, then split the data.
main(config_file)

Metadata file created in /kaggle/working/preprocessed/metasub.json.
Preprocessing |████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 Images
/kaggle/working/preprocessed/mammosub224.h5 created.
Timesheet created in /kaggle/working/preprocessed/sub_timesheet.json.


In [5]:
# Placeholder submission: one random score per patient/laterality pair,
# written with the prediction_id index as the first CSV column
# (Series.to_csv writes the index by default).
res = randresult(test_md_path)
res.to_csv("/kaggle/working/submission.csv")