In [23]:
%matplotlib notebook
import os
import zipfile
import multiprocessing as mp
import subprocess
import shutil

from pathlib import Path
import glob
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image

import tciaclient
import highdicom as hd
import pydicom
from pydicom.filereader import dcmread
from pydicom.sr.codedict import codes
import pylidc as pl

In [None]:
def getResponseString(response):
    if response.getcode() is not 200:
        raise ValueError("Server returned an error")
    else:
        return response.read()

In [None]:
api_key = "16ade9bc-f2fa-4a37-b357-36466a0020fc"
baseUrl="https://services.cancerimagingarchive.net/services/v3"
resource = "TCIA"

In [None]:
client = tciaclient.TCIAClient(baseUrl, resource)

In [None]:
response = client.get_series(modality="CT", collection="LIDC-IDRI")#, bodyPartExamined="CHEST")
strRespSeries = getResponseString(response)

df = pandas.io.json.read_json(strRespSeries)
print(df["Modality"].unique())

In [None]:
labels=pd.read_csv("/mnt/idms/PROJECTS/Lung/LIDC_IDRI/Labels/LIDC-IDRI_MetaData.csv")
labels=labels.rename(columns={"Study UID":"StudyInstanceUID"})
df=df.merge(labels,on="StudyInstanceUID")[["SeriesInstanceUID","Subject ID"]]

In [None]:
downloadPath="/mnt/idms/PROJECTS/Lung/LIDC_IDRI2/"

In [None]:
def download_zip(ids):
    (uid,sid)=ids
    zipFileName=f"LIDC_IDRI_{sid}.zip"
    if not os.path.exists(f"{downloadPath}{zipFileName}"):
        print(f"{downloadPath}{zipFileName}")
        response = client.get_image(uid,downloadPath,zipFileName)

In [None]:
pool = mp.Pool(80)
pool.map(download_zip, zip(list(df["SeriesInstanceUID"]),df["Subject ID"]))

In [None]:
a="jfkldjfdklsfjdklsf.zip"
a[-4:]

In [None]:
def unzip_LIDC(sid):
    if not os.path.exists(f"{downloadPath}{sid}"):
        try:
            print(f"{downloadPath}LIDC_IDRI_{sid}.zip GOOOOOD")
            with zipfile.ZipFile(f"{downloadPath}LIDC_IDRI_{sid}.zip", 'r') as zip_ref:
                zip_ref.extractall(f"{downloadPath}{sid}")
        except:
            os.remove(f"{downloadPath}LIDC_IDRI_{sid}.zip")
            print(f"{downloadPath}LIDC_IDRI_{sid}.zip ERROR")
            #raise

In [None]:
pool = mp.Pool(80)
pool.map(unzip_LIDC, list(df["Subject ID"]))

In [None]:
filename = "/mnt/idms/PROJECTS/Lung/Kaggle-data/cancer_cases/home/peter/lung/data/stage1/stage1/229b8b785f880f61d8dad636c3dc2687/f1a23adea3019e89e938f9de24887ae6.dcm"
ds = pydicom.dcmread(filename)
print((ds.pixel_array).shape)

plt.imshow(ds.pixel_array, cmap=plt.cm.bone)

data_img = Image.fromarray(ds.pixel_array)
data_img_rotated = data_img.rotate(angle=45,resample=Image.BICUBIC,fillcolor=data_img.getpixel((0,0)))
print(type(data_img_rotated))
plt.imshow(data_img_rotated)

In [None]:
from PIL import Image
data_img = Image.fromarray(ds.pixel_array)
data_img_rotated = data_img.rotate(angle=45,resample=Image.BICUBIC,fillcolor=data_img.getpixel((0,0)))

In [None]:
filename_lymp = "/mnt/idms/PROJECTS/Lung/LymphoglandulaeMetastasis/SE000006/50848-0584.dcm"
filename_lung="/mnt/idms/PROJECTS/Lung/LungMetastasis/SE000004/54680-0545.dcm"
lidc="/mnt/idms/PROJECTS/Lung/LIDC_Proba/318-06a33f72aef6dc0e3b645cd7a50610a6.dcm"
ds = pydicom.dcmread(lidc)
#print((ds.pixel_array).shape)

plt.imshow(ds.pixel_array, cmap=plt.cm.bone)

#data_img = Image.fromarray(ds.pixel_array)
#data_img_rotated = data_img.rotate(angle=45,resample=Image.BICUBIC,fillcolor=data_img.getpixel((0,0)))
#print(type(data_img_rotated))
#plt.imshow(data_img)

In [None]:
no_num=len(glob.glob('./NoMetastasis/*.txt'))
lung_num=len(glob.glob('./LungMetastasis/*.txt'))
lymp_num=len(glob.glob('./LymphoglandulaeMetastasis/*.txt'))
ct_num=no_num+lung_num+lymp_num

In [None]:
lung_labels=np.asarray([0]*no_num+[1]*lung_num+[0]*lymp_num).astype(np.float32)
lymp_labels=np.asarray([0]*no_num+[0]*lung_num+[1]*lymp_num).astype(np.float32)

print(lung_labels)

print(lung_labels.shape)
#for filename in glob.glob('./Tudo-Ulyssys/*.txt'):
#    with open(filename, 'r', encoding='latin1', errors='ignore') as file:

## Pylidc API

In [29]:
pid = 'LIDC-IDRI-0078'
scan = pl.query(pl.Scan).filter(pl.Scan.patient_id == pid).first()

nods = scan.cluster_annotations()

print("%s has %d nodules." % (scan, len(nods)))
# => Scan(id=1,patient_id=LIDC-IDRI-0078) has 4 nodules.

for i,nod in enumerate(nods):
    print("Nodule %d has %d annotations." % (i+1, len(nods[i])))
    print(vars(nods[i]))
# => Nodule 1 has 4 annotations.
# => Nodule 2 has 4 annotations.
# => Nodule 3 has 1 annotations.
# => Nodule 4 has 4 annotations.
#scan.visualize(annotation_groups=nods)

Scan(id=1,patient_id=LIDC-IDRI-0078) has 4 nodules.
Nodule 1 has 4 annotations.


TypeError: vars() argument must have __dict__ attribute

In [None]:
scan=list(pl.query(pl.Scan))[17]
img_np=scan.to_volume()[:,:,40]
maxi=np.max(img_np)
mini=np.min(img_np)
img_np=((img_np-mini)/(maxi-mini))*255
Image.fromarray(img_np).show()

In [None]:
for scan in pl.query(pl.Scan):
    print(scan)
    #print(scan.cluster_annotations())
    Image.fromarray(scan.to_volume()).show()
    break

In [None]:
scans.count()

In [None]:
vars(pl.Scan.scan)

In [None]:
path="/mnt/idms/PROJECTS/Lung/LIDC_IDRI/LIDC_IDRI_1.3.6.1.4.1.14519.5.2.1.6279.6001.179730018513720561213088132029"

In [None]:
for dcm in glob.glob(f"{path}/*.dcm"):
    print(pydicom.read_file(dcm))

In [22]:
lidc_dicom_seg_path="/mnt/idms/PROJECTS/Lung/LIDC_IDRI_DICOM/LIDC-IDRI-0252/01-01-2000-CT THORAX WCONTRAST-85575/5.000000-Segmentation of Nodule 1 - Annotation 0-29624/1-1.dcm"
lidc_dicom_annot_path="/mnt/idms/PROJECTS/Lung/LIDC_IDRI_DICOM/LIDC-IDRI-0252/01-01-2000-CT THORAX WCONTRAST-85575/8.000000-Nodule 1 - Annotation 172251 evaluations-94521/1-1.dcm"
#Image.fromarray(((pydicom.read_file(lidc_dicom_seg_path).pixel_array)[6])*255).show()
pydicom.read_file(lidc_dicom_annot_path)

Dataset.file_meta -------------------------------
(0002, 0000) File Meta Information Group Length  UL: 196
(0002, 0001) File Meta Information Version       OB: b'\x00\x01'
(0002, 0002) Media Storage SOP Class UID         UI: Enhanced SR Storage
(0002, 0003) Media Storage SOP Instance UID      UI: 1.2.276.0.7230010.3.1.4.0.16106.1553298746.694519
(0002, 0010) Transfer Syntax UID                 UI: Explicit VR Little Endian
(0002, 0012) Implementation Class UID            UI: 1.3.6.1.4.1.22213.1.143
(0002, 0013) Implementation Version Name         SH: '0.5'
(0002, 0016) Source Application Entity Title     AE: 'POSDA'
-------------------------------------------------
(0008, 0012) Instance Creation Date              DA: '20190322'
(0008, 0013) Instance Creation Time              TM: '195226'
(0008, 0014) Instance Creator UID                UI: 1.2.276.0.7230010.3.0.3.6.3
(0008, 0016) SOP Class UID                       UI: Enhanced SR Storage
(0008, 0018) SOP Instance UID                 

### Highdicom package

In [40]:
sr_dataset = pydicom.read_file(lidc_dicom_annot_path)
#print(type(sr_dataset))

# Find all content items that may contain other content items.
containers = hd.sr.utils.find_content_items(
    dataset=sr_dataset,
    relationship_type=hd.sr.RelationshipTypeValues.CONTAINS
)
#print(containers)
print(sr_dataset.ContentTemplateSequence[0].TemplateIdentifier)
# Query content of SR document, where content is structured according
# to TID 1500 "Measurment Report"
if sr_dataset.ContentTemplateSequence[0].TemplateIdentifier in ['TID1500','1500']:
    # Determine who made the observations reported in the document
    observers = hd.sr.utils.find_content_items(
        dataset=sr_dataset,
        name=codes.DCM.PersonObserverName
    )
    print(observers)

    # Find all imaging measurements reported in the document
    '''measurements = hd.sr.utils.find_content_items(
        dataset=sr_dataset,
        name=codes.DCM.ImagingMeasurements,
        recursive=True
    )
    print(measurements)
    '''

    # Find all findings reported in the document
    findings = hd.sr.utils.find_content_items(
        dataset=sr_dataset,
        name=codes.DCM.Finding,
        recursive=True
    )
    print(findings)

    # Find regions of interest (ROI) described in the document
    # in form of spatial coordinates (SCOORD)
    regions = hd.sr.utils.find_content_items(
        dataset=sr_dataset,
        value_type=ValueTypeValues.SCOORD,
        recursive=True
    )
    print(regions)

1500
[(0040, a010) Relationship Type                   CS: 'HAS OBS CONTEXT'
(0040, a040) Value Type                          CS: 'PNAME'
(0040, a043)  Concept Name Code Sequence  1 item(s) ---- 
   (0008, 0100) Code Value                          SH: '121008'
   (0008, 0102) Coding Scheme Designator            SH: 'DCM'
   (0008, 0104) Code Meaning                        LO: 'Person Observer Name'
   ---------
(0040, a123) Person Name                         PN: 'anonymous']


ValueError: 'HAS ACQ CONTEXT' is not a valid RelationshipTypeValues