In [25]:
# What does lru_cache do?
from functools import lru_cache

@lru_cache(maxsize=1)
def addxy(x, y):
    """Return ``x + y`` and print a trace line each time the body runs.

    The function is wrapped in a one-entry LRU cache, so a call that
    repeats the most recently used arguments is served from the cache:
    the body is skipped and nothing is printed.
    """
    trace = 'add {} and {}'.format(x, y)
    print(trace)
    total = x + y
    return total

# Call with the same arguments twice, then with new ones: the trace line
# from addxy only appears when the body runs, i.e. on a cache miss.
for args in ((1, 2), (1, 2), (3, 4)):
    print(addxy(*args))

add 1 and 2
3
3
add 3 and 4
7


In [64]:
import glob
from collections import namedtuple
import os
import csv

In [33]:
# Record type for one nodule candidate. Fields, in order:
# isNodule_bool, diameter_mm, series_uid, center_xyz.
CandidateInfoTuple = namedtuple(
    typename='CandidateInfoTuple',
    field_names='isNodule_bool, diameter_mm, series_uid, center_xyz',
)

In [79]:
# Collect the names (without extension) of the .mhd files found in the local
# subset folder. The function returns that set; lru_cache(1) remembers the
# most recent result so a repeated call with the same argument skips the
# directory scan.
requireOnDisk_bool = True


@lru_cache(maxsize=1)
def getCandidateInfoList(requireOnDisk_bool=requireOnDisk_bool):
    """Return the set of .mhd base names present in ./luna16/data/subset7.

    requireOnDisk_bool is not read by the body; it only takes part in the
    cache key. Its default is the value of the module-level
    requireOnDisk_bool at the time the function is defined.
    """
    name_set = set()
    for mhd_path in glob.glob('./luna16/data/subset7/*.mhd'):
        # os.path.split returns a (directory, file name) tuple; keep the name
        file_name = os.path.split(mhd_path)[-1]
        # drop the 4-character '.mhd' suffix
        name_set.add(file_name[:-4])
    return name_set


presentOnDisk_set = getCandidateInfoList(requireOnDisk_bool)

In [70]:
# Build diameter_dict: series_uid -> list of (center_xyz, diameter_mm) pairs,
# one pair per row of annotations.csv.
diameter_dict = {}
with open('./luna16/data/annotations.csv') as f:
    reader = csv.reader(f)  # yields each CSV row as a list of strings
    next(reader, None)      # discard the header row (no-op on an empty file)
    for row in reader:
        series_uid = row[0]
        annotationCenter_xyz = tuple(map(float, row[1:4]))
        annotationDiameter_mm = float(row[4])
        # start a fresh list the first time a series_uid is seen
        if series_uid not in diameter_dict:
            diameter_dict[series_uid] = []
        diameter_dict[series_uid].append(
            (annotationCenter_xyz, annotationDiameter_mm)
        )

In [71]:
# Build candidateInfo_list: one CandidateInfoTuple per row of candidates.csv.
# A candidate gets the diameter of the first annotation of the same series
# whose center lies within a quarter of that annotation's diameter on every
# axis; candidates with no such annotation keep a diameter of 0.0.
candidateInfo_list = []
with open('./luna16/data/candidates.csv') as f:
    # [1:] skips the header row
    for row in list(csv.reader(f))[1:]:
        series_uid = row[0]
        # when requireOnDisk_bool is set, skip series that have no .mhd file
        # in presentOnDisk_set
        if series_uid not in presentOnDisk_set and requireOnDisk_bool:
            continue
        # row is a list, so the flag column must be indexed (row[4]);
        # calling it (row(4)) raises TypeError
        isNodule_bool = bool(int(row[4]))
        candidateCenter_xyz = tuple([float(x) for x in row[1:4]])

        candidateDiameter_mm = 0.0
        # dict.get(a, b): dict[a] if a is present, otherwise b
        for annotation_tup in diameter_dict.get(series_uid, []):
            annotationCenter_xyz, annotationDiameter_mm = annotation_tup
            for i in range(3):
                delta_mm = abs(candidateCenter_xyz[i] - annotationCenter_xyz[i])
                if delta_mm > annotationDiameter_mm / 4:
                    # too far apart on this axis: not the same nodule
                    break
            else:
                # for/else: runs only when no axis triggered the break above,
                # i.e. the candidate matches this annotation.
                # NOTE(review): the original cell stopped at a dangling
                # `else:`; this body and the append below are reconstructed
                # from the names the cell sets up - confirm intent.
                candidateDiameter_mm = annotationDiameter_mm
                break

        candidateInfo_list.append(CandidateInfoTuple(
            isNodule_bool,
            candidateDiameter_mm,
            series_uid,
            candidateCenter_xyz,
        ))

{'1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222365663678666836860': [((-128.6994211,
    -175.3192718,
    -298.3875064),
   5.651470635),
  ((103.7836509, -211.9251487, -227.12125), 4.224708481)],
 '1.3.6.1.4.1.14519.5.2.1.6279.6001.100398138793540579077826395208': [((69.63901724,
    -140.9445859,
    876.3744957),
   5.786347814)],
 '1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016233746780170740405': [((-24.0138242,
    192.1024053,
    -391.0812764),
   8.143261683),
  ((2.441546798, 172.4648812, -405.4937318), 18.54514997),
  ((90.93171321, 149.0272657, -426.5447146), 18.20857028),
  ((89.54076865, 196.4051593, -515.0733216), 16.38127631)],
 '1.3.6.1.4.1.14519.5.2.1.6279.6001.100953483028192176989979435275': [((81.50964574,
    54.9572186,
    -150.3464233),
   10.36232088)],
 '1.3.6.1.4.1.14519.5.2.1.6279.6001.102681962408431413578140925249': [((105.0557924,
    19.82526014,
    -91.24725078),
   21.08961863)],
 '1.3.6.1.4.1.14519.5.2.1.6279.6001.104562737760173137525888934217': 

In [89]:
# For each annotation stored for this series, print the second center
# coordinate minus the annotation diameter.
for center_xyz, diameter_mm in diameter_dict.get(
        '1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222365663678666836860', []):
    print(center_xyz[1] - diameter_mm)

-180.970742435
-216.149857181
