Replication of Ruyi's code.

Dataset: IXI

10% for test, 90% for training. (Option: k-folds cross validation, not implemented yet.)

In [235]:

#to create training.csv and test.csv

import xlrd
import numpy as np
import pandas as pd
import os
import re
from progressbar import *
import nibabel as nib
import pdb
import scipy.ndimage
import matplotlib.pyplot as plt

def IXI_rename():
    '''
    This is an once for all thing.
    rename .nii.gz files as IXI[ixi_id].nii.gz
    '''
    STAMP = False # if any file has the right name for Regular Expression

    target_dir = '/media/woody/Elements/age_data/IXI/IXI-T1'
    files_list = os.listdir(target_dir)
    
    pbar = ProgressBar().start()
    n_bar = len(files_list)
    
    for i,filename in enumerate(files_list):
        re_result = re.match('^IXI.*-.*-.*-T1\.nii\.gz$',filename)
        if re_result:
            STAMP = True
            new_filename = filename.split('-')[0] + '.nii.gz'
            os.rename(os.path.join(target_dir,filename),os.path.join(target_dir,new_filename))
        pbar.update(int(i*100/(n_bar-1)))
    pbar.finish()
    
    if not STAMP:
        print('IXI_rename() finished. No file found.')
    else:
        print('IXI_rename() finished. Done.')
    return

def get_shuffled(imgs, labels):
    temp = np.array([imgs,labels])
    temp = temp.transpose()
    np.random.shuffle(temp)
    image_list = list(temp[:,0])
    label_list = list(temp[:,1])
    return image_list,label_list

def gen_phenotypics():
    '''
    This is an once for all thing.
    to generate phenotypics.csv
    '''
    if os.path.exists('./phenotypics.csv'):
        print('phenotypics.csv exists already.')
        return
    # get id and age from .xls
    phenotypic_table = xlrd.open_workbook('IXI.xls','rb')
    pt = phenotypic_table.sheets()[0]

    id_list = pt.col_values(0)[1:]
    index_age = np.where(np.array(pt.row_values(0))=='AGE')[0][0]
    age_list = pt.col_values(index_age)[1:]

    # delete empty items:
    for i in range(len(id_list)-1,0-1,-1):
        if age_list[i] == '':
            del id_list[i]
            del age_list[i]

    # shuffle and save the phenotypic info:
    id_list, age_list = get_shuffled(id_list,age_list)
    data_to_save = pd.DataFrame({'id':id_list,'age':age_list})
    data_to_save.to_csv(os.path.join('./','phenotypics.csv'), index=False, sep=',')
    print('phenotypics.csv created.')
    return

def gen_training_test_csv():
    '''
    This is an once for all thing.
    to generate training.csv and test.csv
    '''
    if os.path.exists('./training.csv') and os.path.exists('./test.csv'):
        print('training.csv and test.csv exist already.')
        return
    phenotypics = pd.read_csv('./phenotypics.csv', sep=',',header=0)
    mid_point = round(0.1 * len(phenotypics))
    training_df = phenotypics[:mid_point]
    test_df = phenotypics[mid_point:]
    training_df.to_csv(os.path.join('./','training.csv'), index=False, sep=',')
    print('training.csv created.')
    test_df.to_csv(os.path.join('./','test.csv'), index=False, sep=',')
    print('test.csv created.')
    return        

def resample(image, pixdim, new_spacing=[1,1,1]):
    '''
    All images are resampled according to the pixel dimension information read from the 
    image header files. 
    This ensures that all images will have the same resolution.
    
    image: ndarray nii_img.get_data()
    pixdim: nii_img.header['pixdim'][1:4]
    
    return: ndarray
    '''
    spacing = pixdim

    resize_factor = spacing / new_spacing
    new_real_shape = image.shape * resize_factor
    new_shape = np.round(new_real_shape)
    real_resize_factor = new_shape / image.shape
#     new_spacing = spacing / real_resize_factor
    image = scipy.ndimage.interpolation.zoom(image, real_resize_factor, mode='nearest')
    
    return image

def crop_pad(image,desired_shape):
    '''
    To crop or pad images to the same shape
    
    image: ndarray
    desired_shape: (130,130,110) like tuple
    
    return: ndarray
    '''
    X_margin_0 = int((desired_shape[0]-image.shape[0])/2)
    Y_margin_0 = int((desired_shape[1]-image.shape[1])/2)
    Z_margin_0 = int((desired_shape[2]-image.shape[2])/2)
    
    X_margin_1 = desired_shape[0]-image.shape[0]-X_margin_0
    Y_margin_1 = desired_shape[1]-image.shape[1]-Y_margin_0
    Z_margin_1 = desired_shape[2]-image.shape[2]-Z_margin_0
    
    npad = ((X_margin_0,X_margin_1), 
            (Y_margin_0,Y_margin_1), 
            (Z_margin_0,Z_margin_1))
    crop_padded_img = np.pad(image, pad_width=npad, mode='constant', constant_values=0)
    return crop_padded_img

def preprocess_1(nii_file):
    '''
    preprocess
    step.1: resample
    step.2: crop and padd
    
    nii_file: absolute path of .nii.gz file
    
    return: ndarray
    '''
    DESIRED_SHAPE=(130, 130, 110)
    
    nii_img = nib.load(nii_file)
    header = nii_img.header
    pixdim = header['pixdim'][1:4]
    npy_img = nii_img.get_data()
#     print('original image shape: ',npy_img.shape)
    resampled_img = resample(npy_img, pixdim, [2,2,2])
#     print('resampled img shape: ',resampled_img.shape)
    crop_padded_img = crop_pad(resampled_img,DESIRED_SHAPE)
#     print('crop and padded img shape: ', crop_padded_img.shape)

    return crop_padded_img

def preprocess_2(npy_file):
    '''
    preprocess
    step.3 subtract mean values
    
    npy_file: absolute path of .npy file
    
    return: ndarray
    '''
    
    
    return
    
def gen_npy():
    '''
    To read in .nii.gz files and preprocess, including preprocess_1() and preprocess_2(), 
    then output .npy files in target_dir folder.
    
    '''
    source_dir = '/media/woody/Elements/age_data/IXI/IXI-T1'
    target_dir = './IXI_npy'
    target_dir_origin = os.path.join(target_dir,'origin')
    target_dir_mean = os.path.join(target_dir,'mean')
        
    dirs = [target_dir, target_dir_origin,target_dir_mean]
        
    for path in dirs:
        try:
            os.mkdir(path)
        except FileExistsError:
            print(path,' exists already!')
        
    
    # preprocess_1: step.1 step.2   
    nii_list = os.listdir(source_dir)
     
    pbar = ProgressBar().start()
    n_bar = len(nii_list)
    
    for i,filename in enumerate(nii_list):
        re_result = re.match('^IXI[0-9]*\.nii\.gz$',filename)
        if re_result:
            target_filename = os.path.join(target_dir_origin,filename.split('.')[0])
            if not os.path.exists(target_filename + '.npy'):
                pdb.set_trace()
                cropped_npy = preprocess_1(os.path.join(source_dir,filename))
                np.save(target_filename,cropped_npy)
        pbar.update(int(i*100/(n_bar-1)))
    pbar.finish()
    
    # preprocess_2: step.3
    npy_list = os.listdir(target_dir_origin)
    
    pbar = ProgressBar().start()
    n_bar = len(npy_list)
    
    for i,filename in enumerate(npy_list):
        re_result = re.match('^IXI[0-9]*\.npy$',filename)
        if re_result:
            target_filename = os.path.join(target_dir_mean,filename)
            if not os.path.exists(target_filename):
                print('target_filename')
        pbar.update(int(i*100/(n_bar-1)))
    pbar.finish()

phenotypics = pd.read_csv('./phenotypics.csv', sep=',',header=0)
id_list = list(phenotypics['id'])
age_list = list(phenotypics['age'])

print()

mid_point = round(0.1 * len(id_list))
test_id_list = id_list[:mid_point]
test_age_list = age_list[:mid_point]
training_id_list = id_list[mid_point:]
training_age_list = age_list[mid_point:]



for i in range(len(training_id_list)):
    ixi_id = int(training_id_list[i])
    age = round(training_age_list[i],2)
    str_id = str(ixi_id)
    if ixi_id < 10:
        str_id = '00' + str_id
    elif ixi_id > 9 and ixi_id < 100:
        str_id = '0' + str_id
    nii_filename = 'IXI' + str_id + '.nii.gz'
    
    print(ixi_id,str_id)

# assignment of training and test set
# preprocess each .nii file to .npy and save the location to training.csv/test.csv


# np.array(age_list)==

# print(type(pt.row_values(1)[-1]))
# print(pt.row_values(1)[-1]=='')
# # # pt.row_values(2)
# # # pt.col_values(0)
# index_available = np.where(np.array(pt.row_values(0))=='AGE')[0][0]

# a = np.random.randn(5,5)
# # print(a != '')
# b = np.delete(a,[0,2],0)
# print(b)



432 432
153 153
453 453
277 277
648 648
263 263
531 531
173 173
447 447
261 261
13 013
226 226
104 104
158 158
525 525
560 560
367 367
636 636
591 591
413 413
606 606
206 206
294 294
140 140
380 380
328 328
451 451
498 498
383 383
357 357
328 328
238 238
512 512
64 064
242 242
265 265
66 066
445 445
296 296
452 452
48 048
575 575
282 282
165 165
508 508
74 074
205 205
94 094
237 237
253 253
336 336
62 062
363 363
629 629
311 311
533 533
651 651
626 626
233 233
338 338
437 437
597 597
196 196
598 598
97 097
224 224
116 116
131 131
315 315
334 334
478 478
249 249
405 405
219 219
502 502
80 080
379 379
102 102
549 549
613 613
469 469
433 433
293 293
212 212
148 148
497 497
522 522
486 486
201 201
217 217
25 025
139 139
176 176
491 491
36 036
601 601
534 534
95 095
449 449
63 063
199 199
360 360
128 128
616 616
188 188
276 276
90 090
213 213
162 162
275 275
342 342
489 489
512 512
223 223
308 308
408 408
611 611
313 313
290 290
506 506
178 178
483 483
151 151
607 607
499 499
552 552
458 4

In [214]:
gen_npy()

                                                                               / |#                                                  | 0 Elapsed Time: 0:00:00

./IXI_npy exists already!
original image shape:  (256, 256, 146)
resampled img shape:  (120, 120, 88)
crop and padded img shape:  (130, 130, 110)
> <ipython-input-213-761efb78efa1>(160)preprocess()
-> return resampled_img
(Pdb) c


                                                                               - |                             #                     | 0 Elapsed Time: 0:06:14

original image shape:  (256, 256, 150)
resampled img shape:  (120, 120, 90)
crop and padded img shape:  (130, 130, 110)
> <ipython-input-213-761efb78efa1>(160)preprocess()
-> return resampled_img
(Pdb) c


                                                                               \ |         #                                         | 0 Elapsed Time: 0:06:16

original image shape:  (256, 256, 150)
resampled img shape:  (120, 120, 90)
crop and padded img shape:  (130, 130, 110)
> <ipython-input-213-761efb78efa1>(160)preprocess()
-> return resampled_img
(Pdb) c


                                                                               | |                #                                  | 0 Elapsed Time: 0:06:25

original image shape:  (256, 256, 150)
resampled img shape:  (120, 120, 90)
crop and padded img shape:  (130, 130, 110)
> <ipython-input-213-761efb78efa1>(160)preprocess()
-> return resampled_img
(Pdb) c


                                                                               / |      #                                            | 0 Elapsed Time: 0:06:28

original image shape:  (256, 256, 150)
resampled img shape:  (120, 120, 90)
crop and padded img shape:  (130, 130, 110)
> <ipython-input-213-761efb78efa1>(160)preprocess()
-> return resampled_img
(Pdb) c


                                                                               - |                                     #             | 0 Elapsed Time: 0:06:31

original image shape:  (256, 256, 150)
resampled img shape:  (120, 120, 90)
crop and padded img shape:  (130, 130, 110)
> <ipython-input-213-761efb78efa1>(160)preprocess()
-> return resampled_img
(Pdb) c


                                                                               \ |                               #                   | 0 Elapsed Time: 0:06:34

original image shape:  (256, 256, 150)
resampled img shape:  (120, 120, 90)
crop and padded img shape:  (130, 130, 110)
> <ipython-input-213-761efb78efa1>(160)preprocess()
-> return resampled_img
(Pdb) c


                                                                               | |          #                                        | 1 Elapsed Time: 0:06:36

original image shape:  (256, 256, 150)
resampled img shape:  (120, 120, 90)
crop and padded img shape:  (130, 130, 110)
> <ipython-input-213-761efb78efa1>(160)preprocess()
-> return resampled_img
(Pdb) c


                                                                               / |             #                                     | 1 Elapsed Time: 0:06:39

original image shape:  (256, 256, 150)
resampled img shape:  (120, 120, 90)
crop and padded img shape:  (130, 130, 110)
> <ipython-input-213-761efb78efa1>(160)preprocess()
-> return resampled_img
(Pdb) c


                                                                               - |                                   #               | 1 Elapsed Time: 0:06:41

original image shape:  (256, 256, 140)
resampled img shape:  (120, 120, 84)
crop and padded img shape:  (130, 130, 110)
> <ipython-input-213-761efb78efa1>(160)preprocess()
-> return resampled_img
(Pdb) c


                                                                               \ |                         #                         | 1 Elapsed Time: 0:06:45

original image shape:  (256, 256, 150)
resampled img shape:  (120, 120, 90)
crop and padded img shape:  (130, 130, 110)
> <ipython-input-213-761efb78efa1>(160)preprocess()
-> return resampled_img
(Pdb) q


BdbQuit: 

In [193]:
phenotypics = pd.read_csv('./phenotypics.csv', sep=',',header=0)
# print(phenotypics)
phenotypics[5:10]

Unnamed: 0,age,id
5,68.134155,473.0
6,44.991102,287.0
7,45.295003,588.0
8,62.455852,392.0
9,66.015058,603.0


In [236]:
# main

# after tar IXI dataset
# rename files
IXI_rename()
# get phenotypics.csv
gen_phenotypics()
# get training.csv test.csv
gen_training_test_csv()
# get preprocessed .npy files
gen_npy()

| |#                                                | 100 Elapsed Time: 0:00:00
| |#                                                | 100 Elapsed Time: 0:00:00
| |#                                                | 100 Elapsed Time: 0:00:00


IXI_rename() finished. No file found.
phenotypics.csv exists already.
training.csv and test.csv exist already.
./IXI_npy  exists already!
./IXI_npy/origin  exists already!
./IXI_npy/mean  exists already!
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filename
target_filen

In [137]:
!cd /media/woody/Elements/age_data/IXI/IXI-T1/

os.path.exists('/media/woody/Elements/age_data/IXI/IXI-T1/IXI.*-.*-.*-T1\.nii\.gz$')

False

In [159]:
import time
from progressbar import *

# for i in range(10):
#     print('\r'+str((i+1)*100.0/10)+'%',end='',flush=True)
#     time.sleep(0.3)
# print('abc')

pbar = ProgressBar().start()
for i in range(10):
    pbar.update(int(i*100/(10-1)))
    time.sleep(0.8)
pbar.finish()

| |                 #                               | 100 Elapsed Time: 0:00:08


In [178]:
import time
from progressbar import *
total = 100
def dosomework():
    time.sleep(0.01)
widgets = ['Progress: ',Percentage(), ' ', Bar('>'),' ', Timer(),
      ' ', ETA()]
pbar = ProgressBar(widgets=widgets, maxval=total+100).start()
for i in range(total):
    # do something
    pbar.update(i)
    dosomework()
pbar.finish()

Progress: 100% |>>>>>>>>>>>>>>>>>>>>>>>>>| Elapsed Time: 0:00:01 Time:  0:00:01


In [175]:
ProgressBar?

In [161]:
  
import sys, time  
from progressbar import *  
  
total = 1000  
  
def dosomework():  
    time.sleep(0.01)  
pbar = ProgressBar().start()  
for i in range(1000):  
    pbar.update(int((i / (total - 1)) * 100))  
    dosomework()  
pbar.finish()  

| |       #                                         | 100 Elapsed Time: 0:00:10


In [16]:
data = xlrd.open_workbook('IXI.xls','rb')
print('工作表名为：'+ data.sheet_names()[0])
table = data.sheets()[0]
nrows = table.nrows
ncols = table.ncols
print('表格行数为%d,列数为%d'%(nrows,ncols))

#输出每一行的值
# for item in range(table.nrows):
#     print(table.row_values(item))

#获取单元格的值
cell_A1 = table.row(0)[0].value
cell_A2 = table.cell(0,0).value
cell_A3 = table.col(0)[0].value

print(cell_A1)
print(cell_A2)
print(cell_A3)

工作表名为：Table
表格行数为620,列数为12
IXI_ID
IXI_ID
IXI_ID
