In [15]:
import flywheel
import os
import datetime
from dateutil.tz import tzutc
import requests
import time

fw = flywheel.Client()
collection_id = '5eb5081448fe1b1e5792a7a9'
local_dir = '/media/will/My Passport/Ubuntu/cortical_thickness_maps/ct'
base_date = datetime.datetime(2020, 12, 19, 20, 33, 5, 714000, tzinfo=tzutc())

In [2]:
# Get all the sessions within the collection.
sessions = [fw.get_session(x.id) for x in fw.get_collection_sessions(collection_id)]

In [16]:
def download_warps(session, passed, failed, double):
    '''
    Script to download the needed files for a given session.
    
    session: the session in question
    passed: a list of strings identifying sessions that were successfully downloaded.
    failed: a list of string identifying sessions whose downloads failed.
    double: whether or not the subject directory has -2 at the end. (See below)
    
    '''
    
    # cleaning up subject and session names  
    sub_label = fw.get(session.parents['subject']).label
    if '_' in sub_label:
        sub_label = sub_label.replace('_','x')
        # TOME is a weird one because of a space in the session names. 
    if 'TOME' in sub_label: #also doesn't have _ between session and suffix in ANTsCT output files
        ses_label='Session'
    elif '_' in session.label:
        ses_label = session.label.replace('_','x') + '_'
    else:
        ses_label = session.label + '_' # all sessions beside TOME ones have '_' between session and suffix
    id_str = '{}_{}'.format(sub_label, session.label) # used in accounting later
    
    # create the subject directory 
    sub_dir = os.path.join(local_dir, sub_label)
    if double:
        sub_dir = sub_dir + '-2'
    os.makedirs(sub_dir, exist_ok=True)
    
    # check you haven't downloaded from this subject before
    num_files = len(os.listdir(sub_dir))
    if num_files >= 6:
        print("Already downloaded {} files for {}.".format(num_files, sub_label))
        passed.append(id_str)
        return passed, failed

    print("Downloading {}'s {} files to {}.".format(sub_label, session.label, sub_dir))
    
    # identify the correct analysis and do the download
    for analysis in session.analyses:
        match = False

        if 'antsct' in analysis.label and analysis.created > base_date:
            match = True            

        if match and analysis.job['state']=='complete' and analysis.files:
            ## Append to the analyses list
            passed.append(id_str)
            affine_name = 'sub-{}_ses-{}TemplateToSubject1GenericAffine.mat'.format(sub_label, ses_label)
            warp_name = 'sub-{}_ses-{}TemplateToSubject0Warp.nii.gz'.format(sub_label, ses_label)
            mask_name = 'sub-{}_ses-{}CorticalMask.nii.gz'.format(sub_label, ses_label)
            ext_brain_name = 'sub-{}_ses-{}ExtractedBrain0N4.nii.gz'.format(sub_label, ses_label)
            brain_mask_name = 'sub-{}_ses-{}BrainExtractionMask.nii.gz'.format(sub_label, ses_label)
            zip_name = 'antsct_sub-{}_sub-{}.zip'.format(sub_label,sub_label)

            try:
                analysis.download_file_zip_member(zip_name, affine_name, os.path.join(sub_dir, affine_name))
                analysis.download_file_zip_member(zip_name, warp_name, os.path.join(sub_dir, warp_name))
                analysis.download_file_zip_member(zip_name, mask_name, os.path.join(sub_dir, mask_name))
                analysis.download_file_zip_member(zip_name, ext_brain_name, os.path.join(sub_dir, ext_brain_name))
                analysis.download_file_zip_member(zip_name, brain_mask_name, os.path.join(sub_dir, brain_mask_name))
                print(" ...Done downloading.")
                print(' ')
            except flywheel.ApiException as e:
                print(e)
                print(session.label)
                print('')
                failed.append(id_str)
                break
                
    return passed, failed
                

In [4]:
connection_timeout = 30 # seconds
passed = list()
failed = list()
# Iterate over the sessions, find the analyses, and download...
for session in sessions:
    # Retry after exception to gracefully handle connection errors
    start_time = time.time()
    while True:
        try:            
            passed, failed = download_warps(session, passed, failed, False)
            break
        except ConnectionError:
            if time.time() > start_time + connection_timeout:
                raise Exception('Unable to get updates after {} seconds of ConnectionErrors'.format(connection_timeout))
            else:
                time.sleep(1)


Already downloaded 6 files for TOMEx3001.
Already downloaded 6 files for TOMEx3002.
Already downloaded 6 files for TOMEx3003.
Already downloaded 6 files for TOMEx3007.
Already downloaded 6 files for TOMEx3004.
Already downloaded 6 files for TOMEx3008.
Already downloaded 6 files for TOMEx3009.
Already downloaded 6 files for TOMEx3005.
Already downloaded 6 files for TOMEx3018.
Already downloaded 6 files for TOMEx3012.
Already downloaded 6 files for TOMEx3015.
Already downloaded 6 files for TOMEx3013.
Already downloaded 6 files for TOMEx3017.
Already downloaded 6 files for TOMEx3011.
Already downloaded 6 files for TOMEx3014.
Already downloaded 6 files for TOMEx3016.
Already downloaded 6 files for TOMEx3024.
Already downloaded 6 files for TOMEx3026.
Already downloaded 6 files for TOMEx3021.
Already downloaded 6 files for TOMEx3022.
Already downloaded 6 files for TOMEx3023.
Already downloaded 6 files for TOMEx3028.
Already downloaded 6 files for TOMEx3019.
Already downloaded 6 files for TOM

## Book-keeping

In [7]:
# Check to see how many downloads succeeded
print(len(passed))
print(len(failed))

868
1


In [8]:
# Check how many subjects have complete downloads
complete = []
incomplete = []
subdirs = list(filter(os.path.isdir, [os.path.join(local_dir, f) for f in os.listdir(local_dir)]))
for subdir in subdirs:
    n_files = len(os.listdir(subdir))
    if n_files >= 6:
        complete.append(subdir)
    else:
        incomplete.append(subdir)
        
print(len(complete))
print(len(incomplete))

863
5


In [9]:
# print names of subjects with incomplete data
for i in range(len(incomplete)):
    print(incomplete[i])

/media/will/My Passport/Ubuntu/cortical_thickness_maps/ct/122702
/media/will/My Passport/Ubuntu/cortical_thickness_maps/ct/101162-2
/media/will/My Passport/Ubuntu/cortical_thickness_maps/ct/115381-2
/media/will/My Passport/Ubuntu/cortical_thickness_maps/ct/117889-2
/media/will/My Passport/Ubuntu/cortical_thickness_maps/ct/118458-2


It looks like there are two types of failures. 122702 failed because the session in the collection was actually a 7T session and the script didn't recognize any of the zip file paths. The rest failed because they share a subject name with a subject from another project. I manually created folders with a '-2' suffix to  distinguish, but the script skipped over these sessions because it looked at the corresponding folder and saw there were >5 files.

The first will require removing the current session, adding that subject's 3T session, and re-running ANTS-CT. Let's redo those final four for now.

In [12]:
missed_sessions = [
    fw.get('5e1cfeb83a9694007a7251e2'),
    fw.get('5e1e81183a9694007a74a97e'),
    fw.get('5e1edd1b3a9694004d680a07'),
    fw.get('5e1f19923a9694007a75688f')
]
connection_timeout = 30 # seconds
passed2 = list()
failed2 = list()
# Iterate over the sessions, find the analyses, and download...
for session in missed_sessions:
    # Retry after exception to gracefully handle connection errors
    start_time = time.time()
    while True:
        try:            
            passed2, failed2 = download_warps(session, passed2, failed2, True) # for the 'duplicate' sessions this time
            break
        except ConnectionError:
            if time.time() > start_time + connection_timeout:
                raise Exception('Unable to get updates after {} seconds of ConnectionErrors'.format(connection_timeout))
            else:
                time.sleep(1)

print(len(passed2))
print(len(failed2))

Downloading 101162's 20160525-1400 files to /media/will/My Passport/Ubuntu/cortical_thickness_maps/ct/101162-2.
 ...Done downloading.
 
Downloading 115381's 20051011-1539 files to /media/will/My Passport/Ubuntu/cortical_thickness_maps/ct/115381-2.
 ...Done downloading.
 
Downloading 117889's 20150316-0855 files to /media/will/My Passport/Ubuntu/cortical_thickness_maps/ct/117889-2.
 ...Done downloading.
 
Downloading 118458's 20140929-1347 files to /media/will/My Passport/Ubuntu/cortical_thickness_maps/ct/118458-2.
 ...Done downloading.
 
4
0


In [14]:
# Check (again) how many subjects have complete downloads
complete2 = []
incomplete2 = []
subdirs = list(filter(os.path.isdir, [os.path.join(local_dir, f) for f in os.listdir(local_dir)]))
for subdir in subdirs:
    n_files = len(os.listdir(subdir))
    if n_files >= 6:
        complete2.append(subdir)
    else:
        incomplete2.append(subdir)
        
print(len(complete2))
print(len(incomplete2))

867
1


Try the final session, 1222702:

In [20]:
passed3 = list()
failed3 = list()
missed_session = fw.get('5d049ac5a550c60048704d67')
passed3, failed3 = download_warps(missed_session, passed3, failed3, False)
print(len(passed3))
print(len(failed3))

Already downloaded 6 files for 122702.
1
0


In [None]:
# Check (again) how many subjects have complete downloads
complete3 = []
incomplete3 = []
subdirs = list(filter(os.path.isdir, [os.path.join(local_dir, f) for f in os.listdir(local_dir)]))
for subdir in subdirs:
    n_files = len(os.listdir(subdir))
    if n_files >= 6:
        complete3.append(subdir)
    else:
        incomplete3.append(subdir)
        
print(len(complete3))
print(len(incomplete3))

------------------------------------------
An example of looking through the zip file:

In [13]:
ses = fw.get('5d049ac5a550c60048704d67')
for analysis in ses.analyses:
    match = False
    sub_label = fw.get(ses.parents['subject']).label
    zip_name = 'antsct_sub-{}_sub-{}.zip'.format(sub_label,sub_label)
    if 'antsct' in analysis.label and analysis.created > base_date:
        match = True            

    if match and analysis.job['state']=='complete' and analysis.files:  
        zip_info = analysis.get_file_zip_info(zip_name)
        print(zip_info)

{'comment': '',
 'members': [{'comment': '',
              'path': 'sub-122702_ses-122702x20180417x7T_ACTStage1Complete.txt',
              'size': 76,
              'timestamp': datetime.datetime(2020, 12, 23, 10, 12, 6, tzinfo=tzutc())},
             {'comment': '',
              'path': 'sub-122702_ses-122702x20180417x7T_ACTStage2Complete.txt',
              'size': 76,
              'timestamp': datetime.datetime(2020, 12, 23, 16, 17, 56, tzinfo=tzutc())},
             {'comment': '',
              'path': 'sub-122702_ses-122702x20180417x7T_ACTStage3Complete.txt',
              'size': 76,
              'timestamp': datetime.datetime(2020, 12, 23, 18, 17, tzinfo=tzutc())},
             {'comment': '',
              'path': 'sub-122702_ses-122702x20180417x7T_ACTStage4Complete.txt',
              'size': 76,
              'timestamp': datetime.datetime(2020, 12, 23, 23, 56, 40, tzinfo=tzutc())},
             {'comment': '',
              'path': 'sub-122702_ses-122702x20180417x7T_ACT