In [25]:
import flywheel
import pandas as pd
from datetime import datetime
import os 
import glob

# Identifiers of the Flywheel containers and gear this notebook works with.
project_id = '60fef55e60ec55d1b0e0741e' # Neuroprint Validation project
collection_id = '618d6fb6bb845b310730d8f8' # Neuroprint Validation collection
gear = 'deid-export'

# Open the Flywheel client, then resolve the gear and both containers once so
# later cells can reuse them.
fw = flywheel.Client()
gear_fw = fw.lookup('gears/' + gear)
project = fw.get(project_id)
collection = fw.get(collection_id)

## Run gear to move sessions

In [57]:
# Re-fetch every session of the collection individually by id.
# NOTE(review): presumably the collection listing returns abbreviated session
# records and get_session returns the complete ones -- confirm against the SDK.
collection_sessions = []
for ses_stub in fw.get_collection_sessions(collection_id):
    collection_sessions.append(fw.get_session(ses_stub.id))

In [7]:
# Launch the deid-export gear on every collection session whose source project
# holds a file with 'blank-export' in its name (used as the de-id profile input).
asys_label = 'deid-export_WT_NeuroprintValidation'

for ses in collection_sessions:
    # Deliberately NOT named `project`: that notebook-level name refers to the
    # Neuroprint Validation project (see project_id) and is used again further
    # down to upload the deletion report. Rebinding it here would silently
    # redirect that upload to whichever source project was visited last.
    src_project = fw.get(ses.parents['project'])
    deid_profile_list = [f for f in src_project.files if 'blank-export' in f.name]
    if len(deid_profile_list) == 0:
        # Make skipped sessions visible instead of passing over them silently.
        print(f"No 'blank-export' profile found for {ses.label}; gear not run.")
        continue

    inputs = {"deid_profile": deid_profile_list[0]}
    # Built per call so every job gets its own config dict.
    config = {"project_path": 'detre_group/Neuroprint Validation', 'overwrite_files': True}
    analysis_id = gear_fw.run(analysis_label=asys_label, config=config, inputs=inputs, destination=ses)

In [51]:
# check status real quick
# 11/24/2021 - deleted them all so it now shows run=False for all sessions, but that's to be expected
#
# Builds one row per collection session: whether an analysis produced by `gear`
# exists on the session and, if so, the job state of the first such analysis.
data_dict = {'subject':[],'session':[],'run':[], 'status':[]}
for ses in collection_sessions:
    session = ses.reload()  # refresh so analyses reflect the current server state
    sub_label = session.subject.label
    ses_label = session.label
    analyses = session.analyses

    # Reset both fields on every iteration. Previously `status` was left
    # unassigned for sessions without any analyses, which raised NameError on
    # the first such session or silently reused the previous session's status.
    run = 'False'
    status = 'na'

    # Keep only the analyses that were produced by the gear of interest.
    matches = [asys for asys in analyses if asys.gear_info.get('name') == gear]
    if len(matches) > 0:
        status = matches[0].job.get('state')
        run = 'True'

    data_dict['subject'].append(sub_label)
    data_dict['session'].append(ses_label)
    data_dict['run'].append(run)
    data_dict['status'].append(status)

df = pd.DataFrame.from_dict(data_dict)
df

Unnamed: 0,subject,session,run,status
0,123894,123894x20190327x3T,False,na
1,100049,100049x20160621x3T,False,na
2,100113,100113x20160620x3T,False,na
3,120937,120937x20160711x3T,False,na
4,119851,119851x20170606x3T,False,na
5,121105,121105x20170427x3T,False,na
6,122005,122005x20171213x3T,False,na
7,100957,100957_20110426,False,na
8,100551,20081008x1021,True,complete
9,100978,20041103x1130,True,complete


In [52]:
# delete gear runs (they'll still remain in provenance, but will be out of sight for other people)
# Only analyses whose label contains the label used when launching the gear,
# and whose job has finished, are removed.
# NOTE(review): ses.analyses is read from the session objects fetched earlier,
# not re-fetched here -- confirm they are still current before running.
target_label = 'deid-export_WT_NeuroprintValidation'
for ses in collection_sessions:
    for analysis in ses.analyses:
        if target_label not in analysis.label:
            continue
        if analysis.job.state=='complete':
            fw.delete_session_analysis(ses.id, analysis.id)
            print(f"Deleted {analysis.label} from {ses.label}.")

Deleted deid-export_WT_NeuroprintValidation from 20081008x1021.
Deleted deid-export_WT_NeuroprintValidation from 20041103x1130.
Deleted deid-export_WT_NeuroprintValidation from 20090106x1214.
Deleted deid-export_WT_NeuroprintValidation from 20101220x1159.
Deleted deid-export_WT_NeuroprintValidation from 20091109x1206.
Deleted deid-export_WT_NeuroprintValidation from 20091103x1415.
Deleted deid-export_WT_NeuroprintValidation from 20111103x1154.
Deleted deid-export_WT_NeuroprintValidation from 20090316x1310.
Deleted deid-export_WT_NeuroprintValidation from 20090512x1126.
Deleted deid-export_WT_NeuroprintValidation from 20040928x1330.
Deleted deid-export_WT_NeuroprintValidation from 20090218x1235.
Deleted deid-export_WT_NeuroprintValidation from 20091201x1029.
Deleted deid-export_WT_NeuroprintValidation from 20060726x1128.
Deleted deid-export_WT_NeuroprintValidation from 20050705x1118.
Deleted deid-export_WT_NeuroprintValidation from 20080108x1018.
Deleted deid-export_WT_NeuroprintValidat

## Remove non-T1 acquisitions

In [29]:
# Re-fetch every session of the destination project individually by id.
# NOTE(review): presumably the project listing returns abbreviated session
# records and get_session returns the complete ones -- confirm against the SDK.
project_sessions = []
for ses_stub in fw.get_project_sessions(project_id):
    project_sessions.append(fw.get_session(ses_stub.id))

In [43]:
# WARNING: this cell permanently deletes acquisitions from the project.
#
# An acquisition is kept when its lower-cased label satisfies at least one rule.
# Each rule is (substrings that must ALL be present, substrings that must ALL be absent).
# NOTE(review): every label matching the first rule also matches the second, so
# the 'passive' exclusion currently never causes a deletion -- confirm intent.
keep_rules = [
    (("vnav", "moco", "rms"), ("nd", "passive")),
    (("vnav", "rms"), ("nd",)),
    (("ax", "mprage"), ()),
    (("sag", "mprage"), ()),
    (("t1_3d",), ("nd",)),  # NACC
]

delete_dict = {'session':[], 'acquisition':[],'status':[]}
for ses in project_sessions:
    for acq in ses.acquisitions():
        lab = acq.label.lower()
        keep = any(
            all(token in lab for token in required)
            and not any(token in lab for token in forbidden)
            for required, forbidden in keep_rules
        )
        if keep:
            status = 'kept'
        else:
            fw.delete_acquisition(acq.id)
            status = 'deleted'

        # Record the decision for the deletion report.
        delete_dict['session'].append(ses.label)
        delete_dict['acquisition'].append(acq.label)
        delete_dict['status'].append(status)

# 'kept' sorts after 'deleted', so descending order shows what's kept at the top
delete_df = pd.DataFrame.from_dict(delete_dict).sort_values('status', ascending=False)
delete_df.head()

Unnamed: 0,session,acquisition,status
0,123894x20190327x3T,T1_3D_0.8x0.8x0.8,kept
471,124822x20210126x3T,T1_3D_0.8x0.8x0.8,kept
755,20101220x1159,t1_mpr_AX_MPRAGE,kept
982,20151007x1556,t1_mpr_AX_MPRAGE,kept
750,20090106x1214,t1_mpr_AX_MPRAGE,kept


In [42]:
# Append a timestamp to our csv name so it won't overwrite anything when we upload it to flywheel
time_fmt = '%m-%d-%Y_%H-%M-%S'
time_string = datetime.now().strftime(time_fmt)
csv_out = f'../run_reports/{gear}_DeletionReport_{time_string}.csv'

# to_csv does not create missing parent directories, so make sure the report
# folder exists before writing.
os.makedirs(os.path.dirname(csv_out), exist_ok=True)
delete_df.to_csv(csv_out,index=False)

# Resolve the destination from project_id rather than relying on the
# notebook-level `project` variable, which other cells may have rebound to a
# different project by the time this cell runs.
fw.get(project_id).upload_file(csv_out)

In [41]:
# which have duplicates?
# i.e. sessions that still have more than one acquisition marked 'kept'
import collections
kept = delete_df.loc[delete_df['status'] == 'kept', 'session'].tolist()
session_counts = collections.Counter(kept)
dups = [label for label in session_counts if session_counts[label] > 1]
dups

['20151007x1556',
 '20160121x1259',
 '20171213x1012',
 '20190710x1517',
 '20170920x1313',
 '20180110x1405',
 '20180605x1417',
 '20050209x1135']

## Move ANTs analyses to new project

In [59]:
# download the analysis data from the collection
# For each collection session, every completed analysis of `ants_gear` that has
# files gets its zip output downloaded into <store>/<subject>/<session>/, and the
# analysis label is written to label.txt in the same folder.
# NOTE(review): if a session has several qualifying analyses, label.txt is
# overwritten and ends up holding the label of the last one -- confirm that at
# most one is expected per session.
ants_gear = 'antsct-aging-fw'
for ses in collection_sessions:
    adir = os.path.join('/media/will/Samsung_T5/store/analyses', ses.subject.label, ses.label)
    os.makedirs(adir, exist_ok=True)
    for asys in ses.analyses:
        if (asys.gear_info.get('name') == ants_gear) and (asys.job['state']=='complete') and (asys.files):
            # select file and download
            zip_names = [f.name for f in asys.files if f.name.endswith('zip')]
            if not zip_names:
                # Previously an analysis with files but no zip raised IndexError
                # and aborted the whole loop; report it and move on instead.
                print(f"   No zip output in {asys.label} for {ses.subject.label}/{ses.label}; skipping...")
                continue
            antsct_output = zip_names[0]
            download_file=os.path.join(adir, antsct_output)
            if not os.path.exists(download_file):
                print(f"Downloading from {ses.subject.label}/{ses.label}...")
                asys.download_file(antsct_output, download_file)
            else:
                print("   Skipping (already downloaded)...")
            # record label in text output
            with open(f'{adir}/label.txt', 'w') as f:
                f.write(asys.label)


   Skipping (already downloaded)...
   Skipping (already downloaded)...
   Skipping (already downloaded)...
   Skipping (already downloaded)...
   Skipping (already downloaded)...
   Skipping (already downloaded)...
   Skipping (already downloaded)...
   Skipping (already downloaded)...
   Skipping (already downloaded)...
   Skipping (already downloaded)...
Downloading from 101841x02/20090106x1214...
   Skipping (already downloaded)...
Downloading from 104190/20091109x1206...
Downloading from 105223/20091103x1415...
   Skipping (already downloaded)...
   Skipping (already downloaded)...
   Skipping (already downloaded)...
   Skipping (already downloaded)...
Downloading from 108790/20090218x1235...
Downloading from 109198/20091201x1029...
   Skipping (already downloaded)...
   Skipping (already downloaded)...
   Skipping (already downloaded)...
   Skipping (already downloaded)...
Downloading from 115001/20090331x0933...
Downloading from 115264/20090304x1208...
Downloading from 116504/20

In [68]:
# create analyses in Flywheel project
# For every project session that has a downloaded label.txt + zip and no analysis
# yet, create an analysis container (input: the session's T1 NIfTI) and upload the zip.
#
# All local and remote preconditions are checked BEFORE add_analysis is called:
# a failure after the container exists would leave an empty analysis behind,
# and the "no analyses yet" check would then skip the session on every re-run.
for ses in project_sessions:
    adir = os.path.join('/media/will/Samsung_T5/store/analyses', ses.subject.label, ses.label)

    # --- local checks first (cheap, no API calls) ---
    try:
        with open (f"{adir}/label.txt", "r") as label: # read the label text
            lab=label.readlines()
        #os.remove(f"{adir}/label.txt")
    except FileNotFoundError as e:
        print(e)
        continue
    if not lab:
        print(f"Skipping {ses.label}: {adir}/label.txt is empty")
        continue

    zip_files = glob.glob(adir+"/*.zip")
    if not zip_files:
        print(f"Skipping {ses.label}: no zip file found in {adir}")
        continue

    # --- remote checks ---
    session = fw.get(ses.id)  # fresh copy so the analyses list is current
    if len(session.analyses) >= 1:
        continue  # already has an analysis; nothing to do

    # get the input T1 file, which should already be in the session's acquisitions
    acqlist = ses.acquisitions()
    if not acqlist:
        print(f"Skipping {ses.label}: session has no acquisitions")
        continue
    if len(acqlist) > 1:
        # should only be one after deletion; make it visible when that doesn't hold
        print(f"Warning: {ses.label} has {len(acqlist)} acquisitions; using the first ({acqlist[0].label})")
    acq = acqlist[0]
    nii_files = [f for f in acq.files if f.name.endswith('.nii.gz')]
    if not nii_files:
        print(f"Skipping {ses.label}: no .nii.gz file in acquisition {acq.label}")
        continue
    nii = nii_files[0]
    file_ref = acq.get_file(nii.name).ref()

    # --- everything is in place: create the analysis container ---
    asys_label = lab[0]
    asys = ses.add_analysis(label=asys_label,inputs=[file_ref])

    # upload the results zip file
    zip_file = zip_files[0]
    print(zip_file)
    asys.upload_output(zip_file)


/media/will/Samsung_T5/store/analyses/122242/20170810x1221/antsct_sub-122242_ses-20170810x1221.zip
/media/will/Samsung_T5/store/analyses/122417/20171213x1012/antsct_sub-122417_ses-20171213x1012.zip
/media/will/Samsung_T5/store/analyses/122601/20170920x1313/antsct_sub-122601_ses-20170920x1313.zip
/media/will/Samsung_T5/store/analyses/122821/20180110x1405/antsct_sub-122821_ses-20180110x1405.zip
/media/will/Samsung_T5/store/analyses/123352/20180605x1417/antsct_sub-123352_ses-20180605x1417.zip
/media/will/Samsung_T5/store/analyses/125098/20190710x1517/antsct_sub-125098_ses-20190710x1517.zip
/media/will/Samsung_T5/store/analyses/105371/20071009x1020/antsct_sub-105371_ses-20071009x1020.zip
/media/will/Samsung_T5/store/analyses/105468/20060802x1113/antsct_sub-105468_ses-20060802x1113.zip
/media/will/Samsung_T5/store/analyses/114666/20050209x1135/antsct_sub-114666_ses-20050209x1135.zip


In [70]:
# check that all the analyses are uploaded
# The printed number counts sessions (not analyses): a session without analyses
# prints nothing and leaves a gap in the numbering.
for n, listed_ses in enumerate(project_sessions, start=1):
    ses = fw.get(listed_ses.id)  # re-fetch so newly created analyses are visible
    for asys in ses.analyses:
        print(f"{n}. {ses.label}/{asys.label}")

1. 123894x20190327x3T/antsct_2021-05-13_WT_PVS
2. 100049x20160621x3T/antsct_2021-05-13_WT_PVS
3. 100957_20110426/100957_100957_20110426_antsct-aging-fw_0.3.1_0.3.3_2021-11-16_12:11
4. 100113x20160620x3T/100113_100113x20160620x3T_antsct-aging-fw_0.3.1_0.3.3_2021-11-16_12:11
5. 120937x20160711x3T/120937_120937x20160711x3T_antsct-aging-fw_0.3.1_0.3.3_2021-11-16_12:11
6. 119851x20170606x3T/antsct_2021-05-13_WT_PVS
7. 121105x20170427x3T/antsct-aging-fw_2021-07-09_WT
8. 122005x20171213x3T/antsct-aging-fw_2021-07-09_WT
9. 122216x20200205x3T/antsct_2021-05-13_WT_PVS
10. 125687x20200714x3T/125687_125687x20200714x3T_antsct-aging-fw_0.3.1_0.3.3_2021-11-16_12:11
11. 112286x01x20200721x3T/antsct-aging_2021-11-5_17:48_WT_PVS
12. 124509x20200813x3T/antsct_2021-05-13_WT_PVS
13. 107486x20200929x3T/antsct_2021-05-13_WT_PVS
14. 125564x20201105x3T/125564_125564x20201105x3T_antsct-aging-fw_0.3.1_0.3.3_2021-11-16_12:11
15. 123831x20201201x3T/antsct_2021-05-13_WT_PVS
16. 123575x20201015x3T/antsct_2021-05-13_