# Check Folders
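Check whether each results folder is complete, i.e. contains both a `bighitlist.txt` and a `smallhitlist.txt`. Also write lists of the complete and incomplete ranges, merge runs of consecutive complete folders into a single range, and upload sufficiently large ranges to Google Drive.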

Nicolas Chan, 10/19/2017

In [11]:
# Configuration
# All paths live under the same scratch directory.
_scratch_root = '/global/scratch/groups/dh/aanderson/'

# Folder whose subfolders (one per range) are checked for completeness
results_folder = _scratch_root + 'results/'
# Folder that receives the original subfolders after they have been merged
results_archive_folder = _scratch_root + 'run_archive/'
# Text files listing the incomplete / complete subfolder names, one per line
incomplete_list_path = _scratch_root + 'incomplete.txt'
complete_list_path = _scratch_root + 'complete.txt'

# Minimum range size for a complete folder to be uploaded
upload_threshold = 10

In [12]:
from os import listdir

from os.path import isdir, join


def _range_sort_key(folder_name):
    """Sort key that orders 'Results_<start>-<end>' names numerically.

    Names that do not follow that pattern sort after all parsable names,
    ordered alphabetically among themselves.
    """
    try:
        start, end = folder_name.split('_')[1].split('-')
        return (0, int(start), int(end), folder_name)
    except (IndexError, ValueError):
        return (1, 0, 0, folder_name)


incomplete = []
completed = []
consecutive_completed_ranges = [[]]

# Only directories can hold hit lists; stray files in the results folder are ignored
# (calling listdir() on them would raise). The numeric key keeps neighbouring ranges
# next to each other: a plain string sort would put 'Results_1000-1004' before
# 'Results_141-153'.
results_folders = sorted(
    (name for name in listdir(results_folder) if isdir(join(results_folder, name))),
    key=_range_sort_key,
)
for subfolder in results_folders:
    contents = listdir(join(results_folder, subfolder))
    # A folder is considered complete if it contains a bighitlist.txt and a smallhitlist.txt
    complete = 'bighitlist.txt' in contents and 'smallhitlist.txt' in contents

    if complete:
        completed.append(subfolder)
        # Extend the current run of complete folders
        consecutive_completed_ranges[-1].append(subfolder)
    else:
        incomplete.append(subfolder)
        # An incomplete folder ends the current run; start a new one if it was non-empty
        if consecutive_completed_ranges[-1]:
            consecutive_completed_ranges.append([])

    print(subfolder, 'Complete' if complete else 'INCOMPLETE')

# Keep only runs of two or more complete folders; a single folder has nothing to merge with.
# NOTE(review): "consecutive" means adjacent in the sorted listing; the numeric bounds
# are not checked for gaps or overlaps between neighbouring folders.
consecutive_completed_ranges = [r for r in consecutive_completed_ranges if len(r) > 1]
print(consecutive_completed_ranges)

Results_141-153 Complete
Results_154-158 INCOMPLETE
Results_159-163 Complete
Results_164-168 INCOMPLETE
Results_169-173 Complete
Results_174-183 INCOMPLETE
Results_179-183 Complete
Results_184-203 INCOMPLETE
[]


In [13]:
# Write list of incomplete ranges
import datetime

# The first line is a '# Generated <timestamp>' header, followed by one folder name per line.
# The context manager closes the file even if a write fails part-way through.
with open(incomplete_list_path, 'w') as incomplete_list:
    incomplete_list.write('# Generated ' + str(datetime.datetime.now()) + '\n')
    for inc in incomplete:
        incomplete_list.write(inc + '\n')

In [14]:
# Write list of complete ranges
# The first line is a '# Generated <timestamp>' header, followed by one folder name per line.
# The context manager closes the file even if a write fails part-way through.
with open(complete_list_path, 'w') as complete_list:
    complete_list.write('# Generated ' + str(datetime.datetime.now()) + '\n')
    for c in completed:
        complete_list.write(c + '\n')

In [15]:
# concatenate_files based on https://stackoverflow.com/a/13613375/8706910
def concatenate_files(files, output_file):
    """Write the contents of every file in `files`, in order, into `output_file`.

    All files are read and written as UTF-8 text. `output_file` is created or
    overwritten; with an empty `files` it ends up empty.
    """
    with open(output_file, 'w', encoding='utf-8') as merged:
        for source_path in files:
            with open(source_path, encoding='utf-8') as source:
                # Iterating the handle yields one line at a time
                merged.writelines(source)
                    
def results_folder_to_range(folder_name):
    """Parse a folder name such as 'Results_141-153' into (141, 153).

    The range is taken from the text between the first and second underscore
    (or the end of the name) and must consist of exactly two '-'-separated integers.
    """
    range_text = folder_name.split('_')[1]
    # Unpacking raises ValueError unless there are exactly two parts
    low_text, high_text = range_text.split('-')
    low = int(low_text)
    high = int(high_text)
    return low, high

import os
import sys
import errno

# dir-create copied from AdamAndersonFindSumerianWorkflow
def dir_create(path):
    """Create the directory `path`, including any missing parent directories.

    If `path` already exists, a message is printed and nothing else happens.
    Any other OSError is re-raised.
    """
    try:
        os.makedirs(path)
    except OSError as error:
        already_exists = error.errno == errno.EEXIST
        if not already_exists:
            raise
        print('Folder at: ' + path + ' already exists. Skipping...')

import shutil
# Merge all consecutive completed ranges into a single range

# The archive must exist as a directory before anything is moved into it: shutil.move()
# to a destination that does not exist renames the source to that path instead of
# moving it inside.
os.makedirs(results_archive_folder, exist_ok=True)

for completed_range in consecutive_completed_ranges:
    if not completed_range:
        continue

    # The merged range spans from the smallest start to the largest end of its folders
    bounds = [results_folder_to_range(folder) for folder in completed_range]
    start_index = min(start for start, _ in bounds)
    end_index = max(end for _, end in bounds)

    merge_folder_name = 'Results_' + str(start_index) + '-' + str(end_index)
    if merge_folder_name in completed_range:
        # Writing the merged hit lists would truncate this folder's own lists before
        # they are read, so leave the whole run untouched.
        print('Skipping merge: ' + merge_folder_name + ' is itself one of the folders to merge')
        continue

    smallhits_paths = [os.path.join(results_folder, folder, 'smallhitlist.txt') for folder in completed_range]
    bighits_paths = [os.path.join(results_folder, folder, 'bighitlist.txt') for folder in completed_range]

    merge_folder = os.path.join(results_folder, merge_folder_name)
    dir_create(merge_folder)
    concatenate_files(smallhits_paths, os.path.join(merge_folder, 'smallhitlist.txt'))
    concatenate_files(bighits_paths, os.path.join(merge_folder, 'bighitlist.txt'))
    print('Merged Range', start_index, end_index)

    # Move old folders to archive
    for folder in completed_range:
        shutil.move(os.path.join(results_folder, folder), results_archive_folder)
    
    

In [16]:
# If a range is large enough, upload its hit lists to Google Drive
# Added by Nicolas Chan based on previous code, 10/12/2017
def upload_txt_file(name, path, destination_folder=None):
    """Upload the text file at `path` to Google Drive under the file name `name`.

    If `destination_folder` is given it is passed as the file's only parent;
    otherwise no parent is specified. Prints the ID of the created file.
    """
    # NOTE(review): `service` and `MediaFileUpload` are not defined in the visible
    # cells; presumably a Drive API client and googleapiclient's upload helper. Confirm
    # they are set up before this cell runs.
    metadata = dict(name=name)
    if destination_folder:
        metadata.update(parents=[destination_folder])

    upload_body = MediaFileUpload(path, mimetype='text/plain')
    request = service.files().create(body=metadata, media_body=upload_body, fields='id')
    created = request.execute()
    print('Uploaded', name, '; ID:', created.get('id'))

# Upload the hit lists of every complete folder whose range is large enough.
# NOTE(review): `google_folder` is not defined in the visible cells; presumably the ID
# of the destination Drive folder. Define it before running this cell.
results_folders = sorted(listdir(results_folder))
for folder in results_folders:
    folder_path = os.path.join(results_folder, folder)
    if not os.path.isdir(folder_path):
        # Stray files in the results folder cannot hold hit lists
        continue
    # List the folder of the current iteration
    contents = listdir(folder_path)
    # A folder is considered complete if it contains a bighitlist.txt and a smallhitlist.txt
    complete = 'bighitlist.txt' in contents and 'smallhitlist.txt' in contents
    if not complete:
        continue
    start, end = results_folder_to_range(folder)
    # Both bounds are counted, e.g. 'Results_141-153' has size 13
    size = end - start + 1
    if size >= upload_threshold:
        upload_txt_file('smallhitlist.txt', os.path.join(folder_path, 'smallhitlist.txt'), google_folder)
        upload_txt_file('bighitlist.txt', os.path.join(folder_path, 'bighitlist.txt'), google_folder)