In [1]:
# Definitions and functions
import sys
import socket
import os
import logging
import glob
import shutil
from contextlib import closing

# Check which computer we are on to decide where things are mounted
comp_name = socket.gethostname()
print('Computer: ' + comp_name)

# Known hosts: repositories and experiment data live under /mnt/cube.
if any(host in comp_name for host in ('txori', 'passaro', 'lintu')):
    repos_folder = os.path.abspath('/mnt/cube/earneodo/repos')
    experiment_folder = os.path.join('/mnt/cube/earneodo/bci_zf/')
else:
    # Fail here with a clear message rather than with a NameError on
    # repos_folder a few lines below.
    raise RuntimeError(
        'Unknown computer {!r}: do not know where the repos are mounted'.format(comp_name))

# Make the project repositories importable; skip entries already present so
# that re-running the cell does not keep growing sys.path.
for repo_name in ('ephysflow', 'swissknife'):
    repo_path = os.path.join(repos_folder, repo_name)
    if repo_path not in sys.path:
        sys.path.append(repo_path)

from bci.core import expstruct as et
from archivetools import tar as tt
from bci.core.file import file_functions as ff
from threadtools import threadedfunction as td

# Configure the root logger to print DEBUG and above to the console.
# The console handler is tagged with a name so that re-running this cell
# reuses it instead of stacking a new handler (which would print every
# message several times).
logger = logging.getLogger()
formatter = logging.Formatter(
        '%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
handler = next((h for h in logger.handlers
                if h.get_name() == 'notebook_console'), None)
if handler is None:
    handler = logging.StreamHandler()
    handler.set_name('notebook_console')
    logger.addHandler(handler)
handler.setFormatter(formatter)
logger.setLevel(logging.DEBUG)

logger.debug('All modules loaded')

Computer: passaro


2017-02-10 11:41:45,606 root         DEBUG    All modules loaded


In [2]:
# Is this kernel running Python 3 or newer?
sys.version_info.major >= 3

True

In [3]:
# Bird to work on and the kind of data folder to archive
bird, data_folder = 'z020', 'raw'

In [4]:
def archive_data(bird, sess,
                 fold_type='raw',
                 locations={'orig': 'store',
                            'dest': 'archive',
                            'scratch': 'scratch'},
                 mode='xz',
                 use_scratch=False):
    """Compress one session folder of a bird and verify the resulting archive.

    The source folder is looked up through et.flex_file_names, compressed with
    tt.compress_folder and checked with tt.check_tar_archive.

    Args:
        bird: bird identifier passed to et.flex_file_names.
        sess: session identifier passed to et.flex_file_names.
        fold_type: key into the 'folders' dict returned by et.flex_file_names
            that selects which folder to archive.
        locations: dict with keys 'orig', 'dest' and 'scratch' whose values are
            location names for et.flex_file_names. The default dict is only
            read, never mutated.
        mode: tarfile compression suffix: '' (no compression), 'xz', 'gz' or
            'bz2'. It is passed to tt.compress_folder as 'w:<mode>'.
        use_scratch: when True, the source is first copied into the scratch
            'tmp' folder, compressed into the scratch folder, and the result is
            then moved to the destination.

    Returns:
        (f_struct, check_list): the per-location file structures and the list
        returned by tt.check_tar_archive.

    Raises:
        RuntimeError: if tt.check_tar_archive reports a mismatch.
        AssertionError: if the source and destination folders are the same.
    """
    logger.info('Will archive {0} data from bird {1}, sess {2}'.format(fold_type, bird, sess))
    f_struct = {key: et.flex_file_names(bird, sess, location=item)
                for key, item in locations.items()}
    # The destination entry is rebuilt WITHOUT `sess`.
    # NOTE(review): presumably this is the bird-level folder, so that the
    # shutil.move below drops the session folder inside it - confirm.
    f_struct['dest'] = et.flex_file_names(bird, location=locations['dest'])

    source_folder = f_struct['orig']['folders'][fold_type]
    logger.info('Source folder: {}'.format(source_folder))

    tmp_folder = None   # set only when a scratch copy is in use
    finished = False
    try:
        if use_scratch:
            logger.debug('Using scratch')
            tmp_folder = f_struct['scratch']['folders']['tmp']
            dest_folder = f_struct['scratch']['folders'][fold_type]
            logger.info('Will copy source into tmp {0}\n and use as source_folder'.format(tmp_folder))
            if os.path.exists(tmp_folder):
                logger.info('Cleaning up temp folder')
                shutil.rmtree(tmp_folder)
            shutil.copytree(source_folder, tmp_folder)
            source_folder = tmp_folder
            logger.debug('Source folder is now {0}\n'.format(source_folder))
        else:
            # NOTE(review): this is the sess-less destination folder and it is
            # deleted below if it already exists - confirm that this cannot
            # wipe archives of other sessions.
            dest_folder = f_struct['dest']['folders'][fold_type]

        # Never compress a folder into itself: dest_folder is deleted below.
        assert source_folder != dest_folder, 'source and destination folders are the same'

        if os.path.exists(dest_folder):
            logger.warning('Cleaning up pre-existing dest_folder {}'.format(dest_folder))
            shutil.rmtree(dest_folder)
        et.mkdir_p(dest_folder)
        logger.info('Dest folder: {}'.format(dest_folder))
        d_f, md_arch, md_list = tt.compress_folder(source_folder, dest_folder,
                                                   mode='w:{}'.format(mode))
        check_list, check_ok = tt.check_tar_archive(d_f, md_checklist=md_list)

        if not check_ok:
            # The placeholder used to be left unformatted; name the archive.
            raise RuntimeError('Archive {} md5 hashes dont match the originals'.format(d_f))

        if use_scratch:
            logger.debug('Have to move from sctratch to dest')
            final_dest_folder = f_struct['dest']['folders'][fold_type]
            logger.info('Final dest folder: {}'.format(final_dest_folder))
            # If final_dest_folder is an existing directory, shutil.move puts
            # dest_folder inside it rather than replacing it.
            shutil.move(dest_folder, final_dest_folder)
        finished = True
    finally:
        # Always drop the scratch copy, also when compression or verification
        # failed. On the failure path cleanup errors are ignored so they do
        # not hide the original exception.
        if tmp_folder is not None:
            logger.debug('Cleaning up tmp folder {}'.format(tmp_folder))
            shutil.rmtree(tmp_folder, ignore_errors=not finished)

    return f_struct, check_list

def archive_bird(bird, locations={'orig': 'experiment',
                                  'dest': 'archive',
                                  'scratch': 'scratch'},
                 data_folder='raw',
                 overwrite=False):
    """Archive every session folder of a bird through archive_data.

    Sessions are the folders that ff.list_folders finds in the `data_folder`
    folder of the 'orig' location. A session whose folder name already exists
    in the 'dest' location is skipped, unless `overwrite` is True, in which
    case the existing destination folder is deleted and the session is
    archived again.

    Args:
        bird: bird identifier passed to et.flex_file_names.
        locations: dict with keys 'orig', 'dest' and 'scratch' whose values are
            location names for et.flex_file_names. It is only read, and is
            forwarded to archive_data.
        data_folder: key into the 'folders' dict returned by
            et.flex_file_names; also passed to archive_data as `fold_type`.
        overwrite: re-archive sessions that are already in the destination.

    Returns:
        None. Progress is reported through the module-level logger.
    """
    fn_orig = et.flex_file_names(bird, location=locations['orig'])
    fn_dest = et.flex_file_names(bird, location=locations['dest'])
    dest_root = fn_dest['folders'][data_folder]

    orig_folders_list = ff.list_folders(fn_orig['folders'][data_folder])
    # Session names already in the destination (a set: membership test only).
    dest_folder_names = {os.path.split(s)[-1] for s in ff.list_folders(dest_root)}

    bird_archived = []

    for s in orig_folders_list:
        sess = os.path.split(s)[-1]
        if sess in dest_folder_names:
            logger.info('{0} already present in {1}'.format(sess, dest_root))
            if not overwrite:
                logger.info('Skipping')
                continue
            # Join with the session NAME, not with the source path `s`:
            # os.path.join drops everything before an absolute component, so
            # joining with an absolute `s` yields the source folder itself and
            # the rmtree below would delete the original data.
            stale_folder = os.path.join(dest_root, sess)
            logger.warning('Will delete {} to overwrite'.format(stale_folder))
            shutil.rmtree(stale_folder)
        logger.info('Archiving {}'.format(s))
        fstru, chk = archive_data(bird, sess, fold_type=data_folder,
                                  locations=locations, mode='xz', use_scratch=True)
        bird_archived.append([s, fstru, chk])

    logger.info('Done making archive for {0} sessions of bird {1}'.format(len(bird_archived), bird))

#fstru, chk = archive_data(bird, arch_sess, use_scratch=True)

In [5]:
# Check that every session found locally or on 'store' is also on cube
# (the 'experiment' location); print the folders that are missing there.
fn_b = et.flex_file_names(bird, location='experiment')
l_cube = ff.list_folders(fn_b['folders']['raw'])

fn_store = et.flex_file_names(bird, location='store')
l_store = ff.list_folders(fn_store['folders']['raw'])

# NOTE: fn_store is rebound here and now describes 'experiment_local'.
fn_store = et.flex_file_names(bird, location='experiment_local')
l_local = ff.list_folders(fn_store['folders']['raw'])

fn_archive = et.flex_file_names(bird, location='archive')
l_archived = ff.list_folders(fn_archive['folders']['raw'])

# Session names on cube, computed once instead of once per loop iteration.
cube_sessions = {os.path.split(l)[-1] for l in l_cube}

for i in l_local + l_store:
    if os.path.split(i)[-1] not in cube_sessions:
        print(i)
        #shutil.copytree(i, os.path.join(fn_b['folders']['raw'], os.path.split(i)[-1]))

In [6]:
# Show the session folders listed for the 'experiment' location (cube)
l_cube

['/mnt/cube/earneodo/bci_zf/raw_data/z020/2016-06-03_10-53-56_100',
 '/mnt/cube/earneodo/bci_zf/raw_data/z020/001',
 '/mnt/cube/earneodo/bci_zf/raw_data/z020/2016-06-02_20-22-38_100',
 '/mnt/cube/earneodo/bci_zf/raw_data/z020/2016-06-02_11-10-42_2100_00',
 '/mnt/cube/earneodo/bci_zf/raw_data/z020/2016-06-04_17-46-03_100',
 '/mnt/cube/earneodo/bci_zf/raw_data/z020/sleep_remote_2016-06-05_00-54-57_100',
 '/mnt/cube/earneodo/bci_zf/raw_data/z020/2016-06-06_10-47-28_150',
 '/mnt/cube/earneodo/bci_zf/raw_data/z020/2016-06-08_15-28-10_150',
 '/mnt/cube/earneodo/bci_zf/raw_data/z020/2016-06-05_07-51-42_100',
 '/mnt/cube/earneodo/bci_zf/raw_data/z020/2016-06-03_16-12-39_100',
 '/mnt/cube/earneodo/bci_zf/raw_data/z020/2016-06-15_14-05-24_150',
 '/mnt/cube/earneodo/bci_zf/raw_data/z020/2016-06-06_09-15-56_150',
 '/mnt/cube/earneodo/bci_zf/raw_data/z020/2016-06-27_16-33-20_225',
 '/mnt/cube/earneodo/bci_zf/raw_data/z020/2016-06-05_11-18-10_150',
 '/mnt/cube/earneodo/bci_zf/raw_data/z020/2016-06-0

In [7]:
# Also write the log to a file in the bird's 'ss' folder.
log_fn = et.file_names(bird)
log_file_path = os.path.join(log_fn['folders']['ss'],
                             'archive_all_{}.log'.format(data_folder))
formatter = logging.Formatter('%(asctime)s - %(threadName)-10s - %(levelname)s - %(message)s')
# Reuse a handler that already writes to this file, so that re-running the
# cell does not make every record appear twice in the log file.
fh = next((h for h in logger.handlers
           if isinstance(h, logging.FileHandler)
           and h.baseFilename == os.path.abspath(log_file_path)), None)
if fh is None:
    fh = logging.FileHandler(log_file_path)
    # add the handler to the logger
    logger.addHandler(fh)
fh.setFormatter(formatter)

In [8]:
# Location names (as used by et.flex_file_names) for the archiving job
locations = dict(orig='experiment', dest='archive', scratch='scratch')

#archive_bird(bird, locations=locations, overwrite=False)
# Run archive_bird through threadedFunction so the notebook stays responsive
a_archive = td.threadedFunction(
    args=(archive_bird, bird),
    kwargs=dict(locations=locations, data_folder=data_folder))
a_archive.start()

2017-02-10 11:42:05,460 threadtools.threadedfunction INFO     Starting function  in thread threadedFunction-1
2017-02-10 11:42:05,466 root         INFO     Archiving /mnt/cube/earneodo/bci_zf/raw_data/z020/2016-06-03_10-53-56_100
2017-02-10 11:42:05,467 root         INFO     Will archive raw data from bird z020, sess 2016-06-03_10-53-56_100
2017-02-10 11:42:05,471 root         INFO     Source folder: /mnt/cube/earneodo/bci_zf/raw_data/z020/2016-06-03_10-53-56_100
2017-02-10 11:42:05,472 root         DEBUG    Using scratch
2017-02-10 11:42:05,474 root         INFO     Will copy source into tmp /usr/local/experiment/scratchpad/tmp_data/z020/2016-06-03_10-53-56_100
 and use as source_folder
2017-02-10 11:50:11,399 root         DEBUG    Source folder is now /usr/local/experiment/scratchpad/tmp_data/z020/2016-06-03_10-53-56_100

2017-02-10 11:50:11,403 bci.core.expstruct DEBUG    Creating directory /usr/local/experiment/scratchpad/raw_data/z020/2016-06-03_10-53-56_100
2017-02-10 11:50:11,40

In [18]:
# Ask the archiving job (a_archive, started in an earlier cell) to terminate
a_archive.terminate()

In [10]:
# Is the archiving job still running?
a_archive.is_alive()

False

In [34]:
# Check whether a scratch tmp folder is still on disk.
# NOTE(review): this hard-coded path is for bird z017, not the `bird` set above.
os.path.exists('/usr/local/experiment/scratchpad/tmp_data/z017/2016-07-11_13-11-17_225')

True