In [1]:
import logging
import os
from pathlib import Path
import shutil

import pandas as pd
import pyprojroot
import vak

In [2]:
# Root of the results tree: the 'results' directory directly under the project root.
results_root = pyprojroot.here().joinpath('results')

Use error .csv files to get results dirs

In [3]:
# Recursively collect the error .csv files under the results root (in sorted
# order), keeping only those whose path mentions 'Bengalese_Finches'.
err_csvs = [
    csv_path
    for csv_path in sorted(results_root.rglob('err*csv'))
    if 'Bengalese_Finches' in str(csv_path)
]

In [4]:
# Names of the experiment directories whose results should be kept.
these_expt_dirs = ['filter_num', 'filter_size', 'hidden_size', 'learncurve', 'window_size']

In [5]:
# Keep only the error .csv files whose path contains one of the experiment
# directory names listed in `these_expt_dirs` (plain substring match).
err_csvs = [
    csv_path
    for csv_path in err_csvs
    if any(expt_dir in str(csv_path) for expt_dir in these_expt_dirs)
]

In [6]:
# Gather the distinct 'results_dir' values of every error .csv, in order of
# first appearance within each file. Duplicates across files are not removed here.
results_dirs = []
for err_csv in err_csvs:
    df = pd.read_csv(err_csv)
    results_dirs += df['results_dir'].drop_duplicates().tolist()

Now get spectrogram directories from the configs copied to the results dirs.  
And find the unique set of spectrogram directories.

In [7]:
# Some entries (noted for window_size_352) were saved as absolute paths that
# begin with this prefix instead of as relative 'results/...' paths.
old_abs_prefix = '/home/art/Documents/repos/coding/birdsong/tweetynet/'

results_dir_paths = []

for results_dir in results_dirs:
    # A string starting with the absolute prefix can never start with
    # 'results', so checking the prefix alone is sufficient.
    if results_dir.startswith(old_abs_prefix):
        results_dir = results_dir.replace(old_abs_prefix, '')

    # Resolve the (now relative) results dir against the project root.
    results_dir_paths.append(pyprojroot.here() / results_dir)

In [8]:
# Change the working directory to the project root instead of a hard-coded,
# machine-specific absolute path, so the notebook runs on other machines.
# NOTE(review): presumably needed so that relative paths stored in the configs
# resolve when read below -- confirm.
os.chdir(pyprojroot.here())

/home/art/Documents/repos/coding/birdsong/tweetynet/article


In [9]:
spect_generated_dirs = []

for results_dir_path in results_dir_paths:
    # Each results dir must contain exactly one .toml config.
    toml_paths = sorted(results_dir_path.glob('*.toml'))
    assert len(toml_paths) == 1
    config_path = toml_paths[0]

    # The config points at the dataset .csv, whose 'spect_path' column
    # lists the spectrogram files.
    config = vak.config.parse.from_toml_path(config_path)
    data_csv_path = config.learncurve.csv_path
    data_df = pd.read_csv(data_csv_path)

    # Distinct parent directories of the spectrogram files in this dataset.
    spect_path_parents = {
        Path(spect_path).parent for spect_path in data_df['spect_path']
    }
    spect_generated_dirs.extend(spect_path_parents)

# De-duplicate across all results dirs.
uniq_spect_generated_dirs = list(set(spect_generated_dirs))

In [10]:
# Iterate over a copy so entries of the original list can be replaced in place.
for ind, spect_generated_dir in enumerate(list(uniq_spect_generated_dirs)):
    # Rewrite paths recorded under 'tweetynet/data' to 'tweetynet/article/data'.
    dir_as_str = str(spect_generated_dir)
    if 'tweetynet/data' in dir_as_str:
        spect_generated_dir = Path(dir_as_str.replace('tweetynet/data', 'tweetynet/article/data'))

    # Fail loudly as soon as a directory is missing.
    if not spect_generated_dir.exists():
        raise FileNotFoundError(
            f'did not find data directory: {spect_generated_dir}'
        )

    print(spect_generated_dir, f"exists: {spect_generated_dir.exists()}")
    uniq_spect_generated_dirs[ind] = spect_generated_dir

/home/art/Documents/data/BirdsongRecognition/Bird0/Wave/spectrograms_generated_210528_224944 exists: True
/home/art/Documents/repos/coding/birdsong/tweetynet/article/data/BFSongRepository/learncurve/gy6or6/spectrograms_generated_210423_094826 exists: True
/home/art/Documents/repos/coding/birdsong/tweetynet/article/data/BFSongRepository/learncurve/or60yw70/spectrograms_generated_210423_095512 exists: True
/home/art/Documents/data/BirdsongRecognition/Bird7/Wave/spectrograms_generated_210527_213331 exists: True
/home/art/Documents/repos/coding/birdsong/tweetynet/article/data/BFSongRepository/learncurve/bl26lb16/spectrograms_generated_210423_094143 exists: True
/home/art/Documents/data/BirdsongRecognition/Bird9/Wave/spectrograms_generated_210527_213351 exists: True
/home/art/Documents/data/BirdsongRecognition/Bird4/Wave/spectrograms_generated_210528_224959 exists: True
/home/art/Documents/repos/coding/birdsong/tweetynet/article/data/BFSongRepository/learncurve/gr41rd51/spectrograms_generat

In [11]:
# Stream handler whose records show the timestamp, logger name, level and message.
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler = logging.StreamHandler()
handler.setFormatter(formatter)

# Attach it to the root logger.
root_logger = logging.getLogger()
root_logger.addHandler(handler)

In [15]:
# Set DRY_RUN to True to print what would be archived without writing anything.
DRY_RUN = False
# Set SKIP_EXISTING_TAR to True to leave already-created archives untouched.
SKIP_EXISTING_TAR = False

# Directory that receives the generated .tar.gz archives.
BASE_NAME_ROOT_PATH = pyprojroot.here() / 'results' / 'tars'

# Obtain the logger through `logging.getLogger` so that it is part of the
# logger hierarchy and its records propagate to handlers attached to the root
# logger. A directly instantiated `logging.Logger` has no parent and no
# handlers, so the INFO/DEBUG messages emitted by `shutil.make_archive` would
# never be shown.
logger = logging.getLogger('targz.logger')
logger.setLevel(logging.DEBUG)

for spect_dir in uniq_spect_generated_dirs:
    if not spect_dir.exists():
        print(
            f'does not exist: {spect_dir}'
        )
        continue

    print(
        f'\nMaking tar from:\n{spect_dir}'
    )

    # The animal ID sits at a different depth in the path for each dataset.
    if 'BFSongRepository' in str(spect_dir):
        animal_ID = spect_dir.parents[0].name
    elif 'BirdsongRecognition' in str(spect_dir):
        animal_ID = spect_dir.parents[1].name
    else:
        # Without this branch `animal_ID` would be unbound, or silently reuse
        # the value from the previous iteration and mislabel the archive.
        raise ValueError(
            f'could not determine animal ID, unrecognized dataset in path: {spect_dir}'
        )
    dir_name = spect_dir.name
    print(
        f'\n\tanimal ID: {animal_ID}\n\tdir name: {dir_name}'
    )

    base_filename = f'{animal_ID}-{dir_name}'
    base_name_path = BASE_NAME_ROOT_PATH / base_filename
    print(
        f'will generate archive as: {base_name_path}.tar.gz'
    )

    if SKIP_EXISTING_TAR:
        tar_path = base_name_path.parent / (base_name_path.name + '.tar.gz')
        if tar_path.exists():
            print(
                f'\tSKIP_EXISTING_TAR is true and tar exists:\n\t{tar_path}.\n\tSkipping.'
            )
            # Actually skip: move on to the next directory instead of
            # falling through and rebuilding the archive.
            continue

    if not DRY_RUN:
        # Archive the contents of `spect_dir`; shutil appends '.tar.gz' to the base name.
        shutil.make_archive(
            str(base_name_path),
            format='gztar',
            root_dir=spect_dir,
            logger=logger,
        )


Making tar from:
/home/art/Documents/data/BirdsongRecognition/Bird0/Wave/spectrograms_generated_210528_224944

	animal ID: Bird0
	dir name: spectrograms_generated_210528_224944
will generate archive as: /home/art/Documents/repos/coding/birdsong/tweetynet/article/results/tars/Bird0-spectrograms_generated_210528_224944.tar.gz

Making tar from:
/home/art/Documents/repos/coding/birdsong/tweetynet/article/data/BFSongRepository/learncurve/gy6or6/spectrograms_generated_210423_094826

	animal ID: gy6or6
	dir name: spectrograms_generated_210423_094826
will generate archive as: /home/art/Documents/repos/coding/birdsong/tweetynet/article/results/tars/gy6or6-spectrograms_generated_210423_094826.tar.gz

Making tar from:
/home/art/Documents/repos/coding/birdsong/tweetynet/article/data/BFSongRepository/learncurve/or60yw70/spectrograms_generated_210423_095512

	animal ID: or60yw70
	dir name: spectrograms_generated_210423_095512
will generate archive as: /home/art/Documents/repos/coding/birdsong/tweet