# Generate qsub scripts for CONUS 404 and ERA5 data preparation

In [2]:
import os
import sys
import yaml
import numpy as np
import xarray as xr
from glob import glob

In [3]:
# sys.path.insert(0, os.path.realpath('../libs/'))
# import preprocess_utils as pu

In [4]:
# Directories used by every script-generating cell below:
#   qsub_loc    -- where the generated PBS job scripts are written
#   scripts_loc -- directory the generated jobs `cd` into before running Python
conf = dict(
    qsub=dict(
        qsub_loc='/glade/u/home/ksha/CONUS-404-prep/qsub/',
        scripts_loc='/glade/u/home/ksha/CONUS-404-prep/data_preprocessing_GP/scripts/',
    ),
)

## CONUS 404 preprocess

In [4]:
# Chunk edges for the per-job index ranges: 0, 2400, 4800, ..., IND_max.
IND_max = 376944
INDs = np.arange(start=0, stop=IND_max + 2400, step=2400)
# The padded stop makes the final edge overshoot IND_max; clamp it back.
INDs[len(INDs) - 1] = IND_max

In [3]:
# Count the entries under raw_404; the displayed result (376944) is the value
# hard-coded as IND_max in this section.
len(glob('/glade/campaign/ral/hap/ksha/DWC_data/CONUS_domain_GP/raw_404/*'))

376944

In [5]:
# Write one PBS job script per pair of consecutive chunk edges in INDs, then a
# batch script that submits every generated job.
qsub_dir = conf['qsub']['qsub_loc']

# Job template: only the working directory and the two chunk edges vary.
# NOTE(review): every chunk shares the same -o/-e file names, so the logs of
# different chunks may overwrite each other -- confirm this is intended.
job_template = '''#!/bin/bash -l

#PBS -N gather_C404
#PBS -A P48500028
#PBS -l walltime=23:59:59
#PBS -l select=1:ncpus=4:mem=32GB
#PBS -q casper
#PBS -o gather_C404.log
#PBS -e gather_C404.err

conda activate credit
cd {scripts_loc}
python DATA00_C404_1h_prep.py {ind_start} {ind_end}
'''

for i, (ind_start, ind_end) in enumerate(zip(INDs[:-1], INDs[1:])):
    job_script = job_template.format(
        scripts_loc=conf['qsub']['scripts_loc'],
        ind_start=ind_start,
        ind_end=ind_end,
    )
    job_path = os.path.join(qsub_dir, 'gather_C404_{:03d}.sh'.format(i))
    # `with` closes the file even if writing fails part-way through.
    with open(job_path, 'w') as f:
        print(job_script, file=f)

# Batch script: one `qsub` line per job script written above. The script names
# are relative, so the batch file has to be run from the qsub directory.
with open(os.path.join(qsub_dir, 'step00_gather_C404_all.sh'), 'w') as f:
    for i in range(len(INDs) - 1):
        print('qsub gather_C404_{:03d}.sh'.format(i), file=f)

In [4]:
# Chunk edges for the per-job index ranges: 0, 200, 400, ..., IND_max.
IND_max = 17544
INDs = np.arange(start=0, stop=IND_max + 200, step=200)
# The padded stop makes the final edge overshoot IND_max; clamp it back.
INDs[len(INDs) - 1] = IND_max

In [5]:
# Count the entries under raw_404_new; the displayed result (17544) is the
# value hard-coded as IND_max just above.
len(glob('/glade/campaign/ral/hap/ksha/DWC_data/CONUS_domain_GP/raw_404_new/*'))

17544

In [11]:
# Write one PBS job script per pair of consecutive chunk edges in INDs, then a
# batch script that submits every generated job.
# NOTE(review): the file names (gather_C404_NNN.sh, step00_gather_C404_all.sh)
# are the same ones written by the DATA00_C404_1h_prep.py cell earlier in this
# notebook, so running both cells overwrites the earlier scripts -- confirm
# this is intended.
qsub_dir = conf['qsub']['qsub_loc']

# Job template: only the working directory and the two chunk edges vary.
# NOTE(review): every chunk shares the same -o/-e file names, so the logs of
# different chunks may overwrite each other -- confirm this is intended.
job_template = '''#!/bin/bash -l

#PBS -N gather_C404
#PBS -A P48500028
#PBS -l walltime=23:59:59
#PBS -l select=1:ncpus=4:mem=32GB
#PBS -q casper
#PBS -o gather_C404.log
#PBS -e gather_C404.err

conda activate credit
cd {scripts_loc}
python DATA00_C404_1h_prep_new.py {ind_start} {ind_end}
'''

for i, (ind_start, ind_end) in enumerate(zip(INDs[:-1], INDs[1:])):
    job_script = job_template.format(
        scripts_loc=conf['qsub']['scripts_loc'],
        ind_start=ind_start,
        ind_end=ind_end,
    )
    job_path = os.path.join(qsub_dir, 'gather_C404_{:03d}.sh'.format(i))
    # `with` closes the file even if writing fails part-way through.
    with open(job_path, 'w') as f:
        print(job_script, file=f)

# Batch script: one `qsub` line per job script written above. The script names
# are relative, so the batch file has to be run from the qsub directory.
with open(os.path.join(qsub_dir, 'step00_gather_C404_all.sh'), 'w') as f:
    for i in range(len(INDs) - 1):
        print('qsub gather_C404_{:03d}.sh'.format(i), file=f)

In [6]:
# for i, ind_start in enumerate(INDs[:-1]):
    
#     ind_end = INDs[i+1]
    
#     f = open('{}gather_Cextra_{:03d}.sh'.format(conf['qsub']['qsub_loc'], i), 'w') 
    
#     heads = '''#!/bin/bash -l

# #PBS -N gather_Cextra
# #PBS -A P48500028
# #PBS -l walltime=23:59:59
# #PBS -l select=1:ncpus=4:mem=32GB
# #PBS -q casper
# #PBS -o gather_Cextra.log
# #PBS -e gather_Cextra.err

# conda activate credit
# cd {}
# python DATA00_C404_1h_extra.py {} {}
# '''.format(conf['qsub']['scripts_loc'], ind_start, ind_end, ind_start, ind_end)
    
#     print(heads, file=f)    
#     f.close()

# f = open('{}step00_gather_Cextra_all.sh'.format(conf['qsub']['qsub_loc']), 'w')

# for i, ind_start in enumerate(INDs[:-1]):
#     print('qsub gather_Cextra_{:03d}.sh'.format(i), file=f)
    
# f.close()

In [5]:
# Write one PBS job script per pair of consecutive chunk edges in INDs for the
# DATA00_C404_1h_L_extra.py step, then a batch script that submits them all.
qsub_dir = conf['qsub']['qsub_loc']

# Job template: only the working directory and the two chunk edges vary.
# NOTE(review): every chunk shares the same -o/-e file names, so the logs of
# different chunks may overwrite each other -- confirm this is intended.
job_template = '''#!/bin/bash -l

#PBS -N gather_Cextra
#PBS -A P48500028
#PBS -l walltime=20:59:59
#PBS -l select=1:ncpus=4:mem=16GB
#PBS -q casper
#PBS -o gather_Cextra.log
#PBS -e gather_Cextra.err

conda activate credit
cd {scripts_loc}
python DATA00_C404_1h_L_extra.py {ind_start} {ind_end}
'''

for i, (ind_start, ind_end) in enumerate(zip(INDs[:-1], INDs[1:])):
    job_script = job_template.format(
        scripts_loc=conf['qsub']['scripts_loc'],
        ind_start=ind_start,
        ind_end=ind_end,
    )
    job_path = os.path.join(qsub_dir, 'gather_Cextra_{:03d}.sh'.format(i))
    # `with` closes the file even if writing fails part-way through.
    with open(job_path, 'w') as f:
        print(job_script, file=f)

# Batch script: one `qsub` line per job script written above. The script names
# are relative, so the batch file has to be run from the qsub directory.
with open(os.path.join(qsub_dir, 'step00_gather_Cextra_all.sh'), 'w') as f:
    for i in range(len(INDs) - 1):
        print('qsub gather_Cextra_{:03d}.sh'.format(i), file=f)

## CONUS 404 packing

In [5]:
# Years handled by the packing cells: 2022, 2023, 2024 (stop is exclusive).
years = np.arange(start=2022, stop=2025)

In [6]:
# Write one PBS job script per year that runs DATA01_C404_packing.py for that
# year, then a batch script that submits every generated job.
qsub_dir = conf['qsub']['qsub_loc']

# Job template: the year is used as the job name, the log/err file stem and the
# single command-line argument of the packing script.
# NOTE(review): the job name and log/err file names consist only of the year,
# so they do not identify which processing step produced them -- confirm this
# is intended.
job_template = '''#!/bin/bash -l

#PBS -N {year}
#PBS -A P48500028
#PBS -l walltime=23:59:59
#PBS -l select=1:ncpus=4:mem=256GB
#PBS -q casper
#PBS -o {year}.log
#PBS -e {year}.err

conda activate credit
cd {scripts_loc}
python DATA01_C404_packing.py {year}
'''

for year in years:
    job_script = job_template.format(
        year=year,
        scripts_loc=conf['qsub']['scripts_loc'],
    )
    job_path = os.path.join(qsub_dir, 'C404_pack_{}.sh'.format(year))
    # `with` closes the file even if writing fails part-way through.
    with open(job_path, 'w') as f:
        print(job_script, file=f)

# Batch script: one `qsub` line per job script written above. The script names
# are relative, so the batch file has to be run from the qsub directory.
with open(os.path.join(qsub_dir, 'batch_C404_pack.sh'), 'w') as f:
    for year in years:
        print('qsub C404_pack_{}.sh'.format(year), file=f)

## ERA5 packing

In [6]:
# Write one PBS job script per year that runs DATA01_ERA5_packing.py for that
# year, then a batch script that submits every generated job.
qsub_dir = conf['qsub']['qsub_loc']

# Job template: the year is used as the job name, the log/err file stem and the
# single command-line argument of the packing script.
# NOTE(review): the job name and log/err file names consist only of the year,
# so they do not identify which processing step produced them -- confirm this
# is intended.
job_template = '''#!/bin/bash -l

#PBS -N {year}
#PBS -A NAML0001
#PBS -l walltime=7:59:59
#PBS -l select=1:ncpus=4:mem=64GB
#PBS -q casper
#PBS -o {year}.log
#PBS -e {year}.err

conda activate credit
cd {scripts_loc}
python DATA01_ERA5_packing.py {year}
'''

for year in years:
    job_script = job_template.format(
        year=year,
        scripts_loc=conf['qsub']['scripts_loc'],
    )
    job_path = os.path.join(qsub_dir, 'ERA5_pack_{}.sh'.format(year))
    # `with` closes the file even if writing fails part-way through.
    with open(job_path, 'w') as f:
        print(job_script, file=f)

# Batch script: one `qsub` line per job script written above. The script names
# are relative, so the batch file has to be run from the qsub directory.
with open(os.path.join(qsub_dir, 'batch_ERA5_pack.sh'), 'w') as f:
    for year in years:
        print('qsub ERA5_pack_{}.sh'.format(year), file=f)

## Descale data prep

In [4]:
# Years handled by the dscale cell: 1980 through 2023 (stop is exclusive).
years = np.arange(start=1980, stop=2024)

In [6]:
# Write one PBS job script per year that runs DATA04_C404_dscale_data.py for
# that year, then a batch script that submits every generated job.
qsub_dir = conf['qsub']['qsub_loc']

# Job template: the year is used as the job name, the log/err file stem and the
# single command-line argument of the script.
# NOTE(review): the job name and log/err file names consist only of the year,
# so they do not identify which processing step produced them -- confirm this
# is intended.
job_template = '''#!/bin/bash -l

#PBS -N {year}
#PBS -A P48500028
#PBS -l walltime=7:59:59
#PBS -l select=1:ncpus=4:mem=80GB
#PBS -q casper
#PBS -o {year}.log
#PBS -e {year}.err

conda activate credit
cd {scripts_loc}
python DATA04_C404_dscale_data.py {year}
'''

for year in years:
    job_script = job_template.format(
        year=year,
        scripts_loc=conf['qsub']['scripts_loc'],
    )
    job_path = os.path.join(qsub_dir, 'C404_dscale_{}.sh'.format(year))
    # `with` closes the file even if writing fails part-way through.
    with open(job_path, 'w') as f:
        print(job_script, file=f)

# Batch script: one `qsub` line per job script written above. The script names
# are relative, so the batch file has to be run from the qsub directory.
with open(os.path.join(qsub_dir, 'batch_C404_dscale.sh'), 'w') as f:
    for year in years:
        print('qsub C404_dscale_{}.sh'.format(year), file=f)

## Solar radiation calc

In [6]:
# Years handled by the solar cell: 1980 through 2029 (stop is exclusive).
years = np.arange(start=1980, stop=2030)

In [7]:
# Write one PBS job script per year that runs DATA02_WRF_solar_compute.py for
# that year, then a batch script that submits every generated job.
qsub_dir = conf['qsub']['qsub_loc']

# Job template: the year is used as the job name, the log/err file stem and the
# single command-line argument of the script.
# NOTE(review): the job name and log/err file names consist only of the year,
# so they do not identify which processing step produced them -- confirm this
# is intended.
job_template = '''#!/bin/bash -l

#PBS -N {year}
#PBS -A NAML0001
#PBS -l walltime=7:59:59
#PBS -l select=1:ncpus=4:mem=64GB
#PBS -q casper
#PBS -o {year}.log
#PBS -e {year}.err

conda activate credit
cd {scripts_loc}
python DATA02_WRF_solar_compute.py {year}
'''

for year in years:
    job_script = job_template.format(
        year=year,
        scripts_loc=conf['qsub']['scripts_loc'],
    )
    job_path = os.path.join(qsub_dir, 'TSI_{}.sh'.format(year))
    # `with` closes the file even if writing fails part-way through.
    with open(job_path, 'w') as f:
        print(job_script, file=f)

# Batch script: one `qsub` line per job script written above. The script names
# are relative, so the batch file has to be run from the qsub directory.
with open(os.path.join(qsub_dir, 'batch_TSI.sh'), 'w') as f:
    for year in years:
        print('qsub TSI_{}.sh'.format(year), file=f)