# Combining forecast files along the lead-time dimension

In [1]:
import os
import sys
import yaml
from glob import glob
from datetime import datetime

import numpy as np
import xarray as xr

from concurrent.futures import ThreadPoolExecutor

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Put the project-local '../libs/' directory at the front of sys.path so the
# verif_utils import below can be resolved.
# NOTE: the relative path is resolved against the kernel's current working
# directory, so this cell only works when the notebook is run from its own folder.
sys.path.insert(0, os.path.realpath('../libs/'))
import verif_utils as vu

In [3]:
# Load the YAML settings that the later cells read through `conf`.
# NOTE: the relative file name is resolved against the kernel's current working directory.
config_name = os.path.realpath('verif_config_6h.yml')

# Decode explicitly as UTF-8 so that parsing does not depend on the platform /
# locale default text encoding.
with open(config_name, 'r', encoding='utf-8') as stream:
    conf = yaml.safe_load(stream)

In [4]:
# Top-level key of `conf` whose settings the cells below read (conf[model_name][...]).
model_name = 'fuxi'

In [5]:
def process_files_concurrently(base_dir, all_files_list, output_dir,
                               variables_levels, time_intervals=None, max_workers=10):
    '''
    Run vu.process_file_group on every entry of all_files_list using a thread pool.

    Parameters
    ----------
    base_dir : not used by this function; kept in the signature for existing callers.
    all_files_list : iterable; each element is handed to vu.process_file_group
        as its first argument.
    output_dir : passed to vu.create_dir before any work is submitted, then
        forwarded to every vu.process_file_group call.
    variables_levels : forwarded unchanged to vu.process_file_group.
    time_intervals : forwarded unchanged to vu.process_file_group (default None).
    max_workers : size of the ThreadPoolExecutor (default 10).

    Returns
    -------
    None. Values returned by vu.process_file_group are discarded.

    Raises
    ------
    Any exception raised inside a worker is re-raised here when that task's
    result is collected (tasks are collected in submission order).
    '''
    # Prepare the destination first (create it if it does not exist).
    vu.create_dir(output_dir)

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # Queue one task per file group ...
        pending = []
        for group in all_files_list:
            task = pool.submit(vu.process_file_group, group, output_dir,
                               variables_levels, time_intervals)
            pending.append(task)

        # ... then block on each one in turn so worker errors surface to the caller.
        for task in pending:
            task.result()

## Selected variables and levels

In [6]:
# Variable/level selection taken from the 'verif_variables' entry of the chosen
# model's config; this is the `variables_levels` argument that
# process_files_concurrently forwards to vu.process_file_group.
variables_levels = conf[model_name]['verif_variables']

In [7]:
# Input and output locations, read from the chosen model's config entries
# 'save_loc_rollout' and 'save_loc_gather'.
base_dir = conf[model_name]['save_loc_rollout']
output_dir = conf[model_name]['save_loc_gather']
# Forwarded as-is to vu.process_file_group.
# NOTE(review): presumably None means "no time sub-selection" — confirm in verif_utils.
time_intervals = None

# Get list of nc files
# NOTE(review): each element is later passed to vu.process_file_group as one
# file group — presumably one group per initialization subdirectory; confirm.
all_files_list = vu.get_nc_files(base_dir)

In [8]:
len(all_files_list) # <-- number of entries found under base_dir; the list goes beyond 2020, so a year filter may be needed

768

## Scenario: combine nc files for a single initialization time

In [9]:
# process_files_concurrently(base_dir, [all_files_list[201]], output_dir, variables_levels, time_intervals)

## Scenario: combine on a range of initializations

In [10]:
# NOTE(review): disabled retry loop, kept for reference; the captured output
# below this cell appears to come from an earlier run of it.
# NOTE(review): as indented, `flag_overall = flag_overall and flag` sits after
# the for loop rather than inside it, so only the last group's flag is folded
# into flag_overall — confirm whether it belongs inside the loop body.
# ind_start = 0
# ind_end = len(all_files_list)

# flag_overall = False

# while flag_overall is False:
    
#     flag_overall = True
#     for i in range(ind_start, ind_end):
#         # True: process can pass
#         flag = vu.process_file_group(all_files_list[i], output_dir, variables_levels, size_thres=917533564)

#     flag_overall = flag_overall and flag

Processing subdirectory: 2020-01-01T00Z
Output name: /glade/derecho/scratch/ksha/CREDIT/GATHER/fuxi_6h/2020-01-01T00Z.nc
File /glade/derecho/scratch/ksha/CREDIT/GATHER/fuxi_6h/2020-01-01T00Z.nc is valid ... move to file size checks.
Skipping 2020-01-01T00Z as /glade/derecho/scratch/ksha/CREDIT/GATHER/fuxi_6h/2020-01-01T00Z.nc already exists and exceeds size threshold.
Processing subdirectory: 2020-01-01T12Z
Output name: /glade/derecho/scratch/ksha/CREDIT/GATHER/fuxi_6h/2020-01-01T12Z.nc
File /glade/derecho/scratch/ksha/CREDIT/GATHER/fuxi_6h/2020-01-01T12Z.nc is valid ... move to file size checks.
Skipping 2020-01-01T12Z as /glade/derecho/scratch/ksha/CREDIT/GATHER/fuxi_6h/2020-01-01T12Z.nc already exists and exceeds size threshold.
Processing subdirectory: 2020-01-02T00Z
Output name: /glade/derecho/scratch/ksha/CREDIT/GATHER/fuxi_6h/2020-01-02T00Z.nc
File /glade/derecho/scratch/ksha/CREDIT/GATHER/fuxi_6h/2020-01-02T00Z.nc is valid ... move to file size checks.
Skipping 2020-01-02T00Z a