# Combining forecast files along the lead-time dimension

In [None]:
import os
import sys
import yaml
from glob import glob
from datetime import datetime

import numpy as np
import xarray as xr

from concurrent.futures import ThreadPoolExecutor

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Put ../libs (resolved against the current working directory) at the front of
# the module search path before importing verif_utils.
sys.path.insert(0, os.path.realpath('../libs/'))
import verif_utils as vu

In [None]:
# Absolute path of the verification config; 'verif_config.yml' is resolved
# against the current working directory.
config_name = os.path.realpath('verif_config.yml')

# Parse the YAML file into `conf`; safe_load only builds plain Python data types.
with open(config_name, 'r') as stream:
    conf = yaml.safe_load(stream)

In [None]:
# Top-level key of `conf` whose section the cells below read their settings from.
model_name = 'wxformer'

In [None]:
def process_files_concurrently(base_dir, all_files_list, output_dir, variables_levels, time_intervals=None, max_workers=10):
    """
    Run ``vu.process_file_group`` on every file group using a thread pool.

    Parameters
    ----------
    base_dir
        Not used inside this function; kept so existing calls stay valid.
    all_files_list
        Iterable of file groups. Each element is handed to one
        ``vu.process_file_group`` call as its first argument.
    output_dir
        Directory passed to ``vu.create_dir`` up front and then forwarded
        to every worker call.
    variables_levels
        Forwarded unchanged to ``vu.process_file_group``.
    time_intervals
        Forwarded unchanged to ``vu.process_file_group`` (default ``None``).
    max_workers
        Maximum number of worker threads in the pool (default 10).

    Returns
    -------
    None
        Worker return values are discarded. An exception raised inside a
        worker is re-raised here when that worker's result is collected.
    """
    # Make sure the output directory exists before any worker starts.
    vu.create_dir(output_dir)

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # Queue one task per file group.
        pending = []
        for group in all_files_list:
            task = pool.submit(vu.process_file_group, group, output_dir,
                               variables_levels, time_intervals)
            pending.append(task)

        # Collect in submission order; .result() blocks until the task is done
        # and surfaces any exception raised by the worker.
        for task in pending:
            task.result()

## Selected variables and levels

In [None]:
# The 'verif_variables' entry of the selected model's config section
# (presumably the variables and levels to keep; confirm against verif_config.yml).
variables_levels = conf[model_name]['verif_variables']

In [None]:
# Input and output locations are read from the selected model's config section.
base_dir = conf[model_name]['save_loc_rollout']
output_dir = conf[model_name]['save_loc_gather']
# None is forwarded as-is to the processing helper; how it is interpreted is
# decided inside vu.process_file_group (TODO confirm).
time_intervals = None

# Get list of NetCDF files
# NOTE(review): each element appears to be one file group, since elements are
# passed individually as the file list of a processing call; confirm in vu.get_nc_files.
all_files_list = vu.get_nc_files(base_dir)

In [None]:
len(all_files_list) # <-- count of all entries found; the list extends beyond 2020, so a year filter may be needed

## Scenario: combine nc files on a single initialization time

The netCDF time-coordinate encoding warning is not resolved, but it does not affect the verification results.

In [None]:
# process_files_concurrently(base_dir, [all_files_list[201]], output_dir, variables_levels, time_intervals)

## Scenario: combine on a range of initializations

In [None]:
# ind_start = 0
# ind_end = 2192

# for i in range(ind_start, ind_end):
#     i_correct = i  # range() already starts at ind_start; adding it again would double the offset
#     process_files_concurrently(base_dir, [all_files_list[i_correct]], output_dir, variables_levels, time_intervals)