This script aggregates the chi from different months

In [1]:
%matplotlib inline
import xarray as xr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
import gc
import time
from tqdm import tqdm

def workflow(path, save_path, year):
    """Aggregate twelve monthly chi files into per-variable yearly files.

    For every month ``01`` .. ``12`` the file
    ``path + str(year) + "_" + month + ".nc"`` is opened and the variables
    ``chi_b``, ``chi_c`` and ``chi_h`` are collected.  For each variable the
    monthly pieces are combined with ``xr.merge`` and written to
    ``save_path + str(year) + "_" + chi + ".nc"``.  The mean and standard
    deviation over the ``time`` dimension are written alongside it with the
    ``_mean.nc`` and ``_std.nc`` suffixes.

    Parameters
    ----------
    path : str
        Prefix of the monthly input files.  It is concatenated as-is, so a
        directory must include its trailing separator.
    save_path : str
        Prefix of the output files, concatenated the same way as ``path``.
    year : str or int
        Year used in the input and output file names (passed through ``str``).

    Returns
    -------
    None
        Results are written to disk; progress and timings are printed.
    """
    chi_ls = ["chi_b", "chi_c", "chi_h"]
    # One list of monthly DataArrays per chi variable.
    chi_dict = {chi: [] for chi in chi_ls}

    for i in tqdm(range(1, 13)):
        month = str(i).zfill(2)
        ds = xr.open_dataset(path + str(year) + "_" + month + ".nc")
        for chi in chi_ls:
            chi_dict[chi].append(ds[chi])
        del ds
        gc.collect()
    print("Finished load data, start to merge data")

    for chi in tqdm(chi_ls):
        print("start to merge", chi)
        t0 = time.time()
        ds = xr.merge(chi_dict[chi])
        print("finished merge", chi, ". It took", time.time() - t0)

        # The monthly pieces are no longer needed once merged; drop them
        # before saving so the save timing below covers only the write.
        del chi_dict[chi]
        gc.collect()

        print("start to save", chi)
        t0 = time.time()
        ds.to_netcdf(save_path + str(year) + "_" + chi + ".nc")
        print("finished save", chi, ". It took", time.time() - t0)

        print("start to save the mean and std of", chi)
        # Restart the timer so the message below reports only the mean/std
        # stage instead of also counting the save above.
        t0 = time.time()
        ds_mean = ds.mean(dim="time")
        ds_mean.to_netcdf(save_path + str(year) + "_" + chi + "_mean.nc")
        ds_std = ds.std(dim="time")
        ds_std.to_netcdf(save_path + str(year) + "_" + chi + "_std.nc")
        print("finished mean, std of", chi, ". It took", time.time() - t0)

        del ds, ds_mean, ds_std
        gc.collect()
        print("\n")

## save the aggregated chi

In [2]:
year = "2011"

# Monthly input directory and aggregated-output directory for MAM4.
mam4_path = "/data/keeling/a/zzheng25/d/mam4_paper_data/mam4_cesm_cal/"
mam4_save_path = "/data/keeling/a/zzheng25/d/mam4_paper_data/chi_only/mam4_chi/"

# Monthly input directory and aggregated-output directory for ML.
ml_path = "/data/keeling/a/zzheng25/d/mam4_paper_data/mam4_cesm_pred/"
ml_save_path = "/data/keeling/a/zzheng25/d/mam4_paper_data/chi_only/ml_chi/"

# Run the aggregation for MAM4 first, then for ML.
for source_dir, target_dir in (
    (mam4_path, mam4_save_path),
    (ml_path, ml_save_path),
):
    workflow(source_dir, target_dir, year)

100%|██████████| 12/12 [00:03<00:00,  3.20it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

Finished load data, start to merge data
start to merge chi_b
finished merge chi_b . It took 383.46381068229675
start to save chi_b
finished save chi_b . It took 3.766683578491211
start to save the mean and std of chi_b


 33%|███▎      | 1/3 [06:30<13:00, 390.37s/it]

finished mean, std of chi_b . It took 6.850441932678223


start to merge chi_c
finished merge chi_c . It took 251.6718237400055
start to save chi_c
finished save chi_c . It took 4.814077854156494
start to save the mean and std of chi_c


 67%|██████▋   | 2/3 [10:50<05:51, 351.26s/it]

finished mean, std of chi_c . It took 8.214295387268066


start to merge chi_h
finished merge chi_h . It took 71.93686389923096
start to save chi_h
finished save chi_h . It took 4.369882106781006
start to save the mean and std of chi_h


100%|██████████| 3/3 [12:10<00:00, 243.34s/it]
  0%|          | 0/12 [00:00<?, ?it/s]

finished mean, std of chi_h . It took 7.639942646026611




100%|██████████| 12/12 [00:02<00:00,  4.61it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

Finished load data, start to merge data
start to merge chi_b
finished merge chi_b . It took 23.45857071876526
start to save chi_b
finished save chi_b . It took 1.9912714958190918
start to save the mean and std of chi_b


 33%|███▎      | 1/3 [00:27<00:54, 27.43s/it]

finished mean, std of chi_b . It took 3.861482858657837


start to merge chi_c
finished merge chi_c . It took 24.980205297470093
start to save chi_c
finished save chi_c . It took 2.0180394649505615
start to save the mean and std of chi_c


 67%|██████▋   | 2/3 [00:56<00:27, 27.96s/it]

finished mean, std of chi_c . It took 4.152737617492676


start to merge chi_h
finished merge chi_h . It took 23.365269660949707
start to save chi_h
finished save chi_h . It took 1.8340437412261963
start to save the mean and std of chi_h


100%|██████████| 3/3 [01:23<00:00, 27.90s/it]

finished mean, std of chi_h . It took 3.6470909118652344







## calculate difference 

In [3]:
# Compute MAM4 minus ML for every chi variable and save the difference
# together with its time-mean, time-mean of the absolute value, time-std and
# SNR (|mean| / std).
year = "2011"
mam4_path = "/data/keeling/a/zzheng25/d/mam4_paper_data/chi_only/mam4_chi/"
ml_path = "/data/keeling/a/zzheng25/d/mam4_paper_data/chi_only/ml_chi/"
save_path = "/data/keeling/a/zzheng25/d/mam4_paper_data/chi_only/mam4_minus_ml_chi/"

chi_ls = ["chi_b","chi_c","chi_h"]

for chi in tqdm(chi_ls):
    print("start to calculate",chi)
    t0 = time.time()
    # Open both yearly files only for the duration of the subtraction so the
    # file handles are closed deterministically. .load() pulls every variable
    # of the difference into memory before the files are closed.
    with xr.open_dataset(mam4_path+str(year)+"_"+chi+".nc") as ds_mam4, \
         xr.open_dataset(ml_path+str(year)+"_"+chi+".nc") as ds_ml:
        ds = (ds_mam4-ds_ml).load()
    del ds_mam4, ds_ml
    gc.collect()
    print("finished calculating",chi,". It took",time.time()-t0)

    print("start to save",chi)
    t0 = time.time()
    ds.to_netcdf(save_path+str(year)+"_"+chi+".nc")
    print("finished save",chi,". It took",time.time()-t0)

    print("start to save the mean, std, and SNR of",chi)
    # Restart the timer so the message below reports only the statistics
    # stage instead of also counting the save above.
    t0 = time.time()
    ds_mean = ds.mean(dim="time")
    ds_mean.to_netcdf(save_path+str(year)+"_"+chi+"_mean.nc")
    ds_mean_abs = np.abs(ds).mean(dim="time")
    ds_mean_abs.to_netcdf(save_path+str(year)+"_"+chi+"_mean_abs.nc")
    ds_std = ds.std(dim="time")
    ds_std.to_netcdf(save_path+str(year)+"_"+chi+"_std.nc")
    # SNR is defined here as |time-mean| divided by the time-std.
    ds_SNR = np.abs(ds_mean)/ds_std
    ds_SNR.to_netcdf(save_path+str(year)+"_"+chi+"_snr.nc")
    print("finished mean, std, SNR of",chi,". It took",time.time()-t0)

    # Release every intermediate, including the abs-mean and SNR results.
    del ds, ds_mean, ds_mean_abs, ds_std, ds_SNR
    gc.collect()
    print("\n")

  0%|          | 0/3 [00:00<?, ?it/s]

start to calculate chi_b
finished calculating chi_b . It took 4.8456597328186035
start to save chi_b
finished save chi_b . It took 3.839881181716919
start to save the mean, std, and SNR of chi_b


 33%|███▎      | 1/3 [00:13<00:27, 13.54s/it]

finished mean, std, SNR of chi_b . It took 8.629901885986328


start to calculate chi_c
finished calculating chi_c . It took 5.105469465255737
start to save chi_c
finished save chi_c . It took 3.3622400760650635
start to save the mean, std, and SNR of chi_c


 67%|██████▋   | 2/3 [00:26<00:13, 13.46s/it]

finished mean, std, SNR of chi_c . It took 8.099879264831543


start to calculate chi_h
finished calculating chi_h . It took 4.564489364624023
start to save chi_h
finished save chi_h . It took 3.8550920486450195
start to save the mean, std, and SNR of chi_h


100%|██████████| 3/3 [00:39<00:00, 13.33s/it]

finished mean, std, SNR of chi_h . It took 8.554002523422241





