# Generate the `.nc` file for Analysis

This notebook generates $\chi_{\rm{a}}$, $\chi_{\rm{o}}$, and $\chi_{\rm{h}}$ for different seasons, first without masks (Step 1) and then with masks applied (Step 2).

In [1]:
import warnings
warnings.filterwarnings('ignore')
import xarray
from string import ascii_letters
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import xarray as xr
import seaborn as sns
import gc
import time
#import util

def ms_plot(df,variable,cmap):
    """Draw ``df[variable]`` as square markers on a world map, then overlay contours.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``"lon"``, ``"lat"`` and ``variable`` columns.  A
        ``"lon_new"`` column is written onto this frame (the caller's object
        is modified).
    variable : str
        Name of the column used for the marker colors and the contour values.
    cmap : str
        Colormap handed to ``plt.scatter``.  ``"bwr"`` selects a -1..1 color
        range and filled continents; anything else uses a 0..1 range.

    NOTE(review): ``Basemap`` is not imported in the visible part of this
    file -- it has to be in scope before this function is called.
    """
    # Longitudes above 180 are shifted down by 360; the result is kept on df.
    wrapped_lon = np.where(df['lon'] <= 180, df['lon'], df['lon']-360)
    df["lon_new"] = wrapped_lon

    plt.figure(figsize=(18, 12))

    # Base layer: coastlines, political borders and a 30-degree graticule.
    world = Basemap()
    world.drawcoastlines()
    world.drawstates()
    world.drawcountries()
    label_sides = [True, False, False, True]
    world.drawmeridians(range(0, 360, 30), labels=label_sides, fontsize=20)
    world.drawparallels(range(-90, 100, 30), labels=label_sides, fontsize=20)

    # The two color modes differ only in the lower color limit and in
    # whether continents are filled underneath the markers.
    diverging = cmap == "bwr"
    if diverging:
        world.fillcontinents(zorder=0)
    lower_limit = -1 if diverging else 0
    plt.scatter(
        df["lon_new"],
        df["lat"],
        c=df[variable],
        marker="s",
        s=33,
        cmap=cmap,
        vmin=lower_limit,
        vmax=1,
    )

    # Contours are drawn from the original "lon" column, not "lon_new".
    proj_x, proj_y = world(df["lon"], df["lat"])
    world.contour(proj_x, proj_y, df[variable])

    colorbar = plt.colorbar(orientation="vertical", fraction=0.023, pad=0.01)
    colorbar.ax.tick_params(labelsize=20)
    plt.show()
    
    
def process_ts(res_ls, chi_method_ls, chi_ls, season, year, variable_ls, verbose=False, raw_list=False,
               data_dir="/data/keeling/a/zzheng25/a/PartMC-MAM4/PartMC-ML-updated-predictions/"):
    """Load the monthly prediction files of one season and merge them.

    Parameters
    ----------
    res_ls : list of str
        Resolution labels, e.g. ``["f19"]``; used as first-level keys of the
        result.
    chi_method_ls : list of str
        Method labels, e.g. ``["pred", "cal"]``; used as second-level keys.
    chi_ls : list of str
        Not used by this function; kept for call compatibility.
    season : iterable of int
        Month numbers to load, e.g. ``[12, 1, 2]``.  Each is zero-padded to
        two digits to form the file name ``<MM>.nc``.
    year
        Not used by this function; kept for call compatibility.
    variable_ls : list of str
        Variables selected from every monthly file before merging.
    verbose : bool
        Print progress messages when true.
    raw_list : bool
        Not used by this function; kept for call compatibility.
    data_dir : str
        Directory holding the monthly ``<MM>.nc`` files.

    Returns
    -------
    dict
        ``result[res][chi_method]`` is the ``xarray.Dataset`` produced by
        ``xr.merge`` over the selected months.

    Notes
    -----
    The file path depends only on the month, so every ``(res, chi_method)``
    pair loads the same files.
    NOTE(review): the opened datasets are not closed here; the merged result
    may still reference them lazily -- confirm before adding a close.
    """
    import os

    ds_dict = {}
    for res in res_ls:
        ds_dict[res] = {}
        if verbose:
            print("Start", res)
        for chi_method in chi_method_ls:
            if verbose:
                print("Start method:", chi_method)

            # Collect the months in a local list rather than under a scratch
            # key of the returned dict, so nothing can collide with a
            # resolution label or linger if the merge fails.
            monthly = []
            for i in season:
                month = str(i).zfill(2)
                if verbose:
                    print("start the month:", month)
                path = os.path.join(data_dir, month + ".nc")
                print("path is:", path)
                ds = xr.open_dataset(path)
                # each month contains the mixing state indices
                monthly.append(ds[variable_ls])

            ds_dict[res][chi_method] = xr.merge(monthly)

            # Drop the per-month references before the next allocation.
            del monthly
            gc.collect()

            if verbose:
                print("finished", chi_method, "\n")
    return ds_dict

def _seasonal_mean(ds, chi, columns, keep=None):
    """Convert ``ds[columns]`` to a frame, scale ``chi`` by 100, average per grid cell.

    ``keep`` optionally selects/reorders columns before the averaging step.
    The progress and ``describe()`` printouts are part of the behavior.
    """
    df = ds[columns].to_dataframe()
    print("Before process")
    print(df.describe())
    # Rescale the index by 100 (fraction -> percent).
    df[chi] = df[chi] * 100
    print("after process")
    print(df.describe())
    if keep is not None:
        df = df[keep]
    # Index levels become columns, then everything is averaged per (lat, lon).
    gridded = df.reset_index().groupby(["lat", "lon"]).mean()
    del df
    gc.collect()
    print("finish", chi)
    return gridded


def workflow(season_ls, res_ls, chi_method_ls, chi_ls, year, variable_ls, season_months=None):
    """Build the per-grid-cell seasonal means of the three mixing state indices.

    For every season the monthly data are loaded with ``process_ts`` and
    three frames are produced -- ``chi_abd`` with the six species masses,
    ``chi_opt1`` with ``s1_opt1``/``s2_opt1``, and ``chi_hyg`` with
    ``s1_hyg``/``s2_hyg`` -- each averaged over ``(lat, lon)`` and joined
    column-wise.

    Parameters
    ----------
    season_ls : list of str
        Season names to process, in order.
    res_ls : list of str
        Resolution labels; only ``res_ls[0]`` is read from the loaded data.
    chi_method_ls, chi_ls, year, variable_ls
        Forwarded unchanged to ``process_ts``.  The ``"pred"`` method must be
        among ``chi_method_ls`` because it is the only one read here.
    season_months : dict, optional
        Maps each season name to its list of month numbers.  When omitted,
        the module-level ``season_month`` mapping is used, so that mapping
        must exist before the call.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``(lat, lon)``, with a ``"season"`` column; the frames of
        all seasons are concatenated row-wise.
    """
    month_lookup = season_month if season_months is None else season_months
    mass_columns = ["Mass_bc", "Mass_dst", "Mass_ncl", "Mass_pom", "Mass_soa", "Mass_so4"]

    df_ls = []
    for season in season_ls:
        print("Start Season:", season)
        t0 = time.time()
        df_dict = process_ts(res_ls, chi_method_ls, chi_ls, season=month_lookup[season],
                             year=year, variable_ls=variable_ls)
        print("It took",time.time()-t0,"to get data for this season")

        # Only the first resolution and the "pred" method are used.
        ds = df_dict[res_ls[0]]["pred"]
        per_chi = []

        ## chi_abd: keep the index together with the six species masses
        chi = "chi_abd"
        print("start convert:",chi)
        per_chi.append(_seasonal_mean(ds, chi, [chi] + mass_columns))

        ## chi_opt1: black carbon versus all other species
        chi = "chi_opt1"
        print("start convert:",chi)
        ds["s1_opt1"] = ds["Mass_bc"]
        ds["s2_opt1"] = ds["Mass_dst"] + ds["Mass_ncl"] + ds["Mass_pom"] + ds["Mass_soa"] + ds["Mass_so4"]
        per_chi.append(_seasonal_mean(ds, chi, ["s1_opt1", "s2_opt1", chi],
                                      keep=[chi, "s1_opt1", "s2_opt1"]))

        ## chi_hyg: bc + dst + pom versus ncl + soa + so4
        chi = "chi_hyg"
        print("start convert:",chi)
        ds["s1_hyg"] = ds["Mass_bc"] + ds["Mass_dst"] + ds["Mass_pom"]
        ds["s2_hyg"] = ds["Mass_ncl"] + ds["Mass_soa"] + ds["Mass_so4"]
        per_chi.append(_seasonal_mean(ds, chi, ["s1_hyg", "s2_hyg", chi],
                                      keep=[chi, "s1_hyg", "s2_hyg"]))

        # Join the three frames side by side and tag them with the season.
        season_df = pd.concat(per_chi, axis=1)
        season_df["season"] = season
        df_ls.append(season_df)

        del ds, per_chi, season_df
        gc.collect()

    return pd.concat(df_ls)

## Step 1: get the `.nc` file without mask

In [2]:
%%time
res_ls=["f09"]
chi_method_ls=["pred"]
chi_ls=["chi_abd","chi_hyg","chi_opt1"]
variable_ls=['Mass_so4','Mass_bc','Mass_ncl','Mass_dst','Mass_pom','Mass_soa',
             'chi_hyg','chi_opt1','chi_abd']

season_ls = ["DJF",
             "JJA"]

season_month={
    "DJF":[12,1,2],
    "JJA":[6,7,8]
}

df_temp=workflow(season_ls=season_ls, res_ls=res_ls, 
         chi_method_ls=chi_method_ls, chi_ls=chi_ls, 
         year="2011_spinup", variable_ls=variable_ls)

df_temp.reset_index().set_index(["lat","lon","season"])\
.to_xarray().to_netcdf("../nc_files/f09.nc")

Start Season: DJF
path is: /data/keeling/a/zzheng25/a/PartMC-MAM4/PartMC-ML-updated-predictions/12.nc
path is: /data/keeling/a/zzheng25/a/PartMC-MAM4/PartMC-ML-updated-predictions/01.nc
path is: /data/keeling/a/zzheng25/a/PartMC-MAM4/PartMC-ML-updated-predictions/02.nc
It took 16.324054479599 to get data for this season
start convert: chi_abd
Before process
            chi_abd       Mass_bc      Mass_dst      Mass_ncl      Mass_pom  \
count  3.981312e+07  3.981312e+07  3.981312e+07  3.981312e+07  3.981312e+07   
mean   6.449360e-01  1.045765e-10  2.893112e-10  3.856882e-10  4.263437e-10   
std    1.441138e-01  5.333924e-10  2.248846e-09  6.056717e-10  2.185833e-09   
min    0.000000e+00  9.057785e-17  1.572680e-30  3.897496e-22  1.828903e-16   
25%    5.535300e-01  3.802080e-13  6.789547e-14  3.338701e-12  1.446295e-12   
50%    6.578728e-01  7.275727e-12  1.105378e-12  1.232571e-10  2.333841e-11   
75%    7.572315e-01  3.337169e-11  1.125009e-11  5.555466e-10  1.168953e-10   
max    1

             s1_hyg        s2_hyg       chi_hyg
count  4.069786e+07  4.069786e+07  4.069786e+07
mean   7.989762e-10  1.489334e-09  5.878309e+01
std    3.708642e-09  2.782900e-09  1.485052e+01
min    7.009150e-15  7.336979e-17  0.000000e+00
25%    6.405370e-12  4.167467e-11  4.907626e+01
50%    4.523225e-11  6.092352e-10  5.755573e+01
75%    3.025035e-10  1.630228e-09  6.859579e+01
max    1.234627e-06  1.309168e-07  1.000000e+02
finish chi_hyg
CPU times: user 2min 10s, sys: 37.2 s, total: 2min 47s
Wall time: 2min 56s


## Step 2: create masks

In [2]:
# Step 2: blank out each mixing state index where one group/species makes up
# nearly all of the mass, then save the masked fields.
res_name = "f09"
ds = xr.open_dataset("../nc_files/" + res_name + ".nc")

# Mass fraction of the first group in the two-group definitions.
ds["opt1_per"] = ds["s1_opt1"] / (ds["s1_opt1"] + ds["s2_opt1"])
ds["hyg_per"] = ds["s1_hyg"] / (ds["s1_hyg"] + ds["s2_hyg"])

# Mass fraction of every individual species.
species = ["bc", "dst", "ncl", "pom", "soa", "so4"]
ds["Mass_all"] = ds["Mass_bc"]+ds["Mass_dst"]+ds["Mass_ncl"]+ds["Mass_pom"]+ds["Mass_soa"]+ds["Mass_so4"]
for var in species:
    ds[var + "_per"] = ds["Mass_" + var] / ds["Mass_all"]

species_per = [var + "_per" for var in species]
ds_s = ds[["chi_abd", "chi_opt1", "chi_hyg", "opt1_per", "hyg_per"] + species_per]
df_s = ds_s.to_dataframe()

# Two-group indices: mask where the first group's fraction leaves [low, high].
# Assign through .loc -- the chained form df_s[col][mask] = ... writes into a
# possibly temporary object and is a silent no-op under pandas Copy-on-Write.
for chi, low, high in [("hyg", 0.025, 0.975), ("opt1", 0.01, 0.99)]:
    fraction = df_s[chi + "_per"]
    df_s.loc[(fraction < low) | (fraction > high), "chi_" + chi] = np.nan

# Species-based index: mask where any single species exceeds 97.5 % of the mass.
single_species = (df_s[species_per] > 0.975).any(axis=1)
df_s.loc[single_species, "chi_abd"] = np.nan

df_s.to_xarray().to_netcdf("../nc_files/" + res_name + "_mask.nc")
print("Save at: ../nc_files/"+res_name+"_mask.nc")

del ds, ds_s, df_s
gc.collect()

Save at: ../nc_files/f09_mask.nc


52