# Land and Ocean Difference

In [1]:
import numpy as np
import pandas as pd
import xarray as xr
from tqdm import tqdm
import gc
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import matplotlib.ticker as mticker
from util import *
import geopandas as gpd
from geopandas import GeoDataFrame as gdf

def open_chi(path):
    """Read a NetCDF dataset, scale it by 100 and wrap longitude to [-180, 180).

    Parameters
    ----------
    path : str
        Location of the NetCDF file. The dataset must expose a ``lon``
        coordinate.

    Returns
    -------
    xarray.Dataset
        In-memory dataset whose data variables have all been multiplied by
        100 (presumably fraction -> percent; confirm against the data) and
        whose ``lon`` coordinate is mapped onto [-180, 180) and sorted in
        ascending order.
    """
    # Load everything while the file is open so the handle is released on
    # exit from the ``with`` block instead of lingering until garbage
    # collection; the returned dataset no longer depends on the file.
    with xr.open_dataset(path) as raw:
        ds = raw.load() * 100
    # Map 0..360 style longitudes onto -180..180.
    ds = ds.assign_coords(lon=(((ds.lon + 180) % 360) - 180))
    # Put the wrapped longitudes back into ascending order.
    return ds.sortby("lon")

In [2]:
# --- analysis settings ---
year = "2011"
method_ls = ["MAM4", "ML"]
chi_ls = ["chi_b", "chi_c", "chi_h"]

# Directory (with trailing slash) holding the chi files for each method key.
# "diff" and "diff_abs" share one directory; only the file name differs.
file_path = {
    "MAM4": "/data/keeling/a/zzheng25/d/mam4_paper_data/chi_only/mam4_chi/",
    "ML": "/data/keeling/a/zzheng25/d/mam4_paper_data/chi_only/ml_chi/",
    "diff": "/data/keeling/a/zzheng25/d/mam4_paper_data/chi_only/mam4_minus_ml_chi/",
    "diff_abs": "/data/keeling/a/zzheng25/d/mam4_paper_data/chi_only/mam4_minus_ml_chi/",
}
mask_path = "/data/keeling/a/zzheng25/d/mam4_paper_data/chi_only/mask/"

# Composition columns kept from the yearly composition file.
per_ls = [
    "bc_a1_per",
    "bc_a4_per",
    "dst_a1_per",
    "dst_a2_per",
    "ncl_a1_per",
    "ncl_a2_per",
    "pom_a1_per",
    "pom_a4_per",
    "so4_a1_per",
    "so4_a2_per",
    "soa_a1_per",
    "soa_a2_per",
]

# Yearly composition table: the selected columns plus the former index
# levels as ordinary columns.
# NOTE(review): open_nc is not defined in this notebook -- presumably it
# comes from `from util import *`; confirm.
comp = (
    open_nc(
        "/data/keeling/a/zzheng25/d/mam4_paper_data/chi_only/comp_analysis/"
        + str(year)
        + "_year_comp.nc"
    )
    .to_dataframe()[per_ls]
    .reset_index()
)

## Load data

In [3]:
# da[chi][method] -> masked DataArray of that chi variable for that method.
da = {}
for chi in tqdm(chi_ls):
    # The mask file depends only on the year and the chi variable, so it is
    # read once per chi instead of once per (chi, method) pair.
    mask = open_chi(mask_path + str(year) + "_" + chi + ".nc")["mask"]

    da[chi] = {}
    for method in method_ls:
        # "diff_abs" reads the "_mean_abs" file; every other method reads
        # the plain "_mean" file.
        suffix = "_mean_abs.nc" if method == "diff_abs" else "_mean.nc"
        da_temp = open_chi(file_path[method] + str(year) + "_" + chi + suffix)[chi]
        # Keep values where the mask is truthy; the rest become NaN.
        da[chi][method] = da_temp.where(mask)
        del da_temp

    # Drop the temporaries and reclaim memory before the next variable.
    del mask
    gc.collect()

100%|██████████| 3/3 [00:02<00:00,  1.13it/s]


## Workflow

In [4]:
# split the gridded chi values of one method into land and ocean samples
def get_land_ocean(method, da):
    """Separate the chi values of ``method`` into land and ocean points.

    The three chi fields (``chi_b``, ``chi_c``, ``chi_h``) are flattened to
    data frames, outer-joined on ``lat``/``lon`` and spatially joined against
    the ``naturalearth_lowres`` polygons. Points contained in a polygon count
    as land; points matched by no polygon count as ocean. The per-type mean
    of each group is printed/displayed as a side effect.

    Parameters
    ----------
    method : str
        Key selecting the entry of ``da[chi]`` to use (e.g. ``"MAM4"``).
    da : dict
        Nested mapping ``da[chi][method]`` whose values provide
        ``to_dataframe()`` (xarray DataArrays in this notebook).

    Returns
    -------
    tuple of pandas.DataFrame
        ``(land, ocean)`` in long format with the columns ``lat``, ``lon``,
        ``mixing_state_type`` and ``mixing_state_index``.
    """

    def _to_long_and_report(subset, label):
        # Reshape to one row per (lat, lon, chi type), then show the means.
        tidy = (
            subset.set_index(["lat", "lon"])
            .stack()
            .reset_index(name="mixing_state_index")
            .rename(columns={'level_2': 'mixing_state_type'})
        )
        print(label)
        display(tidy.groupby(["mixing_state_type"])["mixing_state_index"].describe()["mean"])
        return tidy

    # One flat table per chi variable, joined on the grid coordinates.
    frames = [
        da[name][method].to_dataframe().reset_index()
        for name in ("chi_b", "chi_c", "chi_h")
    ]
    df = (
        frames[0]
        .merge(frames[1], on=["lat", "lon"], how="outer")
        .merge(frames[2], on=["lat", "lon"], how="outer")
    )

    # NOTE(review): `gpd.datasets` and the `op=` keyword are deprecated or
    # removed in newer geopandas releases -- confirm the installed version
    # before upgrading.
    world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
    indexed = df.reset_index()
    points = gdf(indexed, geometry=gpd.points_from_xy(indexed.lon, indexed.lat))
    points.crs = "EPSG:4326"
    # Right join keeps every grid point; index_left is null when no polygon
    # contains the point.
    jps = gpd.sjoin(world, points, how='right', op='contains')

    vari_ls_new = ["lat", "lon", "chi_b", "chi_c", "chi_h"]
    on_land = jps["index_left"].notnull()
    land = jps[on_land][vari_ls_new]
    ocean = jps[~on_land][vari_ls_new]

    land_new = _to_long_and_report(land, "land")
    ocean_new = _to_long_and_report(ocean, "ocean")
    print("\n")

    return land_new, ocean_new

# composition summary for a land or ocean table
def comp_analysis(df, chi, lat_min=-90, lat_max=90, lon_min=-180, lon_max=180, comp=comp):
    """Display column means for one chi type inside a lat/lon box.

    Rows of ``df`` whose ``mixing_state_type`` equals ``chi`` and whose
    coordinates fall inside the (inclusive) box are inner-joined with
    ``comp`` on ``lat``/``lon``; the mean of every numeric column of the
    joined table is then displayed. Nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format table with ``lat``, ``lon``, ``mixing_state_type`` and
        ``mixing_state_index`` columns.
    chi : str
        Mixing state type to keep (e.g. ``"chi_b"``).
    lat_min, lat_max, lon_min, lon_max : float
        Inclusive bounds of the box; the defaults cover the whole globe.
    comp : pandas.DataFrame
        Composition table with ``lat`` and ``lon`` columns. The default is
        the module-level ``comp`` as it was when this function was defined.
    """
    in_lat_band = df["lat"].between(lat_min, lat_max)
    in_lon_band = df["lon"].between(lon_min, lon_max)
    is_requested_type = df["mixing_state_type"] == chi

    selected = df[in_lat_band & in_lon_band & is_requested_type]
    joined = selected.merge(comp, on=["lat", "lon"], how="inner")
    display(joined.describe().transpose()["mean"])

In [5]:
# Split each method's chi fields into land / ocean tables; the per-type
# means are printed by get_land_ocean itself.
method = "MAM4"
print(method)
l_mam4, o_mam4 = get_land_ocean(method, da)

method = "ML"
print(method)
l_ml, o_ml = get_land_ocean(method, da)

MAM4
land


mixing_state_type
chi_b    87.394463
chi_c    57.977361
chi_h    62.640074
Name: mean, dtype: float64

ocean


mixing_state_type
chi_b    85.287231
chi_c    58.574136
chi_h    62.896255
Name: mean, dtype: float64



ML
land


mixing_state_type
chi_b    77.070656
chi_c    55.056091
chi_h    60.661880
Name: mean, dtype: float64

ocean


mixing_state_type
chi_b    68.688721
chi_c    52.496044
chi_h    56.535778
Name: mean, dtype: float64





## chi_opt1

### MAM4

In [6]:
# MAM4 ocean points, chi_b, latitudes 45 to 60 (all longitudes by default).
chi = "chi_b"
lat_min = 45
lat_max = 60
comp_analysis(o_mam4, chi, lat_min, lat_max)

lat                   52.805682
lon                  -26.260689
mixing_state_index    75.462173
bc_a1_per              1.010531
bc_a4_per              1.175600
dst_a1_per             1.223626
dst_a2_per             0.000267
ncl_a1_per            49.376904
ncl_a2_per             0.336654
pom_a1_per             6.336105
pom_a4_per             6.394899
so4_a1_per            22.630384
so4_a2_per             0.509159
soa_a1_per            10.941954
soa_a2_per             0.063918
Name: mean, dtype: float64

In [7]:
# MAM4 land points, chi_b, latitudes -90 to -66.5 (all longitudes by default).
# NOTE(review): -66.5 is presumably the Antarctic Circle -- confirm.
chi = "chi_b"
lat_min = -90
lat_max = -66.5
comp_analysis(l_mam4, chi, lat_min, lat_max)

lat                  -80.309031
lon                   21.365548
mixing_state_index    74.858360
bc_a1_per              0.359352
bc_a4_per              1.644499
dst_a1_per             2.121426
dst_a2_per             0.001814
ncl_a1_per            24.601838
ncl_a2_per             0.288057
pom_a1_per             2.313692
pom_a4_per             8.619451
so4_a1_per            38.988615
so4_a2_per            18.359612
soa_a1_per             2.640066
soa_a2_per             0.061579
Name: mean, dtype: float64