In [None]:
%pip install prettytable
%pip install metpy

In [2]:
# importing required library
from prettytable import PrettyTable
import pandas as pd
import xarray as xr
import os
import metpy
from metpy.units import units
import numpy as np
from pathlib import Path

In [3]:
# Get the file names
basepath = Path('/home/jovyan/Landsat_SST_algorithm')
atmpath = basepath / 'Data/ERA5_atmprofiles/Atmospheres/'
sstpath = basepath / 'Data/ERA5_atmprofiles/SSTs/'

In [31]:
def open_ERA5(monthfile,path):
    # Open all files from one month into dataframes and concatenate
    ds = xr.open_dataset(path / monthfile)
    ds = ds.drop_sel(longitude=np.delete(ds.longitude, np.arange(0, ds.longitude.size, 4)))
    df = ds.to_dataframe()
    df = df.reset_index()
    
    # print(ds.dims['longitude']*ds.dims['latitude']*ds.dims['time'])
    
    # Needs to be all levels from one profile, then next profile, then concatenate
    df = df.sort_values(by=['longitude', 'latitude','time'])
    
    return df

In [5]:
# Create MODTRAN atm correction input files

In [47]:
months = ['01','02','03','04','05','06','07','08','09','10','11','12']

dir_list = os.listdir(atmpath)
sst_list = os.listdir(sstpath)

for i in months:
    
    # Prep atmospheric profiles
    # Find all files for a month
    monthfiles = [file for file in dir_list if file.endswith(f'{i}.nc')]
    print (monthfiles)
    
    dxs = []
    
    # Open each file and concatenate all files together
    for monthfile in monthfiles:
        dx = open_ERA5(monthfile,atmpath)
        dxs.append(dx)

    df = pd.concat(dxs, ignore_index=True)
    
    # Choose specific days and hours to thin the data - we chose Day 1 00h, Day 7 12h, Day 15 6h, Day 23 18h
    df = df[(df.time.dt.day==1)&(df.time.dt.hour==0)|(df.time.dt.day==7)&(df.time.dt.hour==12)|(df.time.dt.day==15)&(df.time.dt.hour==6)|(df.time.dt.day==23)&(df.time.dt.hour==18)]
    # print(f'Atm number: {df[df.level==1].shape[0]}')
    
    # Prep SSTs
    # Find all files for a month
    monthfiles = [file for file in sst_list if file.endswith(f'{i}.nc')]
    print (monthfiles)
    
    dxs = []
    
    # Open each file and concatenate all files together
    for monthfile in monthfiles:
        dx = open_ERA5(monthfile,sstpath)
        dxs.append(dx)

    sstf = pd.concat(dxs, ignore_index=True)
    
    # Choose specific days and hours to thin the data - Day 1 00h, Day 7 12h, Day 15 6h, Day 23 18h
    sstf = sstf[(sstf.time.dt.day==1)&(sstf.time.dt.hour==0)|(sstf.time.dt.day==7)&(sstf.time.dt.hour==12)|(sstf.time.dt.day==15)&(sstf.time.dt.hour==6)|(sstf.time.dt.day==23)&(sstf.time.dt.hour==18)]
    # print(f'SST number: {sstf.shape[0]}')
    
    # Remove measurements not over the ocean
    is_sst = sstf[sstf['sst'].notna()]
    
    # Convert to Celsius and remove measurements over frozen ocean
    is_sst.loc[:,'sst'] = is_sst['sst'] - 273.15
    is_sst = is_sst[is_sst['sst'] > -1.9]
    
    # Trim atmospheric profiles based on remaining SST measurements and merge the dataframes
    good_df = pd.merge(df,is_sst,how='inner',left_on=['longitude','latitude','time'],right_on=['longitude','latitude','time'])

    print(f'Max SST: {good_df.sst.max()}, Min: {good_df.sst.min()}, Size: {is_sst.shape[0]}')
    
    # Ensure the merge produced the expected output
    if is_sst.shape[0] != good_df[good_df['level']==1].shape[0]: 
        print ('SST and atm profiles do not match') 
        continue

    # Add units to level *slow
    good_df ['hPa'] = good_df['level'].apply(lambda x: x*units.hectopascal)

    # Convert hPa to height in km *takes a long time
    good_df['ht[km]'] = good_df['hPa'].apply(metpy.calc.pressure_to_height_std)
    good_df['ht'] = good_df['ht[km]'].apply(lambda x: x.magnitude)
    
    # Prep for output and save to file
    good_df = good_df.reset_index(drop=True)
    good_df['ht'] = np.around(good_df['ht'],1)
    good_df['t'] = np.around(good_df['t'],1)
    good_df['q'] = np.around(good_df['q'],7)
     
    # Save atmopheric profiles and SSTs
    outFile = basepath / f'Data/AtmCorrection/modtran_atmprofiles_{i}_20230823.txt'
    good_df[['ht','level','t','q']].to_csv(outFile,sep='\t',index=False,header=False, encoding='ascii')
    
    outFile = basepath / f'Data/AtmCorrection/modtran_sstprofiles_{i}_20230823.txt'
    is_sst[['sst']].to_csv(outFile,sep='\t',index=False,header=False, encoding='ascii')

['era5_daily_202001.nc', 'era5_daily_202101.nc']
['era5_SST_202101.nc', 'era5_SST_202001.nc']
Max SST: 2.794342041015625, Min: -1.6900634765625, Size: 1629


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  is_sst.loc[:,'sst'] = is_sst['sst'] - 273.15


['era5_daily_202102.nc', 'era5_daily_202002.nc']
['era5_SST_202002.nc', 'era5_SST_202102.nc']
Max SST: 2.923004150390625, Min: -1.743988037109375, Size: 1627


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  is_sst.loc[:,'sst'] = is_sst['sst'] - 273.15


['era5_daily_202003.nc', 'era5_daily_202103.nc']
['era5_SST_202103.nc', 'era5_SST_202003.nc']
Max SST: 3.113189697265625, Min: -1.85552978515625, Size: 1630


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  is_sst.loc[:,'sst'] = is_sst['sst'] - 273.15


['era5_daily_202104.nc', 'era5_daily_202004.nc']
['era5_SST_202004.nc', 'era5_SST_202104.nc']
Max SST: 1.909576416015625, Min: -1.690521240234375, Size: 1632


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  is_sst.loc[:,'sst'] = is_sst['sst'] - 273.15


['era5_daily_202005.nc', 'era5_daily_202105.nc']
['era5_SST_202105.nc', 'era5_SST_202005.nc']
Max SST: 1.647613525390625, Min: -1.690277099609375, Size: 1632


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  is_sst.loc[:,'sst'] = is_sst['sst'] - 273.15


['era5_daily_202106.nc', 'era5_daily_202006.nc']
['era5_SST_202006.nc', 'era5_SST_202106.nc']
Max SST: 1.391754150390625, Min: -1.692962646484375, Size: 1632


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  is_sst.loc[:,'sst'] = is_sst['sst'] - 273.15


['era5_daily_202007.nc', 'era5_daily_202107.nc']
['era5_SST_202107.nc', 'era5_SST_202007.nc']
Max SST: 1.225738525390625, Min: -1.7010498046875, Size: 1632


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  is_sst.loc[:,'sst'] = is_sst['sst'] - 273.15


['era5_daily_202108.nc', 'era5_daily_202008.nc']
['era5_SST_202008.nc', 'era5_SST_202108.nc']
Max SST: 0.786529541015625, Min: -1.693695068359375, Size: 1632


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  is_sst.loc[:,'sst'] = is_sst['sst'] - 273.15


['era5_daily_202009.nc', 'era5_daily_202109.nc']
['era5_SST_202109.nc', 'era5_SST_202009.nc']
Max SST: 0.901275634765625, Min: -1.6927490234375, Size: 1632


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  is_sst.loc[:,'sst'] = is_sst['sst'] - 273.15


['era5_daily_202010.nc', 'era5_daily_202110.nc']
['era5_SST_202110.nc', 'era5_SST_202010.nc']
Max SST: 0.891998291015625, Min: -1.709320068359375, Size: 1632


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  is_sst.loc[:,'sst'] = is_sst['sst'] - 273.15


['era5_daily_202111.nc', 'era5_daily_202011.nc']
['era5_SST_202011.nc', 'era5_SST_202111.nc']
Max SST: 0.677398681640625, Min: -1.7630615234375, Size: 1631


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  is_sst.loc[:,'sst'] = is_sst['sst'] - 273.15


['era5_daily_202012.nc', 'era5_daily_202112.nc']
['era5_SST_202112.nc', 'era5_SST_202012.nc']
Max SST: 1.318267822265625, Min: -1.691741943359375, Size: 1629


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  is_sst.loc[:,'sst'] = is_sst['sst'] - 273.15
