In [1]:
import pandas as pd
from time import time
import numpy as np
from tqdm import tqdm
import os
import netCDF4 as nc
from progressbar import progressbar
from astropy.convolution import convolve
from astropy.convolution import Gaussian2DKernel
import datetime
from matplotlib import pyplot as plt, rcParams
from matplotlib import pyplot as plt
import cartopy.crs as ccrs
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
import pandas as pd
import plotly.express as px
import seaborn as sns
import matplotlib
from scipy.ndimage import gaussian_filter
import xarray as xr
import random
from scipy.interpolate import LinearNDInterpolator
from itertools import product
from scipy.signal import savgol_filter
tqdm.pandas()

from matplotlib import pyplot as plt
from matplotlib.ticker import (AutoMinorLocator, MultipleLocator)
import numpy as np
from matplotlib.patches import Rectangle
from random import random
from scipy.signal import lombscargle

import matplotlib.ticker as mticker
from time import time

%matplotlib inline

In [2]:
def get_smap(path: str):
    """Load the AM soil-moisture retrievals of one SMAP file into a flat DataFrame.

    Every cell of the 2-D grids in the ``Soil_Moisture_Retrieval_Data_AM``
    group becomes one row, in row-major order. Rows whose soil moisture equals
    the ``-9999.0`` sentinel are dropped afterwards.

    Parameters
    ----------
    path : str
        Path of a file that ``netCDF4.Dataset`` can open.

    Returns
    -------
    pd.DataFrame
        Columns ``lat``, ``long``, ``time``, ``smap_sm``, ``retrieval_qfs``,
        ``surface_roughness``, ``surface_temp``, ``vegetation_opacity`` and
        ``vegetation_water_content``. The index is the flat cell number, so it
        has gaps where sentinel rows were removed.
    """
    # DataFrame column name -> variable name inside the AM retrieval group.
    # The insertion order fixes the column order of the result.
    column_sources = {
        'lat': 'latitude',
        'long': 'longitude',
        'time': 'tb_time_utc',
        'smap_sm': 'soil_moisture',
        'retrieval_qfs': 'retrieval_qual_flag',
        'surface_roughness': 'roughness_coefficient',
        'surface_temp': 'surface_temperature',
        'vegetation_opacity': 'vegetation_opacity',
        'vegetation_water_content': 'vegetation_water_content',
    }

    # The context manager releases the file handle even when a variable is
    # missing; previously the dataset was never closed.
    with nc.Dataset(path) as ds:
        sm = ds['Soil_Moisture_Retrieval_Data_AM']
        # Pull each variable into memory once instead of re-reading a row
        # from the file for every single grid cell.
        grids = {column: sm[variable][:] for column, variable in column_sources.items()}

    lat_grid = grids['lat']
    long_grid = grids['long']

    # Values are still collected element by element so the DataFrame ends up
    # with the same inferred dtypes as before.
    flat = {column: [] for column in column_sources}
    for row in range(len(lat_grid)):
        for col in range(len(long_grid[row])):
            for column, grid in grids.items():
                flat[column].append(grid[row][col])

    df = pd.DataFrame.from_dict(flat)

    # Filter out missing values
    smap_df = df[df['smap_sm'] != -9999.0]

    return smap_df


def _parse_smap_date(filename: str):
    """Return ``(year, month, day)`` read from a SMAP file name, or ``None``.

    The date is expected as eight leading digits (``YYYYMMDD``) of the fifth
    underscore-separated field of the name.
    """
    fields = filename.split('_')
    if len(fields) < 5:
        return None
    stamp = fields[4][:8]
    if len(stamp) < 8 or not stamp.isdigit():
        return None
    return int(stamp[:4]), int(stamp[4:6]), int(stamp[6:8])


def get_smap_main(root_path: str, year: int, month: int, days: list) -> pd.DataFrame:
    """Load all SMAP files under ``root_path`` that fall on the requested days.

    ``root_path`` is expected to contain one sub-directory per file. For each
    sub-directory the first file whose name carries a parseable date is
    considered; it is loaded with ``get_smap`` when its date matches ``year``,
    ``month`` and one of ``days``.

    Parameters
    ----------
    root_path : str
        Directory holding the per-file sub-directories.
    year, month : int
        Year and month a file must belong to.
    days : list
        Days of the month to include.

    Returns
    -------
    pd.DataFrame
        The concatenated frames after ``convert_time`` has been applied, or an
        empty DataFrame when no file matches.
    """
    # The first entry yielded by os.walk describes root_path itself; its
    # directory list holds the per-file folders.
    _, subdirs, _ = next(os.walk(root_path), (root_path, [], []))

    frames = []
    for subdir in progressbar(subdirs):
        day_dir = os.path.join(root_path, subdir)
        # List each folder directly so a file is always paired with its own
        # folder, even if some folder is empty or has nested directories.
        _, _, file_list = next(os.walk(day_dir), (day_dir, [], []))

        # Skip stray files whose names do not carry a date; a folder without
        # any dated file is ignored.
        dated_files = ((name, _parse_smap_date(name)) for name in file_list)
        match = next((item for item in dated_files if item[1] is not None), None)
        if match is None:
            continue

        filename, (current_year, current_month, current_day) = match
        if (current_day in days) and (current_year == year) and (current_month == month):
            frames.append(get_smap(os.path.join(day_dir, filename)))

    if not frames:
        # Nothing matched: there is no 'time' column to convert.
        return pd.DataFrame()

    # DataFrame.append no longer exists in pandas 2.x; concatenate once instead
    # of growing a frame inside the loop.
    smap_df = pd.concat(frames)
    smap_df = convert_time(smap_df)

    return smap_df


def conv(t):
    """Convert a timestamp string to a ``pd.Timestamp``.

    If ``pd.Timestamp`` cannot parse ``t`` directly, everything from the first
    ``'.'`` onwards is replaced by ``'.000Z'`` and parsing is retried, so the
    fractional-seconds part is discarded in that case.

    Parameters
    ----------
    t : str
        Timestamp text.

    Returns
    -------
    pd.Timestamp

    Raises
    ------
    Exception
        Whatever the second parsing attempt raises if the repaired text is
        still not parseable.
    """
    try:
        return pd.Timestamp(t)
    except Exception:
        # Catch parsing failures only; a bare ``except`` would also swallow
        # KeyboardInterrupt/SystemExit and make long ``apply`` runs
        # impossible to interrupt.
        return pd.Timestamp(t.split('.')[0] + '.000Z')
    

def convert_time(df: pd.DataFrame) -> pd.DataFrame:
    """Replace the ``time`` column by hours elapsed since 2020-01-01T00:00:00Z.

    The column is first parsed with ``conv`` and then turned into a number of
    hours computed from the whole days and whole seconds of the difference to
    the reference date. ``df`` is modified in place and also returned.
    """
    epoch = pd.Timestamp('2020-01-01T00:00:00.000Z')

    def hours_since_epoch(stamp):
        elapsed = stamp - epoch
        return elapsed.days * 24 + elapsed.seconds / 3600

    df['time'] = df['time'].apply(conv)
    df['time'] = df['time'].apply(hours_since_epoch)
    return df


def get_plot_ticks(lat_values, long_values):
    """Return four evenly spaced tick positions for each axis.

    For both inputs the ticks run from the smallest to the largest value in
    three equal steps and are rounded to two decimals.

    Returns
    -------
    tuple of (list, list)
        ``(lat_ticks, long_ticks)``.
    """
    def four_ticks(values):
        low = min(values)
        high = max(values)
        step = (high - low) / 3
        # The last tick is the maximum itself rather than low + 3 * step.
        return [round(tick, 2) for tick in (low, low + step, low + 2 * step, high)]

    lat_ticks = four_ticks(lat_values)
    long_ticks = four_ticks(long_values)
    return lat_ticks, long_ticks


def universal_plot(df, title=None, dot_size=0.5, fig_size=None) -> None:
    """Scatter the ``long``/``lat`` columns of ``df`` on a global map.

    A new figure with a PlateCarree axes covering the whole globe is created,
    coastlines and a grid are drawn, and the figure is shown.

    Parameters
    ----------
    df : DataFrame-like
        Must provide ``'long'`` and ``'lat'`` columns.
    title : str, optional
        Figure title; omitted when ``None``.
    dot_size : float
        Marker size passed to ``scatter``.
    fig_size : tuple, optional
        Figure size passed to ``plt.figure``.
    """
    plate_carree = ccrs.PlateCarree()
    lat_ticks, long_ticks = get_plot_ticks([90, -90], [-180, 180])

    plt.figure(figsize=fig_size)
    ax = plt.axes(projection=plate_carree)
    ax.coastlines()

    # Tick positions and degree-style tick labels.
    ax.set_xticks(long_ticks, crs=plate_carree)
    ax.set_yticks(lat_ticks, crs=plate_carree)
    ax.xaxis.set_major_formatter(LongitudeFormatter())
    ax.yaxis.set_major_formatter(LatitudeFormatter())

    ax.set_extent([-180, 180, 90, -90], crs=plate_carree)

    plt.xticks(fontsize=16)
    plt.yticks(fontsize=16)

    plt.scatter(df['long'], df['lat'], s=dot_size)

    if title is not None:
        ax.set_title(title, fontsize=20, fontweight='book')

    ax.set_xlabel('Longitude', fontsize=18)
    ax.set_ylabel('Latitude', fontsize=18)

    ax.grid()

    plt.show()
    
    
def filter_cygnss_on_minutes(df, minutes_start, minutes_end, time_column_name='ddm_timestamp_utc'):
    """Keep the rows whose time value lies inside a window given in minutes.

    Both bounds are multiplied by 60 before being compared, inclusively, with
    the values of ``time_column_name``.
    """
    lower_bound = minutes_start * 60
    upper_bound = minutes_end * 60
    inside_window = df[time_column_name].between(lower_bound, upper_bound)
    return df[inside_window]



def test_plot(df, times=None, titles=None, dot_size=0.5, fig_size=None, time_column_name='ddm_timestamp_utc') -> None:
    """Draw one global map per cut-off, stacked vertically, and show the figure.

    Panel ``i`` scatters the rows of ``df`` selected by
    ``filter_cygnss_on_minutes(df, 0, times[i], time_column_name)``.

    Parameters
    ----------
    df : DataFrame-like
        Must provide ``'long'`` and ``'lat'`` columns and ``time_column_name``.
    times : sequence
        Upper cut-offs in minutes, one per panel. Required despite the
        ``None`` default.
    titles : sequence of str, optional
        One title per panel; panels are left untitled when ``None``.
    dot_size : float
        Marker size passed to ``scatter``.
    fig_size : tuple, optional
        Figure size passed to ``plt.subplots``.
    time_column_name : str
        Column that the cut-offs are compared against.

    Raises
    ------
    TypeError
        If ``times`` is ``None``.

    Notes
    -----
    The tick label size is changed through ``matplotlib.rc`` and therefore
    stays in effect for later figures as well.
    """
    if times is None:
        raise TypeError('times must be a sequence of cut-off values in minutes')

    matplotlib.rc('xtick', labelsize=14)
    matplotlib.rc('ytick', labelsize=14)

    lat_list, long_list = get_plot_ticks([90, -90], [-180, 180])

    n_panels = len(times)
    # squeeze=False always yields a 2-D array of axes, so a single panel works
    # the same way as several.
    fig, axes = plt.subplots(nrows=n_panels, subplot_kw={'projection': ccrs.PlateCarree()},
                             figsize=fig_size, sharex=True, squeeze=False)
    panels = axes[:, 0]

    # The latitude label goes on the second panel, or on the only panel when
    # there is just one.
    labelled_panel = min(1, n_panels - 1)

    for i in range(n_panels):
        panel = panels[i]
        panel.coastlines()
        panel.set_xticks(long_list, crs=ccrs.PlateCarree())
        panel.set_yticks(lat_list, crs=ccrs.PlateCarree())

        panel.xaxis.set_major_formatter(LongitudeFormatter())
        panel.yaxis.set_major_formatter(LatitudeFormatter())

        panel.set_extent([-180, 180, 90, -90], crs=ccrs.PlateCarree())

        panel.grid(color='black', ls='-.')

        current_df = filter_cygnss_on_minutes(df, 0, times[i], time_column_name)

        panel.scatter(current_df['long'], current_df['lat'], s=dot_size)

        if titles is not None:
            panel.set_title(titles[i], fontsize=18)
        if i == labelled_panel:
            panel.set_ylabel('Latitude', fontsize=18)

    # Applies to the current axes, i.e. the most recently created panel.
    plt.xlabel('Longitude', fontsize=18)

    plt.tight_layout()

    plt.show()
    
    

In [None]:
# Read Data

# Three SMAP files, one per day (file names carry 20200101, 20200102, 20200103).
# NOTE(review): these are absolute paths on one machine; the cell only runs
# where these exact locations exist.
first_smap_path = '/Users/vegardhaneberg/Desktop/Global SMAP - 7 days/240798506/SMAP_L3_SM_P_20200101_R18290_003_HEGOUT.nc'
second_smap_path = '/Users/vegardhaneberg/Desktop/Global SMAP - 7 days/240799098/SMAP_L3_SM_P_20200102_R18290_003_HEGOUT.nc'
third_smap_path = '/Users/vegardhaneberg/Desktop/Global SMAP - 7 days/240799866/SMAP_L3_SM_P_20200103_R18290_003_HEGOUT.nc'

# CYGNSS CSV export, read with pandas below.
cygnss_path = '/Volumes/Seagate Ekstern Hardisk/CYGNSS Data/CYGNSS 2020-01/raw_main_df_2020_01_1of31.csv'

# Wall-clock start of the load; not read again in this cell.
start = time()

# Each get_smap call flattens one file into a DataFrame, cell by cell.
print('Reading first smap day ...')
smap_df_first = get_smap(first_smap_path)
print('Reading second smap day ...')
smap_df_second = get_smap(second_smap_path)
print('Reading third smap day ...')
smap_df_third = get_smap(third_smap_path)

print('Reading CYGNSS 1st of Jan 2020 ...')
cygnss_df = pd.read_csv(cygnss_path)




Reading first smap day ...
Reading second smap day ...


In [None]:
# CYGNSS Plots

# Panel cut-offs in minutes; test_plot hands them to filter_cygnss_on_minutes,
# which multiplies them by 60 before comparing.
time1 = 1.5*60
time2 = 7*60
time3 = 24*60

f_size = (14, 10)
dot_size = 0.0001

titles = [
    'CYGNSS first 90 minutes of 2020',
    f'CYGNSS first {round(time2/60)} hours of 2020',
    f'CYGNSS first {round(time3/60)} hours of 2020',
]

test_plot(
    cygnss_df,
    times=[time1, time2, time3],
    titles=titles,
    dot_size=dot_size,
    fig_size=f_size,
)


In [None]:
# SMAP Plots
smap_dot_size_factor = 100

# DataFrame.append no longer exists in pandas 2.x; pd.concat stacks the three
# daily frames the same way.
merged_smap_df = pd.concat([smap_df_first, smap_df_second, smap_df_third])

# test_plot filters on a 'seconds' column, which get_smap does not create.
# Derive it from the raw 'time' strings: convert_time yields hours since
# 2020-01-01T00:00:00Z. It runs on a copy so the 'time' column stays as loaded.
# NOTE(review): assumes 'seconds' was meant as seconds since the start of
# 2020 - confirm against the cell that originally created it.
smap_hours = convert_time(merged_smap_df[['time']].copy())['time']
merged_smap_df['seconds'] = smap_hours.to_numpy() * 3600

titles_smap = ['SMAP first 24 hours of 2020',
               'SMAP first 48 hours of 2020',
               'SMAP first 72 hours of 2020']

# test_plot expects cut-offs in minutes (they are multiplied by 60 inside
# filter_cygnss_on_minutes), so 24/48/72 hours are expressed in minutes to
# match the titles.
times = [24 * 60, 48 * 60, 72 * 60]

test_plot(merged_smap_df,
          times=times,
          titles=titles_smap,
          dot_size=dot_size*smap_dot_size_factor,
          fig_size=f_size,
          time_column_name='seconds')


In [None]:
# Scratch calculation: number of seconds in 24 hours.
24*60*60

In [None]:
# CYGNSS Plots
f_size = (14, 4)
dot_size = 0.0001

# Cut-offs in minutes, as expected by filter_cygnss_on_minutes.
time1 = 1.5*60
time2 = 7*60
time3 = 24*60

# One stand-alone map per cut-off, drawn in increasing order.
for cutoff_minutes, plot_title in [
    (time1, 'CYGNSS first 90 minutes of 2020'),
    (time2, f'CYGNSS first {round(time2/60)} hours of 2020'),
    (time3, f'CYGNSS first {round(time3/60)} hours of 2020'),
]:
    universal_plot(
        filter_cygnss_on_minutes(cygnss_df, 0, cutoff_minutes),
        title=plot_title,
        dot_size=dot_size,
        fig_size=f_size,
    )
