# Weekly trends plot for different years
* the resampling frequency is `freq` days
* various rates of change of the chlor-a concentration
* composition plots for all years
* binned plots using the week number ('week no.') from November to March, for all years together and for individual years
* a "fixed scale" seems more illustrative than the "min-max scale"

In [None]:
import xarray as xr
import numpy as np
import pandas as pd
%matplotlib inline
from matplotlib import pyplot as plt
from dask.diagnostics import ProgressBar
import seaborn as sns
from matplotlib.colors import LogNorm

In [None]:
# Resampling interval, in days; it is also part of the input file name
# assembled when the preprocessed data are loaded.
freq = 8

In [None]:
# Load the preprocessed (interpolated) float data set.
# The file name is assembled from the variable tags below plus the resampling
# frequency: "df_Globcolor_<tags>_<freq>d.csv".
var1 = "chl"
vardist = "dist"
var2 = "cdm"
var3 = "kd490"
var4 = "t865"
var5 = "par"
var6 = "sst4"
var7 = "chlrateweekno"

indir_prefix = "../../data_collector_globcolour/output.data.interpolate/2017GDPfloat/" + "df_Globcolor_"
indir = indir_prefix + var1 + vardist + var2 + var3 + var4 + var5 + var6 + var7 + "_" + str(freq) + "d.csv"

# The first CSV column holds the row index written by the preprocessing step.
floatDF_tmp = pd.read_csv(indir, index_col=0)
print(floatDF_tmp)


### Plot chl_rate for one float id, which will be fit by LDS.
# The id is kept in a single variable so the filter and the title cannot drift apart.
example_float_id = 135776

# DataFrame.plot opens its own figure unless an Axes is passed in, so the
# 8x6 axes is created explicitly and handed over; otherwise an empty figure
# is left behind and the requested size is ignored.
fig, ax = plt.subplots(figsize=(8, 6))
floatDF_tmp[floatDF_tmp.id == example_float_id].plot(
    x='time', y='chl_rate', title=('id - %d' % example_float_id), ax=ax
)
plt.show()
plt.close("all")

In [None]:
# Make the project's `tools` package importable from this notebook.
# Background:
#   https://stackoverflow.com/questions/16780014/import-file-from-parent-directory
#   https://stackoverflow.com/questions/16771894/python-nameerror-global-name-file-is-not-defined
import importlib
import os
import sys

# "__file__" is deliberately a string literal: abspath() resolves it against
# the current working directory, so its dirname is the working directory.
_cwd_marker = os.path.abspath("__file__")
_notebook_dir = os.path.dirname(_cwd_marker)
# Two levels above the working directory is appended to the module search path.
_search_root = os.path.dirname(os.path.dirname(_notebook_dir))
sys.path.append(_search_root)

from tools import chl_rates  # use del(chl_rates) to drop the name again

# Reload so that edits made to tools/chl_rates.py are picked up without
# restarting the kernel.
importlib.reload(chl_rates)

In [None]:
# Progress banners printed before each stage.
_banner_reduce = "\n ******* Reduce the Dataset to 'Nov-01 to Mar-31' ******* \n"
_banner_plots = "\n ******* weekly plots and spatial plots on various rates of chl-a and log-scale chl-a ******* \n"

# Stage 1: pass the loaded table through chl_rates.reduce_to_NovMar.
# NOTE(review): per the banner this keeps Nov-01 .. Mar-31 only; the helper
# lives in tools/chl_rates.py and is not visible here -- confirm there.
print(_banner_reduce)
floatsDF_NovMar = chl_rates.reduce_to_NovMar(floatDF_tmp)

# Stage 2: hand the reduced table to the plotting helper of the same module.
print(_banner_plots)
chl_rates.spatial_plots_chl_rate_weekly(floatsDF_NovMar)

In [None]:
# Parse the 'time' column into datetimes and make it the index of the table.
floatsDF_NovMar['time'] = pd.to_datetime(floatsDF_NovMar['time'])
floatsDF_NovMar = floatsDF_NovMar.set_index('time')

# Calendar year of every record, taken from the new datetime index and
# stored as an extra 'year' column.
_index_years = floatsDF_NovMar.index.year
floatsDF_NovMar['year'] = _index_years
floatsDF_NovMar[:20]  # preview of the first rows (not rendered unless last in the cell)

# First and last calendar year present in the data; later cells loop over them.
year_max = _index_years.max()
year_min = _index_years.min()
for _label, _value in (('year_max', year_max), ('year_min', year_min)):
    print(_label, _value)

In [None]:
### Part 1: Plot for the mean of the weekly trends
### Mean
# One figure per group of consecutive years; each year is one line showing the
# per-week ('week_rotate') mean of 'chl_rate_stand'.
n_figures = 3
years_per_figure = 5  # used for both the inner range and the step, so groups never overlap

start = year_min
for i in range(n_figures):
    fig, axes1 = plt.subplots(figsize=(8, 6))
    axes1.set_title("mean of the weekly-binned standardized Lagrangian rate of change per day of Chl-a", fontsize=10)

    for current_year in range(start, start + years_per_figure):
        df_yearly = floatsDF_NovMar[floatsDF_NovMar.year == current_year]
        weekly_mean = df_yearly.groupby(['week_rotate'])['chl_rate_stand'].mean()
        weekly_mean.plot(ax=axes1, linestyle="-", linewidth=0.8, label='%d' % current_year)

    start = start + years_per_figure

    # Fixed scale so that the figures of the different year groups are comparable.
    axes1.set_ylim(-1.5, 1)
    axes1.set_xlabel('week', fontsize=10)
    # Raw string: '\c' is not a valid escape sequence in a normal string literal.
    axes1.set_ylabel(r'rate of change of the $Chl_a$ in $mg/(m^3 \cdot days)$', fontsize=10)
    axes1.set_yticks(np.arange(-1.5, 1, 0.25))
    axes1.set_xticks(np.arange(1, 25, 1))

    axes1.legend(bbox_to_anchor=(1.15, 1.05))
    #plt.savefig(str(freq)+"D_weekly_mean_" + str(i) +".pdf")
    plt.show()
    plt.close(fig)

In [None]:
### Part 4: Plot for the 85% quantile of the weekly trends
### 85% quantile
# One figure per group of consecutive years; each year is one line showing the
# per-week ('week_rotate') 0.85 quantile of 'chl_rate_stand'.
n_figures = 3
years_per_figure = 5  # used for both the inner range and the step, so groups never overlap

start = year_min
for i in range(n_figures):
    fig, axes1 = plt.subplots(figsize=(8, 6))
    axes1.set_title("85% quantile of the weekly-binned standardized Lagrangian rate of change per day of Chl-a", fontsize=10)

    for current_year in range(start, start + years_per_figure):
        df_yearly = floatsDF_NovMar[floatsDF_NovMar.year == current_year]
        weekly_q85 = df_yearly.groupby(['week_rotate'])['chl_rate_stand'].quantile(.85)
        weekly_q85.plot(ax=axes1, linestyle="-", linewidth=0.8, label='%d' % current_year)

    # Advance by the full group width; a smaller step would draw the last
    # year of one figure again as the first year of the next.
    start = start + years_per_figure

    # Fixed scale so that the figures of the different year groups are comparable.
    axes1.set_ylim(-1, 1)
    axes1.set_xlabel('week', fontsize=10)
    # Raw string: '\c' is not a valid escape sequence in a normal string literal.
    axes1.set_ylabel(r'rate of change of the $Chl_a$ in $mg/(m^3 \cdot days)$', fontsize=10)
    axes1.set_yticks(np.arange(-1, 1, 0.25))
    axes1.set_xticks(np.arange(1, 25, 1))

    axes1.legend(bbox_to_anchor=(1.15, 1.05))
    #plt.savefig(str(freq)+"D_weekly_quantile(85)_" + str(i) +".pdf")
    plt.show()
    plt.close(fig)

In [None]:
### Part 2: Plot for the median of the weekly trends
### Median
# One figure per group of consecutive years; each year is one line showing the
# per-week ('week_rotate') median (0.50 quantile) of 'chl_rate_stand'.
n_figures = 3
years_per_figure = 5  # used for both the inner range and the step, so groups never overlap

start = year_min
for i in range(n_figures):
    fig, axes1 = plt.subplots(figsize=(8, 6))
    axes1.set_title("median of the weekly-binned standardized Lagrangian rate of change per day of Chl-a", fontsize=10)

    for current_year in range(start, start + years_per_figure):
        df_yearly = floatsDF_NovMar[floatsDF_NovMar.year == current_year]
        weekly_median = df_yearly.groupby(['week_rotate'])['chl_rate_stand'].quantile(.50)
        weekly_median.plot(ax=axes1, linestyle="-", linewidth=0.8, label='%d' % current_year)

    # Advance by the full group width; a smaller step would draw the last
    # year of one figure again as the first year of the next.
    start = start + years_per_figure

    # Fixed scale so that the figures of the different year groups are comparable.
    axes1.set_ylim(-1, 1)
    axes1.set_xlabel('week', fontsize=10)
    # Raw string: '\c' is not a valid escape sequence in a normal string literal.
    axes1.set_ylabel(r'rate of change of the $Chl_a$ in $mg/(m^3 \cdot days)$', fontsize=10)
    axes1.set_yticks(np.arange(-1, 1, 0.25))
    axes1.set_xticks(np.arange(1, 25, 1))

    axes1.legend(bbox_to_anchor=(1.15, 1.05))
    #plt.savefig(str(freq)+"D_weekly_median_" + str(i) +".pdf")
    plt.show()
    plt.close(fig)

In [None]:
### Part 3: Plot for the 15% quantile of the weekly trends
### 15% quantile
# One figure per group of consecutive years; each year is one line showing the
# per-week ('week_rotate') 0.15 quantile of 'chl_rate_stand'.
n_figures = 3
years_per_figure = 5  # used for both the inner range and the step, so groups never overlap

start = year_min
for i in range(n_figures):
    fig, axes1 = plt.subplots(figsize=(8, 6))
    axes1.set_title("15% quantile of the weekly-binned standardized Lagrangian rate of change per day of Chl-a", fontsize=10)

    for current_year in range(start, start + years_per_figure):
        df_yearly = floatsDF_NovMar[floatsDF_NovMar.year == current_year]
        weekly_q15 = df_yearly.groupby(['week_rotate'])['chl_rate_stand'].quantile(.15)
        weekly_q15.plot(ax=axes1, linestyle="-", linewidth=0.8, label='%d' % current_year)

    # Advance by the full group width; a smaller step would draw the last
    # year of one figure again as the first year of the next.
    start = start + years_per_figure

    # Fixed scale so that the figures of the different year groups are comparable.
    axes1.set_ylim(-2, 1)
    axes1.set_xlabel('week', fontsize=10)
    # Raw string: '\c' is not a valid escape sequence in a normal string literal.
    axes1.set_ylabel(r'rate of change of the $Chl_a$ in $mg/(m^3 \cdot days)$', fontsize=10)
    axes1.set_yticks(np.arange(-2, 1, 0.25))
    axes1.set_xticks(np.arange(1, 25, 1))

    axes1.legend(bbox_to_anchor=(1.15, 1.05))
    #plt.savefig(str(freq)+"D_weekly_quantile(15)_" + str(i) +".pdf")
    plt.show()
    plt.close(fig)

In [None]:
# Release every figure that is still open.
# Nothing else is called afterwards on purpose: once all figures are closed,
# plt.cla() / plt.clf() would implicitly create a brand-new empty figure and
# plt.show() would then render that blank figure.
plt.close('all')

In [None]:
# Recompute the first and last calendar year found in the datetime index,
# so this part of the notebook can be run without the earlier cell.
_years_in_index = floatsDF_NovMar.index.year
year_min, year_max = _years_in_index.min(), _years_in_index.max()
print('year_max', year_max)
print('year_min', year_min)


In [None]:
# For each year between year_min and year_max (inclusive):
#   1. print summary statistics of 'chl_rate_stand',
#   2. draw a line plot of its per-week mean, median and 15% / 85% quantiles,
#   3. draw a box plot of it per week ('week_rotate').
for current_year in range(year_min, year_max + 1):
    df_yearly = floatsDF_NovMar[floatsDF_NovMar.year == current_year]

    print('\n summary statistics of chl_rate_stand for the year %d \n' % (current_year) )
    print(df_yearly.chl_rate_stand.describe())

    # A year without any record has nothing to plot.
    if df_yearly.empty:
        continue

    # --- line plot: mean (blue), median (red), 15% / 85% quantiles (green) ---
    weekly = df_yearly.groupby(['week_rotate'])['chl_rate_stand']
    fig1, axes1 = plt.subplots(figsize=(8, 6))
    weekly.mean().plot(ax=axes1, linestyle="-", color='b', linewidth=1)
    weekly.quantile(.85).plot(ax=axes1, linestyle="--", color='g', linewidth=0.35)
    weekly.quantile(.50).plot(ax=axes1, linestyle="--", color='r', linewidth=0.75)
    weekly.quantile(.15).plot(ax=axes1, linestyle="--", color='g', linewidth=0.35)
    axes1.set_ylim(-2, 2)
    axes1.set_title("Year " + str(current_year) + ": Line plot of the weekly data on the standardized rate of change per day of the $Chl_a$ Concentration",  fontsize=10)
    axes1.set_xlabel('week', fontsize=10)
    # Raw string: '\c' is not a valid escape sequence in a normal string literal.
    axes1.set_ylabel(r'rate of change of the $Chl_a$ in $mg/(m^3 \cdot day)$', fontsize=10)
    axes1.set_yticks(np.arange(-2, 2, 0.25))
    axes1.set_xticks(np.arange(1, 25, 1))
    plt.show()
    plt.close(fig1)

    # --- box plot per week ---
    # http://pandas.pydata.org/pandas-docs/version/0.19.1/visualization.html
    # http://blog.bharatbhole.com/creating-boxplots-with-matplotlib/
    # boxplot(by=...) builds its own figure, so the size is passed to it
    # directly; a preceding plt.figure() would only leave an empty figure behind.
    axes2 = df_yearly.boxplot(column='chl_rate_stand', by='week_rotate', figsize=(8, 6))
    axes2.set_ylim(-2, 2)
    axes2.set_title("Year " + str(current_year) + ": Box plot of the weekly data \n on the standardized rate of change per day of the $Chl_a$ Concentration",  fontsize=10)
    axes2.get_figure().suptitle("")  # remove the automatic "Boxplot grouped by ..." title
    axes2.set_xlabel('week', fontsize=10)
    axes2.set_ylabel(r'rate of change of the $Chl_a$ in $mg/(m^3 \cdot day)$', fontsize=10)
    axes2.set_yticks(np.arange(-2, 2, 0.25))
    axes2.set_xticks(np.arange(1, 25, 1))
    plt.show()
    plt.close(axes2.get_figure())
    # the rate of change is slower on the regular scale

# Release anything still open. No cla()/clf()/show() afterwards: with no
# figure left they would implicitly create and display a new blank one.
plt.close('all')