In [1]:
import pandas as pd
import numpy as np
import pymannkendall as mk
import statsmodels.api as sm
import os


In [2]:
# Folder that receives the Mann-Kendall result workbooks.
results_folder = r'../example/results/mann-kendall'
# CSV with the station table (read below for its 'Id' and 'Name' columns).
stations_csv_file = r'../example/data/Stations.csv'
# Parquet file with the daily series; a bare file name, so it is resolved
# against the current working directory.
precipitations_daily_ts_parquet = r'precipitations_daily_time_series.parquet'

# makedirs also creates missing parent folders (os.mkdir raised
# FileNotFoundError when '../example/results' did not exist yet) and, with
# exist_ok=True, is a no-op on re-runs without a racy exists() check.
os.makedirs(results_folder, exist_ok=True)

In [3]:
# Load the daily time series (Parquet) and the station table (CSV) from the
# paths configured in the previous cell.
df = pd.read_parquet(path=precipitations_daily_ts_parquet)
df_stations = pd.read_csv(filepath_or_buffer=stations_csv_file)

In [4]:
# Lookup table from station 'Id' to station 'Name' (if an Id is repeated in
# the stations table, the last row wins).
stations_dict = dict(zip(df_stations['Id'], df_stations['Name']))

# Fail fast, listing every offending id: Series.map would otherwise leave NaN
# for ids that are absent from the stations table instead of raising.
unknown_station_ids = df.loc[~df['StationId'].isin(list(stations_dict)), 'StationId'].unique()
if len(unknown_station_ids) > 0:
    raise KeyError(f'StationId values not found in the stations table: {list(unknown_station_ids)}')

# Vectorised lookup instead of a per-row Python lambda.
df['StationName'] = df['StationId'].map(stations_dict)

In [5]:
# Column layout of the summary tables: the station name followed by the nine
# values returned by mk.original_test, in the order the library returns them.
MK_COLUMNS = ['station', 'trend', 'h', 'p', 'z', 'Tau', 's', 'var_s', 'slope', 'intercept']


def _mann_kendall_by_station(data, min_ir_exclusive=None):
    """Run ``mk.original_test`` on the 'IR' column of every station.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the 'StationName', 'DateTime' and 'IR' columns.
    min_ir_exclusive : float, optional
        When given, only rows with ``IR > min_ir_exclusive`` are tested.
        ``None`` (the default) tests every row of each station.

    Returns
    -------
    pandas.DataFrame
        One row per station, with the columns listed in ``MK_COLUMNS``.
    """
    rows = []
    for station, station_df in data.groupby('StationName'):
        if min_ir_exclusive is not None:
            station_df = station_df[station_df['IR'] > min_ir_exclusive]
        # NOTE(review): rows are passed to the test in the order they appear
        # in `data`; nothing here sorts by 'DateTime' -- confirm the input is
        # already in chronological order.
        series = station_df.set_index('DateTime')[['IR']]
        rows.append([station, *mk.original_test(series)])
    return pd.DataFrame(rows, columns=MK_COLUMNS)


# Trend test on the complete series of each station.
df_mk = _mann_kendall_by_station(df)

# Same test restricted to rows with IR > 2 (presumably "rainy" days -- confirm
# the meaning and units of IR).
df_mk_only_rains = _mann_kendall_by_station(df, min_ir_exclusive=2)


In [6]:
# Write each summary table to its own workbook inside results_folder,
# leaving the DataFrame index out of the sheet.
for summary_frame, workbook_name in (
    (df_mk, 'mk_original_test.xlsx'),
    (df_mk_only_rains, 'mk_original_test_only_rains.xlsx'),
):
    workbook_path = os.path.join(results_folder, workbook_name)
    summary_frame.to_excel(workbook_path, index=False)