In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio

import glob

from sklearn.cluster import KMeans
from tsmoothie import LowessSmoother, ExponentialSmoother
from pyprojroot import here

# Show every column when displaying wide frames.
pd.set_option('display.max_columns', None)

# Data locations are derived from the project root located by pyprojroot rather
# than from a user-specific absolute path, so the notebook runs on any checkout.
# NOTE(review): assumes the project root is the directory that holds `data/`
# (previously hardcoded as /Users/vitorro/Repositories/dario) - confirm.
ROOT_DIR = str(here())
# Both directories keep their trailing slash: later cells append file names
# with plain string concatenation.
insar_dir = ROOT_DIR + '/data/raw/insar/'
data_dir = ROOT_DIR + '/data/interim/'

pio.templates.default = 'plotly'

# List every CSV found under the InSAR directory (any depth) so the file to
# load in the next cell can be picked by name.
files = glob.glob(insar_dir + '**/*.csv', recursive=True)
for file in sorted(files):
    print(file)


def interpolate_displacement(df):
    """Put one pixel's time series on a regular 6-day grid.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of a single pixel. Must contain a datetime ``timestamp`` column
        and a ``displacement`` column; any other columns are carried along.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by ``timestamp`` at a 6-day frequency. Every column is
        forward-filled onto the new grid, except ``displacement`` which is
        linearly interpolated between the resampled observations.
    """
    by_time = df.set_index('timestamp')

    # Forward-fill carries the per-pixel attributes onto the inserted dates.
    interpolated_df = by_time.resample('6D').ffill()

    # Displacement is the only quantity that should vary between dates, so it
    # is interpolated separately and written over the forward-filled values.
    displacement_on_grid = (
        by_time[['displacement']]
        .resample('6D')
        .interpolate(method='linear')
    )
    interpolated_df['displacement'] = displacement_on_grid
    return interpolated_df

def smoothing(frac):
    """Return a callable that LOWESS-smooths a series with a fixed fraction.

    Parameters
    ----------
    frac : float
        Value passed to ``LowessSmoother`` as ``smooth_fraction``
        (a previously tried value was 0.075).

    Returns
    -------
    callable
        Function taking one series ``x`` and returning the first row of the
        smoother's ``smooth_data``, suitable for ``groupby(...).transform``.
    """
    def smoothing_(x):
        # A new smoother per call: each series is fitted independently.
        smoother = LowessSmoother(smooth_fraction=frac, iterations=1)
        smoother.smooth(x)
        smoothed_rows = smoother.smooth_data
        return smoothed_rows[0]

    return smoothing_


In [None]:
# Load the raw InSAR export to process.
# Exactly one `read_csv` line is active; the commented lines are the other
# files that have been used with this notebook.

# df_orig = pd.read_csv(insar_dir+"D1/L2B_066_0716_IW3_VV.csv") # Gjerdrum D1
# df_orig = pd.read_csv(insar_dir+"/A1/L2B_117_0350_IW3_VV.csv") # New Porsgrunn A1
# df_orig = pd.read_csv(insar_dir+"/A1/L2B_117_0345_IW2_VV.csv") # New Kristiansand A1
# df_orig = pd.read_csv(insar_dir+"D2/L2B_139_0696_IW3_VV.csv") # New Trondheim D2
# df_orig = pd.read_csv(insar_dir + "A1/L2B_146_0377_IW2_VV.csv") # New Trondheim A1
df_orig = pd.read_csv(insar_dir + "D1/L2B_037_0695_IW2_VV.csv") # New Trondheim D1
# df_orig = pd.read_csv(insar_dir+"A1/146_0377_iw2_vv.csv") # Trondheim A1
# df_orig = pd.read_csv(insar_dir+"D2/139_0696_iw3_vv.csv") # Trondheim D2
# df_orig = pd.read_csv(insar_dir + "D1/037_0695_iw2_vv.csv") # Trondheim
# df_orig = pd.read_csv(insar_dir+"D2/168_0743_iw2_vv.csv") # Malmo D2
# df_orig = pd.read_csv(insar_dir+"066_0742_iw1_vv.csv")  # Malmo D1
# # df_orig = pd.read_csv(insar_dir+"066_0744_iw1_vv.csv")

# Density of measurement points over the whole file, to choose a bounding box.
fig = px.density_heatmap(
    x=df_orig.longitude,
    y=df_orig.latitude,
    nbinsx=100,
    nbinsy=100,
    width=1000,
    height=800,
)
fig.show()

In [None]:
# SELECT AND FORMAT DATA
# Work on a copy so `df_orig` stays available for re-running this cell.
df = df_orig.copy()

# Bounding box of the area of interest. Exactly one line is active.
lat_min, lat_max, lon_min, lon_max = (63.4182, 63.4220, 10.3858, 10.3946) # St. Olavs

# lat_min, lat_max, lon_min, lon_max = (60.02, 60.25, 10.9, 11.4) # Gjerdrum

# lat_min, lat_max, lon_min, lon_max = (59.10, 59.20, 9.55, 9.74) # 1 - Porsgrunn
# lat_min, lat_max, lon_min, lon_max = (58.13, 58.20, 7.9, 8.1) # 1 - Kristiansand
# lat_min, lat_max, lon_min, lon_max = (63.41, 63.46, 10.36,10.50) # 1 - Trondheim

# OLD
# lat_min, lat_max, lon_min, lon_max = (55.55, 55.58, 12.9,13.1) # 1 - df_smoothed_01
# lat_min, lat_max, lon_min, lon_max = (55.58, 55.6, 12.9,13.1) # 2 - df_smoothed_02.parq
# lat_min, lat_max, lon_min, lon_max = (55.37, 55.42, 12.7, 13.1) # 3 - df_smoothed_03.parq

# Keep the points inside the box (lower bounds exclusive, upper bounds inclusive).
inside_box = (
    (df.longitude > lon_min) & (df.longitude <= lon_max) &
    (df.latitude > lat_min) & (df.latitude <= lat_max)
)
df = df[inside_box]

# Cutting off Råholt from Gjerdrum. The filter needs BOTH latitude > 60.2356 and
# longitude > 11.1388, so it removes nothing for the other boxes listed above.
df = df[~((df.latitude > 60.2356) & (df.longitude > 11.1388))]

# Density of the selected points. The height was written `00`, which evaluates
# to 0 and is not a usable figure height.
# NOTE(review): 400 is chosen to keep the 5:4 aspect ratio of the 1000x800
# overview plot - adjust if another size was intended.
fig = px.density_heatmap(x=df.longitude, y=df.latitude, nbinsx=100, nbinsy=100, width=500, height=400)
fig.show()

# Selecting relevant columns
id_cols = ['pid', 'latitude', 'longitude', 'easting', 'northing', 'mean_velocity']
# Displacement columns are the ones named after timestamps.
# NOTE(review): the substring test "20" would also pick up any non-date column
# whose name contains "20" - confirm against the CSV header.
date_cols = sorted(col for col in df.columns if "20" in col)
# Build a NEW list here. The previous `keep_cols = date_cols` followed by
# `keep_cols.extend(id_cols)` made both names point at the same list, so the id
# columns were silently appended to `date_cols` before it was used as
# `value_vars` below.
keep_cols = date_cols + id_cols  # variables to keep from the dataframe
df = df[keep_cols]  # replacing old df for memory efficiency
# df_originals.append(df)

# Formatting from wide to tall dataframe
# Uses a single column for timestamp and a column for displacement
# Number of rows = number of pixels * number of timestamps
df = df.melt(id_vars=id_cols, value_vars=date_cols,
             var_name='timestamp', value_name='displacement').sort_values('pid')
df['timestamp'] = pd.to_datetime(df['timestamp'])

# RETRO: based on gap before 2016.06
df = (
    df[df.timestamp >= '2016-06-01']
    .sort_values(['pid', 'timestamp'])
    .reset_index(drop=True)
)

# CLUSTERING PIXELS (to work with smaller groups at once later)

average_size = 1000  # target number of pixels per cluster
# One row per pixel; `.copy()` so adding the `cluster` column below does not
# write into a slice of `df`.
nodes_full = df.drop_duplicates(['pid'])[['pid', 'easting', 'northing']].copy()

# Integer division gives 0 when the selection holds fewer than `average_size`
# pixels, and KMeans rejects `n_clusters=0`, so always ask for at least one.
n_clusters = max(1, nodes_full.shape[0] // average_size)
# Fixed seed so the pixel-to-cluster assignment is the same on every run.
nodes_full['cluster'] = (
    KMeans(n_clusters=n_clusters, random_state=0)
    .fit_predict(nodes_full[['northing', 'easting']])
)
df = df.merge(nodes_full[['pid', 'cluster']], how='left', on='pid')

print(f'{df.pid.nunique()} nodes')

In [None]:
# INTERPOLATE MISSING TIMESTAMPS
# Every pixel is resampled on its own. `reset_index` turns the index levels
# back into columns, and the unnamed outer level (`level_0`) is discarded.
df = (
    df.groupby('pid', as_index=False)
    .apply(interpolate_displacement)
    .reset_index()
    .drop(columns='level_0')
)

In [None]:
# APPLY SMOOTHNESS
# The smoothing fraction is 50 divided by the number of distinct timestamps in
# the frame; each pixel's displacement series is smoothed separately.
df['smoothed'] = (
    df.groupby('pid', as_index=False)['displacement']
    .transform(smoothing(50 / df['timestamp'].nunique()))
)
# df['smooth60'] = df.groupby('pid',as_index=False).displacement.transform(smoothing(60/df.timestamp.nunique()))

In [None]:
# SAVE
# Write the processed frame as Parquet into the interim data directory.

filename = 'df_StOlavs_D1L2B.parq'
df.to_parquet(data_dir + filename)

#### Extras

In [None]:
# Year-month key for every row, zero-padded (e.g. '201601') so all keys have
# the same width and sort chronologically. Plain string concatenation of year
# and month produced '20161' for January 2016 next to '201611' for November.
df['month'] = df.timestamp.dt.strftime('%Y%m')
# df = df.drop_duplicates(['pid','month'], keep='last').reset_index(drop=True).copy()

In [None]:
# Change of the smoothed displacement from one row to the next within each
# pixel; the first row of every pixel has no predecessor and is set to 0.
df['motion'] = (
    df.groupby('pid')['smoothed']
    .transform(lambda series: np.concatenate(([0], np.diff(series))))
)

In [None]:
# Report figure: raw displacement of one pixel against three smoothing windows.
first_pid = df.pid.unique()[0]
pixel_df = df[df.pid == first_pid].copy()

# The columns '2 months', '10 months' and '12 months' are not created by any
# cell of this notebook, so plotting them failed on a fresh kernel. They are
# built here, for the plotted pixel only.
# NOTE(review): the window sizes (in timestamps) are inferred from the 6-day
# sampling (10 x 6 d ~ 2 months, 50 x 6 d ~ 10 months, 60 x 6 d ~ 12 months)
# and from the 50- and 60-timestamp fractions used in the smoothing cell -
# confirm they match the figure in the report.
n_timestamps = df.timestamp.nunique()
smoothing_windows = {'2 months': 10, '10 months': 50, '12 months': 60}
for label, window in smoothing_windows.items():
    if label not in pixel_df.columns:  # keep a column that already exists
        pixel_df[label] = smoothing(window / n_timestamps)(pixel_df['displacement'])

fig = px.line(pixel_df, x='timestamp',
        y=['displacement', '2 months', '10 months', '12 months'],
        color_discrete_sequence=['skyblue', 'gray', 'red', 'limegreen'],
        width=1000, height=600)
fig.update_layout(font_family="Times New Roman", font_size=14,
                  xaxis={'showgrid': False})
# Static export for the report; the target directory must already exist.
fig.write_image(ROOT_DIR+"/models/outputs/figs/report/smoothness.png")
fig.show()