In [1]:
#| default_exp mplots

In [2]:
#| export
import time
from dataclasses import dataclass

@dataclass
class Time:
    time_start: float =  None
    time_end : float =  None
    time_total : float =  0.0

    def start(self): 
        self.time_start = time.time()
        return self.time_start

    def end(self):
        self.time_end = time.time()
        return self.time_end
        
    def duration(self):
        self.time_total=self.time_end - self.time_start
        return self.duration

In [3]:
#| hide
# Timer basic example
foo = Time()
foo.start()
time.sleep(2) 
foo.end()
foo.duration()

<bound method Time.duration of Time(time_start=1710152007.449831, time_end=1710152009.4521453, time_total=2.002314329147339)>

## Mplots functions

In [4]:
#| export
import pyscamp as scamp
import stumpy as stump 
import dvats.memory as mem
import numpy as np

In [5]:
#| hide 
foo = np.random.rand(10000)
foo_m = 50
print("Ts ~ ", foo.shape)
print("Expected mp length: ", int(np.floor(foo.shape[0]-foo_m+1)))

Ts ~  (10000,)
Expected mp length:  9951


In [26]:
#\ export 
def matrix_profile(
    data, 
    subsequence_len, 
    method = 'scamp', 
    print_flag = False, 
    debug = True, 
    timed = True
):
    print("--> matrix profile")
    duration = 0.0
    if timed: 
        timer = Time()
        timer.start()
    match method:
        case 'stumpy':
            if print_flag: print("--> Stumpy")
            mp = stump.gpu_stump(data, subsequence_len)
            index = [] #already included in mp
        case 'scamp': 
            if print_flag: print("--> Scamp")
            if debug: 
                print("Check gpu use")
                has_gpu_support = scamp.gpu_supported()
                print(has_gpu_support)
            mp, index = scamp.selfjoin(data, subsequence_len)
        case _: #default scamp
            if print_flag: print("--> Invalid method. Using scamp [default]")
            if debug: 
                has_gpu_support = scamp.gpu_supported()
                print(has_gpu_support)
            mp, index = scamp.selfjoin(data, subsequence_len)
            
    if timed: 
        timer.end()
        duration = timer.duration() 
    if print_flag: 
        if timed: 
            print(f"matrix profile {duration} seconds -->")
        else: 
            print("matrix profile -->")
    return mp, index, duration

In [27]:
matrix_profile(foo, foo_m, 'escampo', True)

--> matrix profile
--> Invalid method. Using scamp [default]
True
matrix profile <bound method Time.duration of Time(time_start=1710152439.3174796, time_end=1710152439.3983326, time_total=0.08085298538208008)> seconds -->


(array([7.1280007, 7.1532564, 7.052767 , ..., 6.8556924, 7.135168 ,
        6.948154 ], dtype=float32),
 array([3771, 3772, 7711, ..., 5512, 5513, 5148], dtype=int32),
 <bound method Time.duration of Time(time_start=1710152439.3174796, time_end=1710152439.3983326, time_total=0.08085298538208008)>)

In [28]:
#| hide
foo_mp_stumpy = matrix_profile(foo, foo_m, 'stumpy', True, True, True)

--> matrix profile
--> Stumpy
matrix profile <bound method Time.duration of Time(time_start=1710152460.3130288, time_end=1710152463.1483097, time_total=2.8352808952331543)> seconds -->


In [29]:
#| hide
foo_mp_stumpy = matrix_profile(foo, foo_m, 'escampo', True, True, True)

--> matrix profile
--> Invalid method. Using scamp [default]
True
matrix profile <bound method Time.duration of Time(time_start=1710152477.0583746, time_end=1710152477.1771278, time_total=0.11875319480895996)> seconds -->


## Plotting auxiliar functions

In [None]:
#| export
import dvats.load as load
import os
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as dates
from matplotlib.patches import Rectangle
import datetime as dt
from mpl_toolkits.axes_grid1 import ImageGrid
from matplotlib.gridspec import GridSpec
plt.style.use('https://raw.githubusercontent.com/TDAmeritrade/stumpy/main/docs/stumpy.mplstyle')

In [None]:
#| export
def plot_dataFrame(title, df, vars = [], interval = 10000):
    if len(vars > 0):
        num_vars = len(df.columns)
    
        for var_num in vars:
            if var_num >= len(df.columns):
                raise ValueError("var_num "+var[var_num] + "is out of the range of DataFrame columns: " + num_vars)

        num_vars = len(vars)
        
        ### Start the plot 

        #fig = plt.figure(figsize=(10, num_intervals * 3))  # Ajusta el tamaño del plot según el número de intervalos
        
        num_intervals = df.shape[0] // interval + 1
        fig = plt.figure(figsize=(10, num_vars * num_intervals * 3))  ## Size
        gs = GridSpec(num_intervals*num_vars, 1) # 1 column, len(vars) rows
        var_pos = 0
        for var_num  in vars:
            var_name = df.columns[var_num]
            data = df[var_name]
            for i in range(num_intervals):    
                start_idx = i * interval
                end_idx = len(data) if i == (num_intervals - 1) else start_idx + interval
                ax = fig.add_subplot(gs[var_pos+i])
                ax.plot(np.arange(start_idx, end_idx), data[start_idx:end_idx], label=f"{var_name} [{start_idx}, {end_idx}]")
                ax.set_title(f"{var_name} [{start_idx}, {end_idx}]")
                ax.set_xlabel("Time")
                ax.set_ylabel(var_name)
                ax.legend()
            var_pos += 1
                
        plt.suptitle(title, fontsize=16)
        plt.tight_layout()
        plt.show()
    else raise ValueError("No variable proposed for plotting")

In [None]:
#| export
def df_plot_colored_variables(df):
    # Show time series plot
    fig, ax = plt.subplots(1, figsize=(15,5), )
    cmap = matplotlib.colormaps.get_cmap('viridis')
    #df.plot(color=cmap(0.05), ax=ax) # or use colormap=cmap
    df.plot(colormap=cmap, ax=ax) # or use colormap=cmap
    # rect = Rectangle((5000, -4.2), 3000, 8.4, facecolor='lightgrey', alpha=0.5)
    # ax.add_patch(rect)
    plt.tight_layout()
    plt.legend()
    display(plt.show())

In [None]:
#| export
def plot_df_with_intervals_and_colors(title, df, interval=10000):
    num_variables = len(df.columns)
    num_intervals = len(df) // interval + 1  # Calcula el número necesario de intervalos/subplots

    fig = plt.figure(figsize=(10, num_intervals * 3 * num_variables))  # Ajusta el tamaño del plot
    gs = GridSpec(num_intervals * num_variables, 1)
    
    cmap = matplotlib.colormaps.get_cmap('viridis')

    for var_num, var in enumerate(df.columns):
        data = df[var]
        for i in range(num_intervals):
            ax = fig.add_subplot(gs[var_num * num_intervals + i])
            start_idx = i * interval
            end_idx = start_idx + interval

            if i == num_intervals - 1:  # Ajusta el último intervalo para incluir todos los datos restantes
                end_idx = len(data)

            color = cmap(var_num / num_variables)  # Asigna un color basado en la variable
            ax.plot(np.arange(start_idx, end_idx), data[start_idx:end_idx], label=f"{var} [{start_idx}, {end_idx}]", color=color)
            ax.set_title(f"{var} [{start_idx}, {end_idx}]")
            ax.set_xlabel("Index")
            ax.set_ylabel(var)
            ax.legend()

    plt.suptitle(title, fontsize=16)
    plt.tight_layout()
    plt.show()

In [None]:
#| export
def plot_motif(df, motif_idx, nearest_neighbor_idx, variable_name, title, padding = 1000, m = 1, mp = None):
    fig, axs = plt.subplots(2, sharex = True, gridspec_kw={'hspace': 0})
    plt.suptitle('Motif (Pattern) Discovery', fontsize='30')
    padding = min(padding, len(df[variable_name].values) // 2)

    # Calcula los límites para hacer zoom
    x_min = max(min(motif_idx, nearest_neighbor_idx) - padding, 0)
    x_max = min(max(motif_idx, nearest_neighbor_idx) + padding, len(df[variable_name].values))

    axs[0].plot(df[variable_name].values)
    axs[0].set_xlim([x_min, x_max])  # Aplica el zoom aquí
    axs[0].set_ylabel(title, fontsize='20')
        
    axs[0].set_ylabel(title, fontsize='20')
    rect = Rectangle((motif_idx, 0), m, 40, facecolor='lightgrey')
    axs[0].add_patch(rect)
    rect = Rectangle((nearest_neighbor_idx, 0), m, 40, facecolor='lightgrey')
    axs[0].add_patch(rect)
    axs[1].set_xlabel('Time', fontsize ='20')
    axs[1].set_ylabel('Matrix Profile', fontsize='20')
    axs[1].axvline(x=motif_idx, linestyle="dashed")
    axs[1].axvline(x=nearest_neighbor_idx, linestyle="dashed")
    axs[1].plot(mp)
    plt.show()

In [None]:
#| export
def plot_motif_separated(df, motif_idx=0, nearest_neighbor_idx=0, variable_name="", title="", padding=1000, m=1, mp=None):
    fig, axs = plt.subplots(4, sharex=False, figsize=( 12, 5), gridspec_kw={'hspace': 0.5})
    plt.suptitle('Motif (Pattern) Discovery', fontsize='20')
    
    padding = max(m, min(padding, len(df[variable_name].values) // 2))

    x_min_motif = max(motif_idx - padding, 0)
    x_max_motif = min(motif_idx + padding, len(df[variable_name].values))

    axs[0].plot(df[variable_name].values)
    axs[0].set_xlim([x_min_motif, x_max_motif])
    axs[0].set_ylabel(title, fontsize='10')
    rect_motif = Rectangle((motif_idx, df[variable_name].min()), m, df[variable_name].max() - df[variable_name].min(), facecolor='lightgrey')
    axs[0].add_patch(rect_motif)

    axs[1].plot(mp)
    axs[1].set_xlim([x_min_motif, x_max_motif])
    axs[1].set_xlabel('Time', fontsize='10')
    axs[1].set_ylabel('MP - Min', fontsize='10')
    axs[1].axvline(x=motif_idx, linestyle="dashed")

    x_min_neighbor = max(nearest_neighbor_idx - padding, 0)
    x_max_neighbor = min(nearest_neighbor_idx + padding, len(df[variable_name].values))

    axs[2].plot(df[variable_name].values)
    axs[2].set_xlim([x_min_neighbor, x_max_neighbor])
    axs[2].set_ylabel(title, fontsize='10')
    rect_neighbor = Rectangle((nearest_neighbor_idx, df[variable_name].min()), m, df[variable_name].max() - df[variable_name].min(), facecolor='lightgrey')
    axs[2].add_patch(rect_neighbor)

    axs[3].plot(mp)
    axs[3].set_xlim([x_min_neighbor, x_max_neighbor])
    axs[3].set_xlabel('Time', fontsize='10')
    axs[3].set_ylabel('MP-max', fontsize='10')
    axs[3].axvline(x=nearest_neighbor_idx, linestyle="dashed")

    plt.show()

# Download & load matrices

In [None]:
#| export
class GD_Mat:
    def __init__(self, id,  name, data_path = '~/data'):
        self.id = id
        self.data_path = os.path.expanduser(data_path)
        self.zip_path = os.path.join(self.data_path, name + '.zip')
        self.mat_path = os.path.join(self.data_path, name + '.mat')
        self.mats_files = None
        self.mats = None
        self.mats_df =  None
        self.num_mats = 0
        self.num_mats_extracted = 0
        
    def download_file_from_google_drive(self):
        return load.download_file_from_google_drive(self.id, self.zip_path)
        
    def get_mat_files(self):
        self.mats_files = [f for f in load.zip_contents(self.zip_path) if not f.startswith('__MACOSX/')]
        self.num_mats = len(self.mats_files)
        self.mats = self.mats_df = [None]*self.num_mats
    
    def unzip_mat(self, all_one, case = '', print_flag = False): 
        str = load.unzip_mat(all_one, self.zip_path, self.data_path, case, print_flag)
        self.get_mat_files()
        return str
        
    def mat2csv(self, case_id, print_flag = False):
        if print_flag: print("--> mat2csv", case_id)
        case = self.mats_files[case_id]
        case_path = os.path.join(self.data_path, case)
        print("Mat2csv case", case_path)
        self.mats_df[case_id] = load.mat2csv(case_path, self.data_path, print_flag)
        if print_flag: print("mat2csv", case_id, "-->")
        
    def __str__(self): 
        str = f"FileID: {self.id}\nData path: {self.data_path}\n"
        str += f"zip path: {self.zip_path}\nmat_files: {self.mats_files}"
        str +=f"\nnum_mats: {self.num_mats}"
        return str
    

In [None]:
#| hide 
file_id = '1qq1z2mVRd7PzDqX0TDAwY7BcWVjnXUfQ'
name = 'InsectData-fig11'

gd_mat = GD_Mat(file_id, name)
print(gd_mat)
gd_mat.unzip_mat('all')
print(gd_mat)
gd_mat.mat2csv(0)
print(gd_mat)
display(gd_mat.mats_df[0].head(5))

## Global variables

In [None]:
#| export
eamonn_drive_mplots = {
    'insects0': {
        'id': '1qq1z2mVRd7PzDqX0TDAwY7BcWVjnXUfQ',
        'name': 'InsectData-fig11'
    }
}

In [None]:
#| hide 
eammon_drive_mplots['insects0']['name']