In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from ipywidgets import interact

# Load the dataset
# The relative path is resolved against the current working directory. The
# resulting module-level DataFrame `data` is read directly by plot_data() and
# by the widget setup below.
file_path = 'merged_sentinel_ground_data2.csv'
data = pd.read_csv(file_path)

# Set the cs_cdf threshold
# NOTE(review): this constant is not referenced anywhere in the visible code;
# cloud filtering uses the `cloud_threshold` slider value instead. Confirm
# whether it is still needed.
cs_cdf_threshold = 0.7

# Remote sensing columns: every band name combined with a point suffix _0 .. _8,
# grouped by band in the order listed here.
remote_sensing_vars = [
    f'{band}_{point}'
    for band in ('B11', 'B12', 'B2', 'B3', 'B4', 'B8', 'B8A')
    for point in range(9)
]

def plot_data(plot_type, x_var, y_var, log_x, log_y, window, filter_cloud, cloud_band, cloud_threshold, custom_var_expr, drop_zero):
    """Draw a scatter plot or a moving-average time-series plot of columns of ``data``.

    Written as the callback for ``ipywidgets.interact``. Reads the module-level
    ``data`` DataFrame and ``remote_sensing_vars`` list.

    Args:
        plot_type: 'Scatter Plot' or 'Time Series Plot'.
        x_var: Column plotted on the x axis (scatter) or the left axis (time series).
        y_var: Second column to plot, or the string 'None' to plot ``x_var`` alone.
        log_x: Plot ``log(x + 1e-10)`` instead of the raw ``x_var`` values.
        log_y: Plot ``log(y + 1e-10)`` instead of the raw ``y_var`` values.
        window: Row count of the rolling mean used by the time-series plot.
        filter_cloud: Keep only rows whose cloud score is >= ``cloud_threshold``.
            Only applied when a remote sensing variable is selected.
        cloud_band: Prefix of the cloud score column, e.g. 'cs' or 'cs_cdf'.
        cloud_threshold: Minimum cloud score kept when ``filter_cloud`` is set.
        custom_var_expr: Optional Python expression evaluated with ``data`` and
            ``np`` in scope. The result is stored in ``data['custom_var']`` and
            plotted in place of ``x_var``.
        drop_zero: Drop rows where a plotted column equals 0.

    Returns:
        None. A message is printed and nothing is drawn when the custom
        expression fails, the cloud score column does not exist, or no rows
        survive the filtering.
    """
    has_y = y_var != 'None'

    # Evaluate the custom variable expression if provided
    if custom_var_expr:
        try:
            # SECURITY: eval() runs arbitrary Python typed into the text box;
            # only use this with trusted input.
            data['custom_var'] = eval(custom_var_expr, {"data": data, "np": np})
            x_var = 'custom_var'
        except Exception as e:
            print(f"Error in custom variable expression: {e}")
            return

    columns = [x_var, y_var] if has_y else [x_var]

    # A cloud score column is only attached when a remote sensing variable is plotted.
    cs_col = None
    if any(var in remote_sensing_vars for var in (x_var, y_var)):
        if x_var in remote_sensing_vars:
            point_num = x_var.split('_')[-1]
        elif 'custom_var' in x_var:
            # A custom expression has no point suffix; fall back to point 0.
            point_num = '0'
        else:
            # Only y_var is a remote sensing variable, so take the point from it
            # rather than from the unrelated x_var name.
            point_num = y_var.split('_')[-1]
        cs_col = f'{cloud_band}_{point_num}'
        if cs_col not in data.columns:
            print(f"Cloud score column '{cs_col}' not found in the dataset.")
            return
        columns.append(cs_col)

    # dict.fromkeys removes repeated names while keeping order, so picking the
    # same column twice does not yield a frame with duplicate column labels.
    subset = data[list(dict.fromkeys(columns))].dropna()

    # Apply cloud score filtering if enabled
    if filter_cloud and cs_col is not None:
        subset = subset[subset[cs_col] >= cloud_threshold]

    # Drop zero-valued data points if enabled
    if drop_zero:
        subset = subset[subset[x_var] != 0]
        if has_y:
            subset = subset[subset[y_var] != 0]

    if subset.empty:
        print("No data left to plot after filtering.")
        return

    # Timestamps of the surviving rows, matched through the shared index.
    timestamps = pd.to_datetime(data['timestamp']).loc[subset.index]

    # The small constant keeps log(0) finite; negative inputs still yield NaN.
    epsilon = 1e-10
    x_values = subset[x_var]
    y_values = subset[y_var] if has_y else None
    if log_x:
        x_values = np.log(x_values + epsilon)
    if has_y and log_y:
        y_values = np.log(y_values + epsilon)

    # Create the figure only now, so early returns leave no empty figure behind
    # and the time-series branch does not open a second one.
    fig, ax = plt.subplots(figsize=(15, 6))
    active_ax = ax  # axes that receives the title and the grid

    if plot_type == 'Scatter Plot':
        if has_y:
            ax.scatter(x_values, y_values, alpha=0.5)
            ax.set_xlabel(x_var)
            ax.set_ylabel(y_var)

            # Calculate and display the correlation
            correlation = x_values.corr(y_values)
            ax.set_title(f'{plot_type} of {x_var} and {y_var}\nCorrelation: {correlation:.2f}')
        else:
            ax.scatter(subset.index, x_values, alpha=0.5)
            ax.set_xlabel('Index')
            ax.set_ylabel(x_var)
            ax.set_title(f'{plot_type} of {x_var}')

    elif plot_type == 'Time Series Plot':
        if has_y:
            color = 'tab:blue'
            ax.set_xlabel('Time')
            ax.set_ylabel(x_var, color=color)
            ax.plot(timestamps, x_values.rolling(window=window).mean(), color=color, label=f'{x_var} (Moving Average)')
            ax.tick_params(axis='y', labelcolor=color)

            # Second y axis sharing the time axis, so both series keep their own scale.
            ax2 = ax.twinx()
            color = 'tab:red'
            ax2.set_ylabel(y_var, color=color)
            ax2.plot(timestamps, y_values.rolling(window=window).mean(), color=color, label=f'{y_var} (Moving Average)')
            ax2.tick_params(axis='y', labelcolor=color)

            fig.legend(loc='upper right')
            ax2.set_title(f'{plot_type} of {x_var} and {y_var}')
            active_ax = ax2
        else:
            ax.plot(timestamps, x_values.rolling(window=window).mean(), label=f'{x_var} (Moving Average)')
            ax.set_xlabel('Time')
            ax.set_ylabel(x_var)
            ax.legend()
            ax.set_title(f'{plot_type} of {x_var}')

    active_ax.grid(True)
    plt.show()

# Columns offered in the X/Y dropdowns: everything except the time axis.
variables = list(data.columns)
variables.remove('timestamp')

# One widget per plot_data argument, keyed by parameter name; dict order sets
# the order in which the controls are laid out.
plot_controls = {
    'plot_type': widgets.RadioButtons(options=['Scatter Plot', 'Time Series Plot'], description='Plot Type'),
    'x_var': widgets.Dropdown(options=variables, description='X Variable'),
    'y_var': widgets.Dropdown(options=['None'] + variables, description='Y Variable'),
    'log_x': widgets.Checkbox(value=False, description='Log X Variable'),
    'log_y': widgets.Checkbox(value=False, description='Log Y Variable'),
    'window': widgets.IntSlider(value=5, min=1, max=50, step=1, description='Moving Avg Window'),
    'filter_cloud': widgets.Checkbox(value=False, description='Filter by Cloud Score'),
    'cloud_band': widgets.RadioButtons(options=['cs', 'cs_cdf'], description='Cloud Band'),
    'cloud_threshold': widgets.FloatSlider(value=0.1, min=0.0, max=1.0, step=0.01, description='Cloud Threshold'),
    'custom_var_expr': widgets.Text(value='', description='Custom Var Expr'),
    'drop_zero': widgets.Checkbox(value=False, description='Drop Zero Values'),
}

# The trailing semicolon keeps the notebook from echoing the return value.
interact(plot_data, **plot_controls);


interactive(children=(RadioButtons(description='Plot Type', options=('Scatter Plot', 'Time Series Plot'), valu…

B4_8 vs TurbidityBuoy


In [11]:
# Print a summary of the loaded DataFrame (index range, column count, dtypes, memory usage).
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 769 entries, 0 to 768
Columns: 121 entries, timestamp to Pstn(hPa)_EnvData
dtypes: float64(120), object(1)
memory usage: 727.1+ KB


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from ipywidgets import interact

# Load the dataset
# The relative path is resolved against the current working directory. The
# resulting module-level DataFrame `data` is read directly by plot_data() and
# by the widget setup below.
file_path = 'merged_sentinel_ground_data2.csv'
data = pd.read_csv(file_path)

# Set the cs_cdf threshold
# NOTE(review): this constant is not referenced anywhere in the visible code;
# cloud filtering uses the `cloud_threshold` slider value instead. Confirm
# whether it is still needed.
cs_cdf_threshold = 0.7

# Remote sensing columns: every band name combined with a point suffix _0 .. _8,
# grouped by band in the order listed here.
remote_sensing_vars = [
    f'{band}_{point}'
    for band in ('B11', 'B12', 'B2', 'B3', 'B4', 'B8', 'B8A')
    for point in range(9)
]

# Columns offered in the X/Y dropdowns: everything except the time axis.
variables = list(data.columns)
variables.remove('timestamp')

def plot_data(plot_type, x_var, y_var, log_x, log_y, window, filter_cloud, cloud_band, cloud_threshold, custom_var_expr, drop_zero):
    """Draw a scatter plot or a moving-average time-series plot of columns of ``data``.

    Written as the callback for ``ipywidgets.interact``. Reads the module-level
    ``data`` DataFrame and ``remote_sensing_vars`` list.

    Args:
        plot_type: 'Scatter Plot' or 'Time Series Plot'.
        x_var: Column plotted on the x axis (scatter) or the left axis (time series).
        y_var: Second column to plot, or the string 'None' to plot ``x_var`` alone.
        log_x: Plot ``log(x + 1e-10)`` instead of the raw ``x_var`` values.
        log_y: Plot ``log(y + 1e-10)`` instead of the raw ``y_var`` values.
        window: Row count of the rolling mean used by the time-series plot.
        filter_cloud: Keep only rows whose cloud score is >= ``cloud_threshold``.
            Only applied when a remote sensing variable is selected.
        cloud_band: Prefix of the cloud score column, e.g. 'cs' or 'cs_cdf'.
        cloud_threshold: Minimum cloud score kept when ``filter_cloud`` is set.
        custom_var_expr: Optional Python expression evaluated with ``data`` and
            ``np`` in scope. The result is stored in ``data['custom_var']`` and
            plotted in place of ``x_var``.
        drop_zero: Drop rows where a plotted column equals 0.

    Returns:
        None. A message is printed and nothing is drawn when the custom
        expression fails, the cloud score column does not exist, or no rows
        survive the filtering.
    """
    has_y = y_var != 'None'

    # Evaluate the custom variable expression if provided
    if custom_var_expr:
        try:
            # SECURITY: eval() runs arbitrary Python typed into the text box;
            # only use this with trusted input.
            data['custom_var'] = eval(custom_var_expr, {"data": data, "np": np})
            x_var = 'custom_var'
        except Exception as e:
            print(f"Error in custom variable expression: {e}")
            return

    columns = [x_var, y_var] if has_y else [x_var]

    # A cloud score column is only attached when a remote sensing variable is plotted.
    cs_col = None
    if any(var in remote_sensing_vars for var in (x_var, y_var)):
        if x_var in remote_sensing_vars:
            point_num = x_var.split('_')[-1]
        elif 'custom_var' in x_var:
            # A custom expression has no point suffix; fall back to point 0.
            point_num = '0'
        else:
            # Only y_var is a remote sensing variable, so take the point from it
            # rather than from the unrelated x_var name.
            point_num = y_var.split('_')[-1]
        cs_col = f'{cloud_band}_{point_num}'
        if cs_col not in data.columns:
            print(f"Cloud score column '{cs_col}' not found in the dataset.")
            return
        columns.append(cs_col)

    # dict.fromkeys removes repeated names while keeping order, so picking the
    # same column twice does not yield a frame with duplicate column labels.
    subset = data[list(dict.fromkeys(columns))].dropna()

    # Apply cloud score filtering if enabled
    if filter_cloud and cs_col is not None:
        subset = subset[subset[cs_col] >= cloud_threshold]

    # Drop zero-valued data points if enabled
    if drop_zero:
        subset = subset[subset[x_var] != 0]
        if has_y:
            subset = subset[subset[y_var] != 0]

    if subset.empty:
        print("No data left to plot after filtering.")
        return

    # Timestamps of the surviving rows, matched through the shared index.
    timestamps = pd.to_datetime(data['timestamp']).loc[subset.index]

    # The small constant keeps log(0) finite; negative inputs still yield NaN.
    epsilon = 1e-10
    x_values = subset[x_var]
    y_values = subset[y_var] if has_y else None
    if log_x:
        x_values = np.log(x_values + epsilon)
    if has_y and log_y:
        y_values = np.log(y_values + epsilon)

    # Create the figure only now, so early returns leave no empty figure behind
    # and the time-series branch does not open a second one.
    fig, ax = plt.subplots(figsize=(15, 6))
    active_ax = ax  # axes that receives the title and the grid

    if plot_type == 'Scatter Plot':
        if has_y:
            ax.scatter(x_values, y_values, alpha=0.5)
            ax.set_xlabel(x_var)
            ax.set_ylabel(y_var)

            # Calculate and display the correlation
            correlation = x_values.corr(y_values)
            ax.set_title(f'{plot_type} of {x_var} and {y_var}\nCorrelation: {correlation:.2f}')
        else:
            ax.scatter(subset.index, x_values, alpha=0.5)
            ax.set_xlabel('Index')
            ax.set_ylabel(x_var)
            ax.set_title(f'{plot_type} of {x_var}')

    elif plot_type == 'Time Series Plot':
        if has_y:
            color = 'tab:blue'
            ax.set_xlabel('Time')
            ax.set_ylabel(x_var, color=color)
            ax.plot(timestamps, x_values.rolling(window=window).mean(), color=color, label=f'{x_var} (Moving Average)')
            ax.tick_params(axis='y', labelcolor=color)

            # Second y axis sharing the time axis, so both series keep their own scale.
            ax2 = ax.twinx()
            color = 'tab:red'
            ax2.set_ylabel(y_var, color=color)
            ax2.plot(timestamps, y_values.rolling(window=window).mean(), color=color, label=f'{y_var} (Moving Average)')
            ax2.tick_params(axis='y', labelcolor=color)

            fig.legend(loc='upper right')
            ax2.set_title(f'{plot_type} of {x_var} and {y_var}')
            active_ax = ax2
        else:
            ax.plot(timestamps, x_values.rolling(window=window).mean(), label=f'{x_var} (Moving Average)')
            ax.set_xlabel('Time')
            ax.set_ylabel(x_var)
            ax.legend()
            ax.set_title(f'{plot_type} of {x_var}')

    active_ax.grid(True)
    plt.show()

# One widget per plot_data argument, keyed by parameter name; dict order sets
# the order in which the controls are laid out.
plot_controls = {
    'plot_type': widgets.RadioButtons(options=['Scatter Plot', 'Time Series Plot'], description='Plot Type'),
    'x_var': widgets.Dropdown(options=variables, description='X Variable'),
    'y_var': widgets.Dropdown(options=['None'] + variables, description='Y Variable'),
    'log_x': widgets.Checkbox(value=False, description='Log X Variable'),
    'log_y': widgets.Checkbox(value=False, description='Log Y Variable'),
    'window': widgets.IntSlider(value=5, min=1, max=50, step=1, description='Moving Avg Window'),
    'filter_cloud': widgets.Checkbox(value=False, description='Filter by Cloud Score'),
    'cloud_band': widgets.RadioButtons(options=['cs', 'cs_cdf'], description='Cloud Band'),
    'cloud_threshold': widgets.FloatSlider(value=0.1, min=0.0, max=1.0, step=0.01, description='Cloud Threshold'),
    'custom_var_expr': widgets.Text(value='', description='Custom Var Expr'),
    'drop_zero': widgets.Checkbox(value=False, description='Drop Zero Values'),
}

# The trailing semicolon keeps the notebook from echoing the return value.
interact(plot_data, **plot_controls);


interactive(children=(RadioButtons(description='Plot Type', options=('Scatter Plot', 'Time Series Plot'), valu…