In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

def normalize_dataset(dataframe, input_column, columns_to_normalize):
    """
    Normalize the specified columns of a pandas DataFrame using the MinMaxScaler.

    The caller's DataFrame is never modified; the scaling is applied to a copy.

    :param dataframe: A pandas DataFrame containing the data to be normalized.
    :param input_column: The name of the column that is the input variable.
                         It must be one of ``columns_to_normalize``.
    :param columns_to_normalize: A list of column names to be normalized.
    :return: A copy of ``dataframe`` in which the listed columns have been replaced
             by their MinMaxScaler-transformed values.
    :raises ValueError: If ``input_column`` is not in ``columns_to_normalize``, or if
                        the normalized columns contain NaN or infinite values.
    """
    # Imported locally so the function does not depend on numpy having been
    # imported by some other notebook cell before it is called.
    import numpy as np

    # Validate the arguments first, so a bad call fails fast instead of after
    # the (potentially expensive) copy and scaler fit have already run.
    if input_column not in columns_to_normalize:
        raise ValueError(f"The input variable '{input_column}' must be included in the columns to normalize.")

    # Work on a copy of the dataframe to avoid modifying the original one
    df_normalized = dataframe.copy()

    # Fit the scaler on, and transform, only the requested columns
    scaler = MinMaxScaler()
    df_normalized[columns_to_normalize] = scaler.fit_transform(df_normalized[columns_to_normalize])

    # Reject NaN / infinite values in the normalized output
    normalized_values = df_normalized[columns_to_normalize].to_numpy(dtype=float)
    if not np.isfinite(normalized_values).all():
        raise ValueError("Normalization resulted in NaN or infinite values.")

    return df_normalized

# Example usage:
# Assuming `df` is a pandas DataFrame that has been loaded with time series data relevant to power markets,
# and 'weather' is the column we wish to normalize, along with other related columns like 'temperature', 'humidity'.
# df_normalized = normalize_dataset(df, 'weather', ['weather', 'temperature', 'humidity'])


In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

def linear_regression_analysis(dataframe, input_variable, variables_list):
    """
    Fit one single-feature linear regression per candidate variable, using the candidate
    as the feature and the input variable as the target, and collect the R2 score of each fit.

    Entries of ``variables_list`` equal to ``input_variable`` are skipped.

    :param dataframe: A pandas DataFrame containing the time series data.
    :param input_variable: The name of the input variable column (the regression target).
    :param variables_list: A list of names of other variable columns to run regressions against.
    :return: A pandas DataFrame with columns 'Variable' and 'R2 Score', one row per regression.
    """
    # Parallel lists that become the two columns of the result
    predictor_names = []
    r2_values = []

    for candidate in variables_list:
        # The input variable is never regressed against itself
        if candidate == input_variable:
            continue

        # Single-column 2-D feature matrix and 1-D target vector
        features = dataframe[[candidate]].values
        target = dataframe[input_variable].values

        regressor = LinearRegression()
        regressor.fit(features, target)

        # Score the fit on the same data it was trained on
        fitted = regressor.predict(features)
        fit_quality = r2_score(target, fitted)

        predictor_names.append(candidate)
        r2_values.append(fit_quality)

    return pd.DataFrame({'Variable': predictor_names, 'R2 Score': r2_values})

# Example usage:
# Assuming `df` is a pandas DataFrame that has been loaded with time series data,
# 'weather' is the input variable, and other variables like 'temperature', 'humidity', 'wind_speed' are to be analyzed.
# r2_results = linear_regression_analysis(df, 'weather', ['weather', 'temperature', 'humidity', 'wind_speed'])


In [None]:
import plotly.express as px
from sklearn.linear_model import LinearRegression
import numpy as np

def create_interactive_scatter_with_regression(dataframe, column1, column2):
    """
    Show an interactive scatter plot of two DataFrame columns, colored by 'Year', with an
    OLS trendline, and display the slope and R^2 of a linear fit in the figure title.

    The slope and R^2 come from a single linear regression of ``column2`` on ``column1``
    fitted over all rows. The caller's DataFrame is not modified.

    NOTE(review): the trendline itself is drawn by plotly express; per its documentation
    the default trendline scope is per trace, so with a categorical 'Year' the drawn
    line(s) may be per-year fits while the title reports the overall fit -- confirm which
    is wanted.

    :param dataframe: A pandas DataFrame containing the data to plot. Must have a 'Year' column.
    :param column1: The name of the column plotted on the x axis (the regression feature).
    :param column2: The name of the column plotted on the y axis (the regression target).
    :return: None. The figure is displayed via ``fig.show()``.
    :raises ValueError: If the DataFrame has no 'Year' column.
    """
    # Check if 'Year' column exists
    if 'Year' not in dataframe.columns:
        raise ValueError("DataFrame must contain a 'Year' column for coloring.")

    # Fit a linear regression model over all rows to obtain the summary statistics.
    # The predictions are intentionally NOT written back into `dataframe`: doing so
    # would add a column to the caller's data as a hidden side effect.
    model = LinearRegression()
    X = dataframe[[column1]]
    y = dataframe[column2]
    model.fit(X, y)

    # Slope and R^2 of the overall fit
    r2 = model.score(X, y)
    slope = model.coef_[0]

    # Create the scatter plot. The statistics go into the title so they are actually
    # visible; a `labels` entry for a column that is not plotted is never rendered.
    fig = px.scatter(dataframe, x=column1, y=column2, color='Year', trendline="ols",
                     trendline_color_override='black',
                     title=f'Slope: {slope:.2f}, R²: {r2:.2f}')

    # Display the figure
    fig.show()

# Example usage:
# Assuming `df` is a pandas DataFrame that contains 'Year', 'column1', and 'column2' columns,
# create_interactive_scatter_with_regression(df, 'column1', 'column2')



In [None]:
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np
import pandas as pd

def prophet_forecast_and_evaluate(dataframe, col1, periods, frequency):
    """
    Fit a Prophet model to one column of a DataFrame, forecast into the future, compute
    in-sample error metrics, and show the forecast and component plots.

    :param dataframe: A pandas DataFrame with a datetime index (named or unnamed).
    :param col1: The name of the column to forecast.
    :param periods: The number of periods to forecast into the future.
    :param frequency: The frequency of the time series data (e.g., 'D' for daily, 'M' for monthly).
    :return: A tuple ``(forecast, (mae, rmse, mape), (fig_forecast, fig_components))`` where
             ``forecast`` holds the 'ds', 'yhat', 'yhat_lower' and 'yhat_upper' columns.
             ``mape`` is a percentage computed only over observations that are non-zero
             (it is NaN when every observation is zero).
    :raises ValueError: If ``col1`` is not a column of ``dataframe``.
    """
    # Fail early with a clear message instead of a less specific error from inside Prophet
    if col1 not in dataframe.columns:
        raise ValueError(f"Column '{col1}' was not found in the DataFrame.")

    # Build the two-column ('ds', 'y') frame directly from the index and the target column.
    # Renaming the column produced by reset_index() only works when the index is unnamed
    # (the column is then called 'index'); a named index would leave no 'ds' column.
    df_prophet = pd.DataFrame({'ds': dataframe.index, 'y': dataframe[col1].to_numpy()})

    # Fit the Prophet model
    model = Prophet()
    model.fit(df_prophet)

    # Create a DataFrame to hold historical plus future dates, then predict
    future = model.make_future_dataframe(periods=periods, freq=frequency)
    forecast = model.predict(future)

    # Calculate error metrics on the historical data. Observations and fitted values are
    # paired by timestamp rather than by position, so the metrics stay correct when the
    # input is unsorted; rows with a missing observation are excluded.
    observed = df_prophet.dropna(subset=['y']).copy()
    observed['ds'] = pd.to_datetime(observed['ds'])
    evaluation = observed.merge(forecast[['ds', 'yhat']], on='ds', how='inner')
    y_true = evaluation['y'].to_numpy(dtype=float)
    y_pred = evaluation['yhat'].to_numpy(dtype=float)

    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))

    # MAPE is undefined where the observation is zero; skip those points instead of
    # letting a division by zero turn the whole metric into inf/NaN.
    nonzero = y_true != 0
    if nonzero.any():
        mape = np.mean(np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])) * 100
    else:
        mape = float('nan')

    # Plot the forecast and its components
    fig_forecast = plot_plotly(model, forecast)
    fig_components = plot_components_plotly(model, forecast)

    # Show the plots
    fig_forecast.show()
    fig_components.show()

    # Return the forecast, error metrics, and the figure objects for further manipulation if needed
    return forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']], (mae, rmse, mape), (fig_forecast, fig_components)

# Example usage:
# Assuming `df` is a pandas DataFrame with a datetime index and 'col1' as the target column,
# predictions, errors, figures = prophet_forecast_and_evaluate(df, 'col1', 365, 'D')
