# Data Visualization Functions
This notebook contains Python functions for data visualization. Each function draws a scatter plot of two DataFrame columns: the first two color the points by the year or month taken from a date column, and the third overlays a fitted linear regression line and reports its R^2 value.
The functions are:
1. Scatter plot colored by year
2. Scatter plot colored by month
3. Scatter plot with linear regression and R^2 value

In [None]:
# Importing required libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

## 1. Scatter Plot Colored by Year
This function will create a scatter plot of two columns from a DataFrame, where each point is colored based on the year.

In [None]:
# Function to create scatter plot colored by year
def scatter_by_year(df, x_col, y_col, date_col):
    """Create a scatter plot of two columns colored by year.

    The year is derived from ``date_col`` with ``pd.to_datetime`` and attached
    to a temporary copy of the data, so the caller's DataFrame is left
    unchanged (no ``'Year'`` column is added to or overwritten in ``df``).

    Parameters:
        df (pd.DataFrame): The DataFrame containing the data.
        x_col (str): The name of the x-axis column.
        y_col (str): The name of the y-axis column.
        date_col (str): The name of the date column; its values must be
            convertible by ``pd.to_datetime``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Derive the year on a copy: ``assign`` returns a new DataFrame, so
    # plotting does not mutate the caller's data as a side effect.
    plot_df = df.assign(Year=pd.to_datetime(df[date_col]).dt.year)

    # Create scatter plot
    plt.figure(figsize=(10, 6))
    sns.scatterplot(data=plot_df, x=x_col, y=y_col, hue='Year', palette='viridis')
    plt.legend(title='Year', loc='upper left')
    plt.title(f'{y_col} vs {x_col} by Year')
    plt.show()

## 2. Scatter Plot Colored by Month
This function will create a scatter plot of two columns from a DataFrame, where each point is colored based on the month.

In [None]:
# Function to create scatter plot colored by month
def scatter_by_month(df, x_col, y_col, date_col):
    """Create a scatter plot of two columns colored by month.

    The month number is derived from ``date_col`` with ``pd.to_datetime`` and
    attached to a temporary copy of the data, so the caller's DataFrame is
    left unchanged (no ``'Month'`` column is added to or overwritten in
    ``df``).

    Parameters:
        df (pd.DataFrame): The DataFrame containing the data.
        x_col (str): The name of the x-axis column.
        y_col (str): The name of the y-axis column.
        date_col (str): The name of the date column; its values must be
            convertible by ``pd.to_datetime``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Derive the month on a copy: ``assign`` returns a new DataFrame, so
    # plotting does not mutate the caller's data as a side effect.
    plot_df = df.assign(Month=pd.to_datetime(df[date_col]).dt.month)

    # Create scatter plot
    plt.figure(figsize=(10, 6))
    sns.scatterplot(data=plot_df, x=x_col, y=y_col, hue='Month', palette='coolwarm')
    plt.legend(title='Month', loc='upper left')
    plt.title(f'{y_col} vs {x_col} by Month')
    plt.show()

## 3. Scatter Plot with Linear Regression and R^2 Value
This function will create a scatter plot of two columns from a DataFrame and overlay a linear regression line. It will also display the R^2 value in the legend.

In [None]:
# Function to create scatter plot with linear regression and R^2 value
def scatter_with_regression(df, x_col, y_col):
    """Create a scatter plot with a linear regression line and R^2 value.

    Rows with a missing value in either column are left out of both the fit
    and the plot. The R^2 shown in the legend is computed on the same rows
    the model was fitted to. The input DataFrame is not modified.

    Parameters:
        df (pd.DataFrame): The DataFrame containing the data.
        x_col (str): The name of the x-axis column.
        y_col (str): The name of the y-axis column.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Prepare data: the regression cannot be fitted on missing values, so
    # keep only rows where both columns are present (returns a new frame).
    complete = df.dropna(subset=[x_col, y_col])
    X = complete[[x_col]].values  # 2-D, one feature column, as fit() expects
    y = complete[y_col].values

    # Perform linear regression
    model = LinearRegression()
    model.fit(X, y)
    y_pred = model.predict(X)
    r2 = r2_score(y, y_pred)

    # Create scatter plot and regression line
    plt.figure(figsize=(10, 6))
    sns.scatterplot(x=X.ravel(), y=y, label='Data')
    plt.plot(X, y_pred, color='red', label=f'Regression Line (R^2 = {r2:.2f})')
    # Raw arrays carry no names, so label the axes explicitly.
    plt.xlabel(x_col)
    plt.ylabel(y_col)
    plt.legend(title='Legend', loc='upper left')
    plt.title(f'{y_col} vs {x_col} with Linear Regression')
    plt.show()