In [1]:
# Install seaborn into the environment of the running kernel (%pip, not !pip).
# NOTE(review): the saved output below shows the install was interrupted (^C);
# re-run this cell and restart the kernel before running the rest of the notebook.
%pip install seaborn

Defaulting to user installation because normal site-packages is not writeable
^C
Note: you may need to restart the kernel to use updated packages.


In [None]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

#Step 1
# Read the raw CSV; it is expected to contain a 'date' column (used as the index below)
df = pd.read_csv('sample_time_series_data.csv')

# Parse the 'date' column into datetimes first, then promote it to the index,
# so the frame ends up keyed by a datetime index named 'date'
df['date'] = pd.to_datetime(df['date'])
df = df.set_index('date')

# Sanity check: show the first few rows of the loaded frame
print(df.head())


In [None]:
# Summary statistics of the DataFrame's columns (rendered as the cell output);
# a quick look at the scale and spread of the data before plotting it.
df.describe()

In [None]:
#Step 2

# Setting pyplot style
# The bare "seaborn" style name was deprecated in matplotlib 3.6 and is no longer
# available in current releases, where the same style ships as "seaborn-v0_8".
# Use whichever name this installation provides so the cell runs on both.
seaborn_style = "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"
plt.style.use(seaborn_style)

# Plotting time series: the 'value' column against the datetime index
plt.figure(figsize=(10,6))
df['value'].plot(title='Value - Time Plot')
plt.ylabel('Value')
plt.xlabel('Date')
plt.show()

The plot shows an upward trend over the years. The series is not stationary and exhibits minimal yearly seasonality.

In [14]:
#Step 3: Seasonal Plots

# Derive the calendar features directly from the datetime index.
# The index itself is left as it is (the 'date' index), so no
# reset_index / set_index round trip is needed.
df['year'] = df.index.year
df['month'] = df.index.month
df['week'] = df.index.isocalendar().week       # ISO week number
df['hour'] = df.index.hour
df['day'] = df.index.dayofweek                 # numeric day of week
df['day_str'] = df.index.strftime('%a')        # abbreviated weekday name
df['year_month'] = df.index.strftime('%Y_%m')  # e.g. '2023_01'

In [None]:
# Preview the first rows of the frame, including any derived columns
df.head()

In [None]:
#Seasonal plot — Yearly

# NOTE(review): nothing in this cell draws random numbers; the seed call is kept
# only so the global NumPy RNG state stays the same for any later cell.
np.random.seed(42)

# Mean 'value' for every (month, year) pair. After reset_index the rows are
# ordered by month, then year, so each year's rows run from its first to last month.
df_plot = df[['month', 'year', 'value']].dropna().groupby(['month', 'year']).mean()[['value']].reset_index()

# Sorted so the colour assigned to a year does not depend on row order
years = np.sort(df_plot['year'].unique())
colors = ['blue', 'green', 'red', 'purple', 'orange', 'cyan', 'magenta', 'yellow', 'brown', 'black']

# Plotting: one line per year, labelled with the year at the end of the line
plt.figure(figsize=(16,12))
for i, y in enumerate(years):
    # Cycle through the palette so years beyond the 10th are still drawn
    # (colours repeat) instead of being silently skipped.
    year_color = colors[i % len(colors)]
    year_data = df_plot[df_plot['year'] == y]
    plt.plot('month', 'value', data=year_data, color=year_color, label=y)

    # Anchor the label just right of the last month present for this year.
    # Using the month itself (not the row count) keeps the label on the line
    # end for years that do not start in January.
    # 2018 gets a wider gap — presumably to avoid overlapping a neighbouring label; TODO confirm.
    label_gap = 0.3 if y == 2018 else 0.1
    last_month = year_data['month'].iloc[-1]
    last_value = year_data['value'].iloc[-1]
    plt.text(last_month + label_gap, last_value, y, fontsize=12, color=year_color)


# Setting labels and titles
plt.yticks(fontsize=12, alpha=.7)
plt.title("Seasonal Plot - Monthly ", fontsize=20)
plt.ylabel('Value')
plt.xlabel('Month')
plt.show()


In [None]:
#Seasonal plot — Weekly
# Preparing the data
# NOTE(review): nothing in this cell draws random numbers; the seed call is kept
# only so the global NumPy RNG state stays the same for any later cell.
np.random.seed(42)

# Mean 'value' per (weekday, month); 'day' is carried along as the numeric sort key
df_plot = df[['month', 'day_str', 'value', 'day']].dropna()
df_plot = df_plot.groupby(['day_str', 'month', 'day']).mean()[['value']].reset_index()
# Order by month, then weekday number, so each month's line is drawn in weekday order
df_plot = df_plot.sort_values(by=['month', 'day'], ascending=True)

# Defining the color palette
# Sorted months give a deterministic month -> colour mapping.
months = np.sort(df_plot['month'].unique())
# plt.get_cmap(name, lut) is used instead of plt.cm.get_cmap, which was deprecated
# in matplotlib 3.7 and later removed; the pyplot function exists in old and new releases.
colors = plt.get_cmap('tab20', len(months)).colors

# Tick positions (numeric weekday) and their labels (weekday abbreviation)
day_ticks = df_plot.drop_duplicates(subset='day').sort_values(by='day')

# Plotting
plt.figure(figsize=(16, 12))
for i, month in enumerate(months):
    month_data = df_plot[df_plot['month'] == month]
    # Plot against the numeric weekday so the x position of every point is fixed,
    # even for a month that has no data for some weekday.
    plt.plot(month_data['day'], month_data['value'], color=colors[i], label=month)
    # Label the line at its actual last point
    plt.text(month_data['day'].iloc[-1], month_data['value'].iloc[-1], month, fontsize=12, color=colors[i])

# Setting labels and titles
plt.xlabel('Day of the Week')
plt.ylabel('Value')
plt.title('Seasonal Plot - Weekly ', fontsize=20)
plt.xticks(day_ticks['day'], day_ticks['day_str'], fontsize=12, alpha=0.7)
plt.yticks(fontsize=12, alpha=0.7)
plt.legend(title='Month')
plt.show()


In [None]:
#Boxplot
# Horizontal box plot of the 'value' column over the whole frame,
# drawn on an explicitly created Axes
fig, ax = plt.subplots(figsize=(7, 4))
sns.boxplot(data=df, x='value', ax=ax)
ax.set_xlabel('Value')
# Trailing semicolon suppresses the text repr of the returned Text object
ax.set_title('Boxplot - Value Distribution');

In [None]:
#Day month
# Extracting year and month features
# NOTE: this (re)writes the 'year', 'month' and 'year_month' columns on df;
# 'year_month' is stored here in 'YYYY-MM' form.
df['year'] = df.index.year
df['month'] = df.index.month
df['year_month'] = df.index.strftime('%Y-%m')

# Filtering data for years 2023 and later
df_plot = df[df['year'] >= 2023]

# 'YYYY-MM' strings sort chronologically; passing the order explicitly keeps the
# x axis in time order even when the rows of df are not sorted by date.
month_order = sorted(df_plot['year_month'].unique())

# Plotting boxplot: one box per year-month
plt.figure(figsize=(12, 6))
sns.boxplot(x='year_month', y='value', data=df_plot, order=month_order)
plt.title('Distribution of Monthly data (2023 and later)', color='red')
plt.xlabel('Year-Month', color ='red')
plt.ylabel('Value', color ='red')
plt.xticks(rotation=45)
plt.show()


In [None]:
#Day
# Rows are ordered by the numeric 'day' column so the weekday boxes are
# drawn in that order (categories appear in order of first occurrence)
df_plot = df.loc[:, ['day_str', 'day', 'value']].sort_values('day')

# One box per weekday label, drawn on an explicitly created Axes
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df_plot, x='day_str', y='value', ax=ax)
ax.set_title('Distribution  by Day of the Week', color='red')
ax.set_xlabel('Day of the Week', color='red')
ax.set_ylabel('Value', color='red')
plt.show()


In [None]:
#Step 5: Time Series Decomposition

#Importing relevant module
from statsmodels.tsa.seasonal import seasonal_decompose

# Number of observations per seasonal cycle handed to seasonal_decompose.
# NOTE(review): with a period of 1 there is no cycle to estimate, so the seasonal
# panel is expected to be constant and the trend to mirror the series. Set this to
# the real cycle length of the data (depends on the sampling frequency — confirm).
DECOMPOSE_PERIOD = 1

# Keep only 2023 and one row per timestamp, then make 'date' the index again.
# The chronological sort is done on the datetime index as the last step, so the
# series passed to the decomposition is guaranteed to be in time order.
df_plot = (
    df[df['year'] == 2023]
    .reset_index()
    .drop_duplicates(subset=['date'])
    .set_index('date')
    .sort_index()
)


# Copying the 'value' column for both additive and multiplicative decompose.
# The copies differ only in name; presumably the name is what labels the top
# panel of each decomposition figure — TODO confirm.
df_plot['Multiplicative Decompose'] = df_plot['value']
df_plot['Additive Decompose'] = df_plot['value']

# Performing additive decomposition
result_add = seasonal_decompose(df_plot['Additive Decompose'], model='additive', period=DECOMPOSE_PERIOD)

# Performing multiplicative decomposition
result_mul = seasonal_decompose(df_plot['Multiplicative Decompose'], model='multiplicative', period=DECOMPOSE_PERIOD)

# Plotting additive decomposition (figure-level title left empty)
result_add.plot().suptitle('', fontsize=22)
plt.xticks(rotation=45)

# Plotting multiplicative decomposition (figure-level title left empty)
result_mul.plot().suptitle('', fontsize=22)
plt.xticks(rotation=45)

# Displaying the plots
plt.show()

In [None]:
#Step 6: Lag Analysis

from statsmodels.graphics.tsaplots import plot_pacf

actual = df['value']
# Hours of the day to analyse; currently only hour 0
hours_of_interest = range(1)

for hour in hours_of_interest:
    # Observations stamped with this hour, first-differenced; the leading NaN
    # produced by diff() is dropped
    in_this_hour = actual.index.hour == hour
    hour_data = actual.loc[in_this_hour].diff().dropna()

    # Partial autocorrelation up to lag 10; alpha=0.01 sets the confidence band
    plot_pacf(hour_data, lags=10, alpha=0.01)

    # Annotate the axes that plot_pacf just drew on (the current axes)
    pacf_ax = plt.gca()
    pacf_ax.set_title(f'Partial Autocorrelation - Hour {hour}')
    pacf_ax.set_ylabel('Correlation')
    pacf_ax.set_xlabel('Lags')
    plt.show()