# Preprocessing and Data Preparation for Time Series Data


## Preprocessing Techniques
1. Missing Values
2. Dealing with Outliers
3. Resampling for Forecasting
4. Scaling and Normalization
5. Seasonality and Trend

In [8]:


# Import the necessary libraries

from scipy import stats

import pandas as pd

import numpy as np

from sklearn.preprocessing import MinMaxScaler, StandardScaler

from statsmodels.tsa.seasonal import seasonal_decompose

In [9]:
# Build a synthetic, reproducible time series: 100 observations spaced 15 minutes apart.

# Seeded generator so that "Restart Kernel -> Run All" regenerates the same values.
rng = np.random.default_rng(42)

start = pd.to_datetime('2023-01-01 00:00:00')

# '15min' is the non-deprecated spelling of the 15-minute alias
# (the older '15T' spelling emits a FutureWarning on pandas >= 2.2).
timestamps = pd.date_range(start, periods=100, freq='15min')

# date_range already returns a DatetimeIndex, so no extra conversion is required;
# `dates` is kept as a separate name for the index used below.
dates = timestamps

# One uniform random value in [0, 1) per timestamp
values = rng.random(len(dates))

# Single-column DataFrame ('Value') indexed by the timestamps
time_series = pd.DataFrame({'Value': values}, index=dates)

## Missing Values

In [10]:


# Three gap-filling strategies. Each result gets its own name so they can be
# compared side by side; previously every step overwrote `data_filled`, which
# silently discarded the forward- and backward-filled frames.
# None of these calls modifies `time_series`; each returns a new DataFrame.

# Forward fill: carry the last known value forward.
# A gap at the very start of the series has no earlier value and stays NaN.
data_ffilled = time_series.ffill()

# Backward fill: pull the next known value backward.
# A gap at the very end of the series has no later value and stays NaN.
data_bfilled = time_series.bfill()

# Interpolation: the default method is linear and treats rows as equally
# spaced, which matches the regular sampling interval of this series.
data_interpolated = time_series.interpolate()

# Keep the original result name: as before, `data_filled` ends up holding the
# interpolated series.
data_filled = data_interpolated

## Dealing with Outliers

In [11]:


# Flag outliers with a z-score rule and blank them out (set to NaN) so that a
# later gap-filling step can replace them. Rows are kept, not dropped, so the
# time index stays regular.

# Absolute z-score of every observation, computed per column.
# nan_policy='omit' ignores missing values when estimating mean and standard
# deviation; with the default ('propagate') a single NaN would turn every
# z-score into NaN and no outlier would ever be flagged.
z_scores = np.abs(stats.zscore(time_series, nan_policy='omit'))

# Number of standard deviations beyond which a point counts as an outlier
threshold = 3

# Boolean mask of outlying cells (comparisons against NaN evaluate to False)
outlier_mask = z_scores > threshold

# Row positions that contain at least one outlying cell
outlier_indices = np.where(outlier_mask)[0]

# Replace only the outlying cells with NaN. `mask` returns a new DataFrame, so
# `time_series` itself is left untouched.
data_no_outliers = time_series.mask(outlier_mask)

## Resampling for Forecasting

In [None]:
# The source series is sampled every 15 minutes, so both operations below are
# downsampling (aggregating several observations into one coarser bucket).
# Offset objects are used instead of string aliases because the alias
# spellings changed between pandas releases ('H' -> 'h', 'M' -> 'ME').

# Hourly buckets: mean of the observations that fall in each hour
data_hourly = time_series.resample(pd.offsets.Hour()).mean()

# Calendar-month buckets, labelled by month end: sum of the observations in each month
data_monthly = time_series.resample(pd.offsets.MonthEnd()).sum()

# Keep the original result name: as before, `data_resampled` ends up holding
# the monthly totals.
data_resampled = data_monthly

## Scaling and Normalization

In [None]:
# Two alternative rescalings of the same series. Each fitted scaler keeps its
# own name so that either transformation can later be undone with
# `inverse_transform`; previously the min-max scaler was overwritten.
#
# NOTE(review): both scalers are fitted on the full series. For a forecasting
# workflow, fit on the training span only and reuse the fitted scaler on later
# data, otherwise statistics from the future leak into the model inputs.

# Min-max scaling: maps each column onto the [0, 1] range (the default feature_range)
minmax_scaler = MinMaxScaler()
# With scikit-learn's default output settings this is a NumPy array, so the
# DatetimeIndex of `time_series` is not carried over.
data_scaled = minmax_scaler.fit_transform(time_series)

# Standardization: centres each column to zero mean and unit variance
standard_scaler = StandardScaler()
data_standardized = standard_scaler.fit_transform(time_series)

# Keep the original name: as before, `scaler` ends up bound to the fitted StandardScaler.
scaler = standard_scaler

## Seasonality and Trend

In [None]:
# Decompose the time series into trend, seasonal, and residual components.
#
# The seasonal period must be passed explicitly: seasonal_decompose can only
# derive it from the index for a handful of frequencies (e.g. hourly, daily,
# monthly) and raises ValueError for a 15-minute index.
# The period is expressed in number of observations. One hour (4 samples) is
# used because the decomposition needs at least two complete cycles, and the
# 100-observation series is too short for a daily cycle (96 samples).
samples_per_hour = 4  # 60 minutes / 15-minute sampling interval

# NOTE: seasonal_decompose does not accept missing values, so fill or
# interpolate gaps before decomposing real data.
decomposition = seasonal_decompose(time_series, model='additive', period=samples_per_hour)

# Access the individual components

# Smoothed long-run level (moving average; the edges of the series are NaN)
trend = decomposition.trend

# Repeating pattern with the period given above
seasonal = decomposition.seasonal

# What remains after removing trend and seasonal components
residual = decomposition.resid