In [None]:
# prompt: Implement programs for time series data cleaning, loading and handling time series data, and pre-processing
# techniques

import pandas as pd
import numpy as np
from datetime import datetime

# Sample time series data (replace with your actual data).
# One observation per day; the NaN on 2024-01-03 exercises the
# missing-value handling below.
data = {
    'date': pd.to_datetime([
        '2024-01-01', '2024-01-02', '2024-01-03',
        '2024-01-04', '2024-01-05', '2024-01-06',
    ]),
    'value': [10, 12, np.nan, 15, 14, 16],
}
df = pd.DataFrame(data)

# 1. Loading Time Series Data
# Assuming your data is in a CSV file named 'time_series_data.csv'.
# Keep 'date' as a regular column (not the index): the steps below access
# df['date'] and resample with on='date'.
# df = pd.read_csv('time_series_data.csv', parse_dates=['date'])


# 2. Data Cleaning
# a) Handling Missing Values
# Linear interpolation fills interior gaps from the neighbouring observations.
df['value'] = df['value'].interpolate(method='linear')

# b) Handling Outliers (using IQR method)
# Rows whose value lies more than 1.5 * IQR outside the quartiles are dropped.
Q1 = df['value'].quantile(0.25)
Q3 = df['value'].quantile(0.75)
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR
is_outlier = (df['value'] < lower_bound) | (df['value'] > upper_bound)
df = df[~is_outlier]

# c) Removing Duplicates
# Keep the first row per timestamp. The explicit copy detaches the result from
# the filtered frames above so the column assignments below operate on an
# independent DataFrame (avoids SettingWithCopyWarning when rows were dropped).
df = df.drop_duplicates(subset=['date']).copy()

# 3. Data Preprocessing
# a) Smoothing
# Simple Moving Average over 3 rows; the first two rows have no full window
# and therefore stay NaN.
df['SMA_3'] = df['value'].rolling(window=3).mean()

# b) Feature Engineering (Example: day of the week)
df['day_of_week'] = df['date'].dt.dayofweek  # Monday=0, Sunday=6

# c) Normalization (Min-Max Scaling)
min_val = df['value'].min()
max_val = df['value'].max()
value_range = max_val - min_val
if value_range != 0:
    df['normalized_value'] = (df['value'] - min_val) / value_range
else:
    # Constant series: min-max scaling is undefined (0 / 0); map every value
    # to 0.0 instead of producing a column of NaN.
    df['normalized_value'] = 0.0


# 4. Time Series Data Handling
# a) Resampling (upsampling/downsampling)
# Aggregate to one row per calendar day; days without observations come out
# as NaN and are forward-filled from the previous day. `.ffill()` replaces the
# deprecated `fillna(method='ffill')`.
df_daily = df.resample('D', on='date').mean().ffill()

# b) Lagged features
# Previous and second-previous row values; leading rows have no history (NaN).
df['lag_1'] = df['value'].shift(1)
df['lag_2'] = df['value'].shift(2)


# Print cleaned and preprocessed data
df

  df_daily = df.resample('D', on='date').mean().fillna(method='ffill') # Upsample to daily and fill missing values using forward fill


Unnamed: 0,date,value,SMA_3,day_of_week,normalized_value,lag_1,lag_2
0,2024-01-01,10.0,,0,0.0,,
1,2024-01-02,12.0,,1,0.333333,10.0,
2,2024-01-03,13.5,11.833333,2,0.583333,12.0,10.0
3,2024-01-04,15.0,13.5,3,0.833333,13.5,12.0
4,2024-01-05,14.0,14.166667,4,0.666667,15.0,13.5
5,2024-01-06,16.0,15.0,5,1.0,14.0,15.0
