In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler

# Step 1: Generating a synthetic time series dataset with missing values
def generate_time_series_data(n_points=100, n_missing=10, start='2014-01-01', seed=None):
    """Build a daily sinusoidal series with Gaussian noise and missing values.

    Args:
        n_points: Number of daily observations to generate.
        n_missing: Exact number of observations to replace with NaN.
        start: First date of the daily index.
        seed: Optional seed for reproducible output. When None, the global
            NumPy random state is used (so ``np.random.seed`` still applies).

    Returns:
        DataFrame indexed by 'Date' with a single float column 'Value'.

    Raises:
        ValueError: If ``n_missing`` is negative or exceeds ``n_points``.
    """
    if not 0 <= n_missing <= n_points:
        raise ValueError("n_missing must be between 0 and n_points")

    # Both the module and RandomState expose the same normal()/choice() API.
    rng = np.random if seed is None else np.random.RandomState(seed)

    date_range = pd.date_range(start=start, periods=n_points, freq='D')
    # Sinusoidal pattern with noise
    data = np.sin(np.linspace(0, 10, n_points)) + rng.normal(0, 0.1, n_points)

    if n_missing:
        # Sample WITHOUT replacement: drawing indices with replacement can
        # repeat positions and silently yield fewer NaNs than requested.
        missing_idx = rng.choice(n_points, size=n_missing, replace=False)
        data[missing_idx] = np.nan

    return pd.DataFrame({'Date': date_range, 'Value': data}).set_index('Date')

# Step 2: Handling Missing Data
def handle_missing_data(df):
    """Fill missing values using three alternative strategies.

    The input frame is not modified; each strategy returns a new frame.

    Args:
        df: DataFrame that may contain NaN values.

    Returns:
        Tuple ``(df_ffill, df_bfill, df_interpolated)``: forward-filled,
        backward-filled and linearly interpolated copies of ``df``.
    """
    # DataFrame.ffill()/bfill() replace fillna(method=...), which is
    # deprecated in recent pandas releases.
    df_ffill = df.ffill()
    df_bfill = df.bfill()
    df_interpolated = df.interpolate(method='linear')
    return df_ffill, df_bfill, df_interpolated

# Step 3: Preprocessing Techniques
def preprocess_time_series(df):
    """Resample the series weekly and add a min-max normalized column.

    The input frame is left untouched: normalization is applied to a copy so
    callers that keep using ``df`` do not see an unexpected extra column.

    Args:
        df: DataFrame with a datetime-like index and a 'Value' column.

    Returns:
        Tuple ``(df_resampled, df_normalized)`` where ``df_resampled`` holds
        weekly means of ``df`` and ``df_normalized`` is a copy of ``df`` with
        an added 'Normalized_Value' column produced by ``MinMaxScaler``.
    """
    df_resampled = df.resample('W').mean()  # Weekly resampling

    # Work on a copy instead of adding the column to the caller's frame.
    df_normalized = df.copy()
    scaler = MinMaxScaler()
    # fit_transform returns an (n, 1) array; flatten it to a single column.
    df_normalized['Normalized_Value'] = scaler.fit_transform(df_normalized[['Value']]).ravel()
    return df_resampled, df_normalized

# Step 4: Visualizing Time Series Data
def visualize_time_series(df):
    """Show line, scatter and monthly-average bar plots of the 'Value' column.

    Args:
        df: DataFrame with a datetime-like index and a 'Value' column. Any
            other columns are ignored.
    """
    values = df['Value']

    plt.figure(figsize=(12, 6))
    plt.plot(df.index, values, marker='o', linestyle='-', color='blue', label='Original Data')
    plt.title("Time Series Line Plot")
    plt.xlabel("Date")
    plt.ylabel("Value")
    plt.legend()
    plt.show()

    plt.figure(figsize=(12, 6))
    plt.scatter(df.index, values, color='red', label='Scatter Plot')
    plt.title("Time Series Scatter Plot")
    plt.xlabel("Date")
    plt.ylabel("Value")
    plt.legend()
    plt.show()

    # Aggregate only 'Value' so unrelated columns are not drawn as extra,
    # same-coloured bars. 'MS' (month start) is used because the 'M' alias
    # is deprecated in recent pandas releases.
    monthly_mean = values.resample('MS').mean()
    # Short YYYY-MM labels instead of full timestamps on the x axis.
    monthly_mean.index = monthly_mean.index.strftime('%Y-%m')
    monthly_mean.plot(kind='bar', figsize=(12, 6), legend=False, color='purple')
    plt.title("Monthly Average Values - Bar Chart")
    plt.xlabel("Month")
    plt.ylabel("Average Value")
    plt.xticks(rotation=45)
    plt.show()

# Step 5: Running the program
def main():
    """Run the demo: generate data, fill gaps, preprocess, print and plot."""
    df = generate_time_series_data()
    print("Original Dataset with Missing Values:\n", df.head())

    df_ffill, df_bfill, df_interpolated = handle_missing_data(df)
    # Pass a copy so the interpolated frame printed and plotted below holds
    # only the 'Value' column, whatever preprocessing does to its input.
    df_resampled, df_normalized = preprocess_time_series(df_interpolated.copy())

    print("\nDataset After Forward Fill:\n", df_ffill.head())
    print("\nDataset After Backward Fill:\n", df_bfill.head())
    print("\nDataset After Interpolation:\n", df_interpolated.head())
    print("\nResampled Data (Weekly):\n", df_resampled.head())
    print("\nNormalized Data:\n", df_normalized.head())

    visualize_time_series(df_interpolated)

    # Additional visualizations
    plt.figure(figsize=(12, 6))
    sns.histplot(df_interpolated['Value'], bins=20, kde=True, color='blue', label='Histogram')
    plt.title("Time Series Data Distribution")
    plt.xlabel("Value")
    plt.ylabel("Frequency")
    plt.legend()
    plt.show()

    plt.figure(figsize=(12, 6))
    sns.boxplot(data=df_interpolated, y='Value', color='green')
    plt.title("Box Plot of Time Series Data")
    plt.ylabel("Value")
    plt.show()

    plt.figure(figsize=(12, 6))
    # Use the frame that explicitly carries both 'Value' and
    # 'Normalized_Value' rather than relying on a side effect of preprocessing.
    sns.heatmap(df_normalized.corr(), annot=True, cmap='coolwarm', linewidths=0.5)
    plt.title("Heatmap of Time Series Data")
    plt.show()

# Execute the program
if __name__ == "__main__":
    # Run the demo only when this code is executed directly, not on import.
    main()


Original Dataset with Missing Values:
                Value
Date                
2014-01-01  0.065203
2014-01-02       NaN
2014-01-03  0.162001
2014-01-04  0.203172
2014-01-05  0.386178

Dataset After Forward Fill:
                Value
Date                
2014-01-01  0.065203
2014-01-02  0.065203
2014-01-03  0.162001
2014-01-04  0.203172
2014-01-05  0.386178

Dataset After Backward Fill:
                Value
Date                
2014-01-01  0.065203
2014-01-02  0.162001
2014-01-03  0.162001
2014-01-04  0.203172
2014-01-05  0.386178

Dataset After Interpolation:
                Value  Normalized_Value
Date                                  
2014-01-01  0.065203          0.539993
2014-01-02  0.113602          0.561904
2014-01-03  0.162001          0.583814
2014-01-04  0.203172          0.602453
2014-01-05  0.386178          0.685302

Resampled Data (Weekly):
                Value
Date                
2014-01-05  0.186031
2014-01-12  0.677387
2014-01-19  0.929507
2014-01-26  0.786104
20