In [7]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
import numpy as np

# Preprocessing pipeline for one daily-weather CSV:
# load -> keep measurement columns -> coerce to numeric -> impute gaps ->
# standardise -> bootstrap 1000 rows -> write the result to CSV.

# Load the dataset
file_path = 'C:/Users/e0974166/Downloads/DAILYDATA_S24_202101.csv'  # Replace with the path to your CSV file
data = pd.read_csv(file_path)

# Drop the identifier/date columns so only the remaining columns are modelled
data_ml = data.drop(['Station', 'Year', 'Month', 'Day'], axis=1)

# Coerce every remaining column to numeric. errors='coerce' turns any
# non-numeric placeholder (such as '-') into NaN, so a separate
# replace('-', NaN) pass is unnecessary.
data_ml = data_ml.apply(pd.to_numeric, errors='coerce')

# Per-column count of missing values (kept for inspection)
missing_values = data_ml.isnull().sum()

# A column with no observed value at all has no mean to impute with, and
# SimpleImputer discards such columns from its output. That would leave the
# imputed array with fewer columns than data_ml.columns and make the
# DataFrame construction below fail, so drop those columns explicitly first.
data_ml = data_ml.dropna(axis=1, how='all')

# Fill the remaining gaps with the mean of each column
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
data_ml_imputed = imputer.fit_transform(data_ml)
data_ml = pd.DataFrame(data_ml_imputed, columns=data_ml.columns)

# Standardise each column with StandardScaler
scaler = StandardScaler()
scaled_data = scaler.fit_transform(data_ml)

# Convert the scaled array back to a dataframe with the original column labels
scaled_data_df = pd.DataFrame(scaled_data, columns=data_ml.columns)

# Generate 1000 observations by resampling rows with replacement
# (bootstrapping); random_state is fixed so the sample is reproducible.
bootstrapped_data = scaled_data_df.sample(n=1000, replace=True, random_state=1)

# Save the preprocessed data to a CSV file
processed_data_file_path = 'processed_weather_data.csv'  # The name of the output file
bootstrapped_data.to_csv(processed_data_file_path, index=False)

print(f"Processed data saved to {processed_data_file_path}")



Processed data saved to processed_weather_data.csv
