In [None]:
# Mount Google Drive at /content/drive; the CSV paths used later in this cell
# all live under that mount point. Requires the google.colab runtime.
from google.colab import drive
drive.mount('/content/drive')

import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import MinMaxScaler

def preprocess_ieq_data(df: pd.DataFrame) -> pd.DataFrame:
    """Clean one IEQ data frame and min-max scale its numeric columns.

    Processing steps, in order:

    1. Normalise column names: lower-case, then replace ``", "`` and ``" "``
       with ``"_"``, ``"µ"`` with ``"u"``, ``"°"`` with ``"deg"``, ``"%"``
       with ``"pct"``, ``"/"`` with ``"_"`` and drop any remaining ``","``.
    2. If a ``dtm`` column exists: parse it as datetime, drop rows whose
       timestamp cannot be parsed, sort by time and keep only the first row
       for each duplicated timestamp. Frames without ``dtm`` skip this step.
    3. Fill gaps in every numeric column by linear interpolation in both
       directions.
    4. Drop rows lying outside the ``[Q1 - 1.5*IQR, Q3 + 1.5*IQR]`` fences.
       Columns are processed one after another, so the fences of a later
       column are computed on rows that survived the earlier columns.
    5. Scale the numeric columns to ``[0, 1]`` with ``MinMaxScaler``.

    Numeric columns that contain no valid value at all are left untouched
    (all NaN) and are ignored by steps 4 and 5; otherwise their NaN fences
    would reject every row.

    Args:
        df: Raw data frame. It is not modified; a copy is processed.

    Returns:
        A new data frame with a fresh ``RangeIndex``. If no rows or no
        numeric columns remain after step 2, the frame is returned as-is
        without interpolation, filtering or scaling.
    """
    df = df.copy()

    # Order matters: ", " must be handled before " " and before the bare ",".
    replacements = (
        (", ", "_"),
        (" ", "_"),
        ("µ", "u"),
        ("°", "deg"),
        ("%", "pct"),
        ("/", "_"),
        (",", ""),
    )
    new_cols = {}
    for c in df.columns:
        # str() keeps non-string labels (e.g. integer headers) from crashing.
        new_name = str(c).lower()
        for old, new in replacements:
            new_name = new_name.replace(old, new)
        new_cols[c] = new_name
    df = df.rename(columns=new_cols)

    # Every timestamp operation, including de-duplication, is only valid when
    # the column is actually present.
    if "dtm" in df.columns:
        df["dtm"] = pd.to_datetime(df["dtm"], errors="coerce")
        df = df.dropna(subset=["dtm"]).sort_values(by="dtm")
        df = df.drop_duplicates(subset=["dtm"], keep="first")
    df = df.reset_index(drop=True)

    numeric_cols = df.select_dtypes(include=[np.number]).columns
    if df.empty or len(numeric_cols) == 0:
        # Nothing to interpolate, filter or scale; the scaler would raise on
        # an array with zero samples or zero features.
        return df

    for col in numeric_cols:
        df[col] = df[col].interpolate(method="linear", limit_direction="both")

    # After two-sided interpolation a column is either NaN-free or all NaN.
    usable_cols = [col for col in numeric_cols if df[col].notna().any()]

    for col in usable_cols:
        q1 = df[col].quantile(0.25)
        q3 = df[col].quantile(0.75)
        iqr = q3 - q1
        # between() is inclusive on both ends, matching ``>=`` / ``<=``.
        df = df[df[col].between(q1 - 1.5 * iqr, q3 + 1.5 * iqr)]
    # reset_index returns a new frame, so the assignment below never writes
    # into a slice of the pre-filter data.
    df = df.reset_index(drop=True)

    if usable_cols and not df.empty:
        scaler = MinMaxScaler()
        df[usable_cols] = scaler.fit_transform(df[usable_cols].values)
    return df

# Raw exports for each room.
auditorium_path = "/content/drive/MyDrive/VIP Data/Kendeda_Building_Auditorium_152_0c8b95e42928_27_Feb_2025_02_Mar_2025.csv"
classroom210_path = "/content/drive/MyDrive/VIP Data/Kendeda_Building_Classroom_210_0c8b95e37bf4_27_Feb_2025_02_Mar_2025.csv"
classroom230_path = "/content/drive/MyDrive/VIP Data/Kendeda_Building_Classroom_230_24d7eb23b7d4_27_Feb_2025_02_Mar_2025.csv"

# Destinations for the preprocessed frames.
save_auditorium_path = "/content/drive/MyDrive/VIP Data/Auditorium_152_preprocessed.csv"
save_classroom210_path = "/content/drive/MyDrive/VIP Data/Classroom_210_preprocessed.csv"
save_classroom230_path = "/content/drive/MyDrive/VIP Data/Classroom_230_preprocessed.csv"

# Load all three raw files first, in room order.
df_aud, df_cls210, df_cls230 = (
    pd.read_csv(csv_path)
    for csv_path in (auditorium_path, classroom210_path, classroom230_path)
)

# Run the shared preprocessing on each frame.
df_aud_processed, df_cls210_processed, df_cls230_processed = map(
    preprocess_ieq_data, (df_aud, df_cls210, df_cls230)
)

# Write every result before reporting anything.
for cleaned, destination in (
    (df_aud_processed, save_auditorium_path),
    (df_cls210_processed, save_classroom210_path),
    (df_cls230_processed, save_classroom230_path),
):
    cleaned.to_csv(destination, index=False)

# Report where each room's output was written.
for label, destination in (
    ("Auditorium", save_auditorium_path),
    ("Classroom 210", save_classroom210_path),
    ("Classroom 230", save_classroom230_path),
):
    print(f"{label} => {destination}")

Mounted at /content/drive
Auditorium => /content/drive/MyDrive/VIP Data/Auditorium_152_preprocessed.csv
Classroom 210 => /content/drive/MyDrive/VIP Data/Classroom_210_preprocessed.csv
Classroom 230 => /content/drive/MyDrive/VIP Data/Classroom_230_preprocessed.csv
