In [1]:
import pandas as pd
import glob

# Read livingroom CSV files
livingroom_files = glob.glob('data/aq-monitor-livingroom*.csv')
livingroom = pd.concat([pd.read_csv(f) for f in livingroom_files], ignore_index=True)

# Read office CSV files
office_files = glob.glob('data/aq-monitor-office*.csv')
office = pd.concat([pd.read_csv(f) for f in office_files], ignore_index=True)

In [2]:
# Standardize column names for both dataframes
livingroom.columns = ['timestamp', 'pm25', 'temperature_f', 'humidity']
office.columns = ['timestamp', 'pm25', 'temperature_f', 'humidity']

In [3]:
livingroom = livingroom.sort_values(by='timestamp', ascending=True)
office = office.sort_values(by='timestamp', ascending=True)

In [4]:
# Convert temperature from Fahrenheit to Celsius
livingroom['temperature_c'] = (livingroom['temperature_f'] - 32) * 5/9
office['temperature_c'] = (office['temperature_f'] - 32) * 5/9

# Drop the Fahrenheit column
livingroom = livingroom.drop('temperature_f', axis=1)
office = office.drop('temperature_f', axis=1)

In [5]:
# Calculate average gap between observations for each room
livingroom_avg_gap = pd.to_datetime(livingroom['timestamp']).diff().mean()
office_avg_gap = pd.to_datetime(office['timestamp']).diff().mean()

# Calculate median and standard deviation of gaps
livingroom_median_gap = pd.to_datetime(livingroom['timestamp']).diff().median()
livingroom_std_gap = pd.to_datetime(livingroom['timestamp']).diff().std()

office_median_gap = pd.to_datetime(office['timestamp']).diff().median()
office_std_gap = pd.to_datetime(office['timestamp']).diff().std()

print(f"Living Room average gap between observations: {livingroom_avg_gap}")
print(f"Living Room median gap between observations: {livingroom_median_gap}")
print(f"Living Room std dev of gap between observations: {livingroom_std_gap}")
print()
print(f"Office average gap between observations: {office_avg_gap}")
print(f"Office median gap between observations: {office_median_gap}")
print(f"Office std dev of gap between observations: {office_std_gap}")

Living Room average gap between observations: 0 days 00:01:00.391429286
Living Room median gap between observations: 0 days 00:01:00
Living Room std dev of gap between observations: 0 days 00:01:42.199311661

Office average gap between observations: 0 days 00:01:02.111401033
Office median gap between observations: 0 days 00:01:00
Office std dev of gap between observations: 0 days 00:07:24.725784981


In [6]:
# Convert timestamp to datetime
livingroom['datetime'] = pd.to_datetime(livingroom['timestamp'])
office['datetime'] = pd.to_datetime(office['timestamp'])

# Filter records between midnight and 7 AM
livingroom_hours = livingroom[(livingroom['datetime'].dt.hour >= 0) & (livingroom['datetime'].dt.hour <= 7)]
office_hours = office[(office['datetime'].dt.hour >= 0) & (office['datetime'].dt.hour < 7)]

# Keep records from October to December only
livingroom_hours_months = livingroom_hours[livingroom_hours['datetime'].dt.month.isin([11, 12])]
office_hours_months = office_hours[office_hours['datetime'].dt.month.isin([11, 12])]

In [7]:
import numpy as np

# Function to insert NaN temperature record before the first of every day
def insert_gaps(df):
    # Extract the last record of each day
    last_records = df.groupby(df['datetime'].dt.date).last()
    # Create new rows with NaN temperature, 1 hour after the last timestamp
    gaps = last_records.copy()
    gaps['temperature_c'] = np.nan
    gaps['datetime'] = gaps['datetime'] + pd.Timedelta(hours=1)
    # Append gaps to the original dataframe and sort
    df_with_gaps = pd.concat([df, gaps]).sort_values(by='datetime').reset_index(drop=True)
    return df_with_gaps

# Apply the function to both livingroom_hours_months and office_hours_months
livingroom_hours_months = insert_gaps(livingroom_hours_months)
office_hours_months = insert_gaps(office_hours_months)

In [8]:
from scipy.signal import find_peaks

# Function to detect peaks and add the is_peak column to the dataframe
def add_peak_column(df):
    df['is_peak'] = 0  # Initialize the is_peak column with 0
    for date in df['datetime'].dt.date.unique():
        # Get the data for the current date
        daily_data = df[df['datetime'].dt.date == date]
        
        # Find the local maxima in the temperature data
        peaks, _ = find_peaks(daily_data['temperature_c'].dropna(), distance=20, prominence=0.3, width=10)
        # Find valleys (local minima) in the temperature data
        valleys, _ = find_peaks(-daily_data['temperature_c'].dropna(), distance=20, prominence=0.3, width=10)
        
        # Add a binary column for peaks
        peak_indices = daily_data.iloc[peaks].index
        valley_indices = daily_data.iloc[valleys].index
        df.loc[peak_indices, 'is_peak'] = 1
        df.loc[valley_indices, 'is_peak'] = -1

# Apply the function to both dataframes
add_peak_column(livingroom_hours_months)
add_peak_column(office_hours_months)


In [14]:
from sklearn.linear_model import HuberRegressor

# Function to calculate cooling rates between peaks and valleys
def calculate_cooling_rates(df):
    df['cooling_rate'] = np.nan
    
    # Get indices of peaks and valleys
    peak_indices = df[df['is_peak'] == 1].index.tolist()
    valley_indices = df[df['is_peak'] == -1].index.tolist()
    
    previous_rate = np.nan
    
    # Iterate through peaks
    for peak_idx in peak_indices:
        # Find the next valley after this peak
        next_valleys = [v for v in valley_indices if v > peak_idx]
        
        if len(next_valleys) > 0:
            valley_idx = next_valleys[0]
            
            # Get the segment between peak and valley
            segment = df.loc[peak_idx:valley_idx]
            
            # Filter out NaN temperatures
            segment_clean = segment[segment['temperature_c'].notna()]
            
            if len(segment_clean) >= 2:
                # Prepare data for Huber regression
                X = np.arange(len(segment_clean)).reshape(-1, 1)
                y = segment_clean['temperature_c'].values
                
                # Fit Huber regression
                huber = HuberRegressor()
                huber.fit(X, y)
                
                # The cooling rate is the slope (coefficient)
                cooling_rate = huber.coef_[0]
                previous_rate = cooling_rate
            else:
                cooling_rate = previous_rate
        else:
            # No matching valley, use previous rate
            cooling_rate = previous_rate
        
        # Assign the cooling rate to all points in this segment
        if len(next_valleys) > 0:
            valley_idx = next_valleys[0]
            df.loc[peak_idx:valley_idx, 'cooling_rate'] = cooling_rate
        else:
            df.loc[peak_idx:, 'cooling_rate'] = cooling_rate
    
    return df

# Apply to both dataframes
livingroom_cooling_rates = calculate_cooling_rates(livingroom_hours_months.copy())
office_cooling_rates = calculate_cooling_rates(office_hours_months.copy())

In [15]:
import plotly.graph_objects as go

# Create the figure
fig = go.Figure()

# Add the main temperature time series
fig.add_trace(go.Scatter(
    x=office_cooling_rates['datetime'],
    y=office_cooling_rates['temperature_c'],
    mode='lines',
    name='Office Temperature',
    line=dict(color='blue'),
    showlegend=False
))

# Add vertical dotted lines at peak locations
peaks_data = office_cooling_rates[office_cooling_rates['is_peak'] == 1]
for idx, row in peaks_data.iterrows():
    fig.add_vline(
        x=row['datetime'],
        line_dash="dot",
        line_color="red",
        opacity=0.5
    )

# Add vertical dotted lines at valley locations
valleys_data = office_cooling_rates[office_cooling_rates['is_peak'] == -1]
for idx, row in valleys_data.iterrows():
    fig.add_vline(
        x=row['datetime'],
        line_dash="dot",
        line_color="darkgreen",
        opacity=0.5
    )

# Get unique cooling rate segments and add text annotations
peak_indices = office_cooling_rates[office_cooling_rates['is_peak'] == 1].index.tolist()

for i in range(len(peak_indices) - 1):
    start_idx = peak_indices[i]
    end_idx = peak_indices[i + 1]
    
    segment = office_cooling_rates.loc[start_idx:end_idx]
    cooling_rate = segment['cooling_rate'].iloc[0]
    
    if pd.notna(cooling_rate):
        # Calculate midpoint for text annotation
        mid_datetime = segment['datetime'].iloc[len(segment)//2]
        mid_temp = segment['temperature_c'].iloc[len(segment)//2]
        
        # Add text annotation
        fig.add_annotation(
            x=mid_datetime,
            y=mid_temp,
            text=f'{cooling_rate:.4f}',
            showarrow=False,
            font=dict(size=10, color='darkgreen'),
            bgcolor='rgba(255, 255, 255, 0.7)',
            bordercolor='darkgreen',
            borderwidth=1
        )

# Update layout with range selector
fig.update_layout(
    title='Office Temperature Time Series with Cooling Rates',
    xaxis=dict(
        title='Timestamp',
        rangeselector=dict(
            buttons=[
                dict(count=1, label='1d', step='day', stepmode='backward'),
                dict(count=7, label='1w', step='day', stepmode='backward'),
                dict(count=1, label='1m', step='month', stepmode='backward'),
                dict(step='all')
            ]
        ),
        rangeslider=dict(visible=True)
    ),
    yaxis=dict(title='Temperature (°C)'),
    hovermode='x unified',
    showlegend=False
)

fig.show()

In [16]:
import plotly.graph_objects as go

# Create the figure
fig = go.Figure()

# Add the main temperature time series
fig.add_trace(go.Scatter(
    x=livingroom_cooling_rates['datetime'],
    y=livingroom_cooling_rates['temperature_c'],
    mode='lines',
    name='Living Room Temperature',
    line=dict(color='blue'),
    showlegend=False,
    connectgaps=False
))

# Add vertical dotted lines at peak locations
peaks_data = livingroom_cooling_rates[livingroom_cooling_rates['is_peak'] == 1]
for idx, row in peaks_data.iterrows():
    fig.add_vline(
        x=row['datetime'],
        line_dash="dot",
        line_color="red",
        opacity=0.5
    )

# Add vertical dotted lines at valley locations
valleys_data = livingroom_cooling_rates[livingroom_cooling_rates['is_peak'] == -1]
for idx, row in valleys_data.iterrows():
    fig.add_vline(
        x=row['datetime'],
        line_dash="dot",
        line_color="darkgreen",
        opacity=0.5
    )

# Get unique cooling rate segments and add text annotations
peak_indices = livingroom_cooling_rates[livingroom_cooling_rates['is_peak'] == 1].index.tolist()

for i in range(len(peak_indices) - 1):
    start_idx = peak_indices[i]
    end_idx = peak_indices[i + 1]
    
    segment = livingroom_cooling_rates.loc[start_idx:end_idx]
    cooling_rate = segment['cooling_rate'].iloc[0]
    
    if pd.notna(cooling_rate):
        # Calculate midpoint for text annotation
        mid_datetime = segment['datetime'].iloc[len(segment)//2]
        mid_temp = segment['temperature_c'].iloc[len(segment)//2]
        
        # Add text annotation
        fig.add_annotation(
            x=mid_datetime,
            y=mid_temp,
            text=f'{cooling_rate:.4f}',
            showarrow=False,
            font=dict(size=10, color='darkgreen'),
            bgcolor='rgba(255, 255, 255, 0.7)',
            bordercolor='darkgreen',
            borderwidth=1
        )

# Update layout with range selector
fig.update_layout(
    title='Living Room Temperature Time Series with Cooling Rates',
    xaxis=dict(
        title='Timestamp',
        rangeselector=dict(
            buttons=[
                dict(count=1, label='1d', step='day', stepmode='backward'),
                dict(count=7, label='1w', step='day', stepmode='backward'),
                dict(count=1, label='1m', step='month', stepmode='backward'),
                dict(step='all')
            ]
        ),
        rangeslider=dict(visible=True)
    ),
    yaxis=dict(title='Temperature (°C)'),
    hovermode='x unified',
    showlegend=False
)

fig.show()

In [18]:
# Create daily statistics for cooling rates (excluding NaN values)
daily_stats_office = office_cooling_rates[office_cooling_rates['cooling_rate'].notna()].groupby(office_cooling_rates['datetime'].dt.date)['cooling_rate'].agg(['mean', 'median', 'std']).reset_index()
daily_stats_office.columns = ['date', 'mean_cooling_rate_office', 'median_cooling_rate_office', 'std_cooling_rate_office']

daily_stats_livingroom = livingroom_cooling_rates[livingroom_cooling_rates['cooling_rate'].notna()].groupby(livingroom_cooling_rates['datetime'].dt.date)['cooling_rate'].agg(['mean', 'median', 'std']).reset_index()
daily_stats_livingroom.columns = ['date', 'mean_cooling_rate_livingroom', 'median_cooling_rate_livingroom', 'std_cooling_rate_livingroom']

# Merge the two dataframes
daily_stats = pd.merge(daily_stats_office, daily_stats_livingroom, on='date', how='inner')

# Load the Pittsburgh minimum temperature data
pitt_min_temp = pd.read_csv('data/pitt_min_temp.csv')

# Convert date column to datetime for joining
pitt_min_temp['DATE'] = pd.to_datetime(pitt_min_temp['DATE'])
daily_stats['date'] = pd.to_datetime(daily_stats['date'])

# Join the daily minimum temperature
daily_stats = pd.merge(daily_stats, pitt_min_temp[['DATE', 'TMIN']], left_on='date', right_on='DATE', how='left')
daily_stats = daily_stats.drop('DATE', axis=1)
daily_stats = daily_stats.rename(columns={'TMIN': 'daily_min_temp'})

daily_stats

Unnamed: 0,date,mean_cooling_rate_office,median_cooling_rate_office,std_cooling_rate_office,mean_cooling_rate_livingroom,median_cooling_rate_livingroom,std_cooling_rate_livingroom,daily_min_temp
0,2025-11-01,-0.005016,-0.004617,0.001762,-0.006319,-0.006345,0.00128,6.1
1,2025-11-02,-0.00389,-0.002486,0.002041,-0.004169,-0.005039,0.001151,1.7
2,2025-11-04,-0.011931,-0.01329,0.001773,-0.003831,-0.004877,0.002756,0.0
3,2025-11-05,-0.005139,-0.006149,0.002912,-0.007654,-0.008527,0.004338,0.6
4,2025-11-06,-0.003451,-0.004893,0.002923,-0.005089,-0.004378,0.001274,-1.1
5,2025-11-07,-0.011762,-0.014323,0.006223,-0.01043,-0.012063,0.003694,-1.1
6,2025-11-08,-0.000765,-0.000765,0.0,-0.000534,-0.000534,0.0,3.3
7,2025-11-09,-0.004446,-0.005813,0.002246,-0.000534,-0.000534,0.0,1.1
8,2025-11-10,-0.014175,-0.014224,0.003302,-0.013411,-0.011941,0.010136,-3.3
9,2025-11-11,-0.017332,-0.018495,0.006217,-0.037603,-0.045138,0.015589,-2.8


In [19]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Create subplots with 2 rows (one for each room) with secondary y-axis
fig = make_subplots(
    rows=2, cols=1,
    subplot_titles=('Office', 'Living Room'),
    vertical_spacing=0.12,
    specs=[[{"secondary_y": True}], [{"secondary_y": True}]]
)

# Add office mean and median traces
fig.add_trace(
    go.Scatter(
        x=daily_stats['date'],
        y=daily_stats['mean_cooling_rate_office'],
        mode='lines',
        name='Office Mean',
        line=dict(color='blue')
    ),
    row=1, col=1, secondary_y=False
)

fig.add_trace(
    go.Scatter(
        x=daily_stats['date'],
        y=daily_stats['median_cooling_rate_office'],
        mode='lines',
        name='Office Median',
        line=dict(color='blue', dash='dash')
    ),
    row=1, col=1, secondary_y=False
)

# Add min temp bars for office
fig.add_trace(
    go.Bar(
        x=daily_stats['date'],
        y=daily_stats['daily_min_temp'],
        name='Min Temp',
        marker=dict(color='red', opacity=0.3),
        showlegend=True
    ),
    row=1, col=1, secondary_y=True
)

# Add living room mean and median traces
fig.add_trace(
    go.Scatter(
        x=daily_stats['date'],
        y=daily_stats['mean_cooling_rate_livingroom'],
        mode='lines',
        name='Living Room Mean',
        line=dict(color='green')
    ),
    row=2, col=1, secondary_y=False
)

fig.add_trace(
    go.Scatter(
        x=daily_stats['date'],
        y=daily_stats['median_cooling_rate_livingroom'],
        mode='lines',
        name='Living Room Median',
        line=dict(color='green', dash='dash')
    ),
    row=2, col=1, secondary_y=False
)

# Add min temp bars for living room
fig.add_trace(
    go.Bar(
        x=daily_stats['date'],
        y=daily_stats['daily_min_temp'],
        name='Min Temp',
        marker=dict(color='red', opacity=0.3),
        showlegend=False
    ),
    row=2, col=1, secondary_y=True
)

# Update x-axes to show weekly ticks
fig.update_xaxes(
    dtick=7*24*60*60*1000,  # 7 days in milliseconds
    tickformat='%Y-%m-%d',
    row=1, col=1
)

fig.update_xaxes(
    title_text='Date',
    dtick=7*24*60*60*1000,  # 7 days in milliseconds
    tickformat='%Y-%m-%d',
    row=2, col=1
)

# Update y-axes
fig.update_yaxes(title_text='Cooling Rate (°C/min)', row=1, col=1, secondary_y=False)
fig.update_yaxes(title_text='Min Temp (°C)', row=1, col=1, secondary_y=True)
fig.update_yaxes(title_text='Cooling Rate (°C/min)', row=2, col=1, secondary_y=False)
fig.update_yaxes(title_text='Min Temp (°C)', row=2, col=1, secondary_y=True)

# Update layout
fig.update_layout(
    height=800,
    title_text='Daily Cooling Rates by Room',
    hovermode='x unified',
    showlegend=True
)

fig.show()