In [1]:
#!/usr/bin/env python3
# coding: utf-8

# Load dependencies
import numpy as np
import matplotlib.pylab as plt
import pandas as pd

# For interactive plots, install `plotly`
# import plotly.express as px
# pd.options.plotting.backend = "plotly"

# Load the data from the Varennes NetZEB Library
Column descriptions:

    # Temperatures
    'T_ext': exterior temp, C
    'T_int': average interior temp, C
    'T_slab': slab temperature, 2 inches from the surface, C
    'T_room_air_over_slab': average room temperature over the slab where there is a radiant system, C
    'T_room_air_no_slab': average room temperature where there is no radiant slab system, C

    # BIPV/T System
    'BIPVT_outletT': temperature at BIPV/T outlet, C
    'BIPVT_dT': BIPVT_outletT minus T_ext, K
    'BIPVT_heat': m*cp*dT = fanstate*rated_flow*density*cp*dT, kW

    # Heating/Cooling
    'geo_heat': m*cp*dT at the geothermal well inlet, i.e. how much heat is rejected to or extracted from the ground, kW
    'heating_total': m*cp*dT at the heat pump condenser/hot side, kW
    'cooling_total': evaporator/cold side, kW
    'Q_slab': heat going into the radiant slab, kW
    'Q_rest_heating': heat going into the air, kW
    'Q_rest': heat - cooling going into the air, kW
    'heating_slab_ratio': heating going into slab over total, rest goes into the air
    'cooling_slab_ratio': cooling going into slab over total, rest goes into the air

    # Windows
    'window_state': 0: closed, 1: open

    # Solar
    'DNI': direct normal irradiance, kW/m^2
    'GHI': global horizontal irradiance, kW/m^2
    'DHI': diffuse horizontal irradiance, kW/m^2
    'solar_projected_PV': unitized solar radiation incident on PV system, kW/m^2

    # Electrical
    'cons': total electrical consumption power, kW
    'prod': total electrical production power, kW
    'cons_noHVAC': approximate electrical consumption without the HVAC (cons - heating_total/4 - cooling_total/3.5), kW

In [3]:
# Read the zipped CSV; column 0 becomes the index and is parsed as datetimes
df = pd.read_csv(
    "data_varennes_subset.csv.zip",
    index_col=[0],
    parse_dates=[0],
)
print(df.head())

                       T_ext    T_int  BIPVT_outletT  BIPVT_dT  BIPVT_heat  \
Datetime                                                                     
2018-01-01 00:00:00 -21.8566  18.4578       -18.6184    3.2382         0.0   
2018-01-01 00:15:00 -21.8434  18.4086       -18.6614    3.1820         0.0   
2018-01-01 00:30:00 -21.9561  18.3142       -18.6459    3.3102         0.0   
2018-01-01 00:45:00 -22.0408  18.2528       -18.7473    3.2935         0.0   
2018-01-01 01:00:00 -22.1299  18.1697       -18.6353    3.4946         0.0   

                      geo_heat  heating_total  cooling_total  window_state  \
Datetime                                                                     
2018-01-01 00:00:00 -54.118034      38.025668            0.0      0.335996   
2018-01-01 00:15:00 -50.619191      26.322083            0.0      0.336057   
2018-01-01 00:30:00  -9.066210      13.017722            0.0      0.336118   
2018-01-01 00:45:00 -37.896533      10.618306            0.0   

# Approach
Here, we will use built-in tools from pandas and pandas-profiling to explore the data.

In [1]:
from pandas_profiling import ProfileReport

In [5]:
# Summary statistics for every column, transposed so each variable is a row
df.describe(include="all").transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
T_ext,8640.0,-4.792727,7.204247,-24.472,-9.659237,-3.421715,0.626406,10.9368
T_int,8640.0,20.843961,1.03449,17.5971,20.19865,20.9307,21.606525,23.2075
BIPVT_outletT,8640.0,0.465107,9.144518,-21.294,-5.65258,0.75148,4.63898,33.2042
BIPVT_dT,8640.0,5.257835,5.101538,-0.44238,2.417294,3.21013,5.56366,29.98461
BIPVT_heat,8640.0,1.936815,4.157975,-0.243155,0.0,0.0,2.149818,36.356439
geo_heat,8640.0,-24.434989,24.39129,-96.621649,-39.543297,-14.916776,-2.828047,12.449837
heating_total,8640.0,30.145582,27.092717,0.0,7.612362,19.346469,48.169892,110.490381
cooling_total,8640.0,-0.000244,0.001268,-0.012138,0.0,0.0,0.0,0.0
window_state,8640.0,0.60137,0.153241,0.335996,0.468683,0.60137,0.734057,0.866744
heating_slab_ratio,8640.0,0.24811,0.152455,0.0,0.12791,0.249396,0.332952,0.967152


In [11]:
# Build a pandas-profiling report object from the DataFrame loaded above
profile = ProfileReport(df)
# Export the report to "pandas-profile.html" (a path relative to the working directory)
profile.to_file("pandas-profile.html")

# You can also open this directly in Jupyter Notebook by using widgets
# run `conda install -c conda-forge ipywidgets` first
# profile.to_widgets()

  variable_stats = pd.concat(ldesc, join_axes=pd.Index([names]), axis=1)


## Conclusion
+ Run this profiling as a preliminary step, before any further analysis
+ Learn what each of these statistics means
+ Add your own statistics, tailored to your end use