#### This notebook is to find how many readings we need to get a "good" estimation of energy breakdown

In [1]:
import numpy as np
import sys
sys.path.append("../code/")
from structure import *
from algo import *
from algo_fix_season import *
from basic import *
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
from autograd.numpy import linalg as LA
import pickle
from tabulate import tabulate
%load_ext autoreload
%autoreload 2

In [2]:
def get_error(h, a, s, num_latent, tensor):
    mask_tensor = ~np.isnan(tensor)
    h = h.reshape(-1, num_latent)
    a = a.reshape(-1, num_latent)
    s = s.reshape(-1, num_latent)
    hat = np.einsum('Ma, Na, Oa -> MNO', h, a, s)
    errors = {}
    
    for i in range(a.shape[0]):
        errors[APPLIANCE_ORDER_MISSING[i]] = np.sqrt(mean_squared_error(hat[:, i][mask_tensor[:, i, :s.shape[0]]], tensor[:, i][mask_tensor[:, i, :s.shape[0]]]))
    return errors

#### we use the data of year 2015, without any appliance readings

In [3]:
tensor, homeids = get_tensor(2015, 'missing')

#### First, we get the results of using all the observations

In [59]:
home, app, season = factorization(tensor, num_latent=3, dis=False)
all_err = get_error(home, app, season, 3, tensor)

In [60]:
headers = ['Appliance', 'Error']
data = ([(v,k) for k,v in all_err.items()]) # flip the code and name and sort
print(tabulate(data, headers=headers))

  Appliance  Error
-----------  --------------
   181.454   use
   108.241   air1
    15.8383  clotheswasher1
    75.082   drye1
    13.0611  dishwasher1
   112.823   furnace1
    21.8908  kitchenapp1
    10.7678  microwave1
    53.6705  refrigerator1


#### with 10 random seeds, we randomly select k appliance readings to see the results

In [61]:
errors = {}
for k in range(0, 10, 2):
    errors[k] = {}
    for random_seed in range(10):
        np.random.seed(random_seed)

        tensor_copy = tensor.copy()
        tensor_copy[:, 1:] = np.NaN
        
        num_home, num_app, num_season = tensor.shape
        hid = np.random.choice(num_home, 1)
        aid = np.random.choice(np.arange(1, num_app), 1)
        sid = np.random.choice(num_season, 1)

        tensor_copy[hid, aid, sid] = tensor[hid, aid, sid]
        home, app, season = factorization(tensor_copy, num_latent=3)
        errors[k][random_seed] = get_error(home, app, season, 3, tensor)

In [68]:
hid = np.random.choice(num_home, 5)
aid = np.random.choice(np.arange(1, num_app), 5)
sid = np.random.choice(num_season, 5)

In [65]:
mean_error = {}
for k in range(0, 10, 2):
    mean_error[k] = pd.DataFrame(errors[k]).mean(axis=1)

In [67]:
pd.DataFrame(mean_error)

Unnamed: 0,0,2,4,6,8
air1,571.697059,571.697059,571.697059,571.697059,571.697059
clotheswasher1,17.140572,17.140572,17.140572,17.140572,17.140572
dishwasher1,19.147143,19.147143,19.147143,19.147143,19.147143
drye1,96.919089,96.919089,96.919089,96.919089,96.919089
furnace1,199.781761,199.781761,199.781761,199.781761,199.781761
kitchenapp1,25.554622,25.554622,25.554622,25.554622,25.554622
microwave1,14.439758,14.439758,14.439758,14.439758,14.439758
refrigerator1,87.825367,87.825367,87.825367,87.825367,87.825367
use,153.135649,153.135649,153.135649,153.135649,153.135649
