In [104]:
import numpy as np
import pandas as pd
import sys
sys.path.append("../code/")
from tensor import *
from sklearn.metrics import mean_squared_error


In [115]:
data = np.load("./data-2013-2017-observed-filtered.npy").item()

In [116]:
selected_appliance = ['use', 'air1', 'refrigerator1','furnace1', 'clotheswasher1', 'dishwasher1',  'microwave1']

In [117]:
data[2013][93]['use'].values

array([ 632.94574,  562.6963 ,  578.61914,  656.1875 ,  928.1124 ,
       1743.7012 , 1891.1389 , 2044.395  , 1686.1647 ,  988.71326,
        676.6726 , 1242.0251 ], dtype=float32)

In [118]:
def create_tensor(year, data):
    raw_data = data[year]
    homeids = list(raw_data.keys())
    tensor = np.zeros((len(homeids), 7, 12))
    for idx, hid in enumerate(homeids):
        for i, app in enumerate(selected_appliance):
            tensor[idx][i] = raw_data[hid][app].values.T
    return tensor

In [119]:
all_tensor = {}
for year in [2013, 2014, 2015, 2016, 2017]:
    all_tensor[year] = create_tensor(year, data)

In [120]:
def get_train_test(tensor, random_seed):
    np.random.seed(random_seed)
    num = tensor.shape[0]
    
    index = np.random.choice(num, int(num/2), False)
    train = tensor[index]
    
    test_index = [x for x in np.array(range(num)) if x not in index]
    test = tensor[test_index]
    
    return train, test

In [125]:
def get_errors(year1, year2):
    
    errors = {}
    for test_set in ['same', 'diff']:
        errors[test_set] = {}
        for appliance in selected_appliance[1:]:
            errors[test_set][appliance] = {}
        
    for random_seed in range(20):

        print(random_seed)
        # between 2013 and 2014
        train, gt_test_same = get_train_test(all_tensor[year1], random_seed)
        gt_test_diff = all_tensor[year2]

        agg_test_same = gt_test_same.copy()
        agg_test_diff = gt_test_diff.copy()

        agg_test_same[:, 1:, :] = np.NaN
        agg_test_diff[:, 1:, :] = np.NaN

        # for the same year
        same_tensor = np.concatenate((train, gt_test_same))
        diff_tensor = np.concatenate((train, gt_test_diff))

        H_same, A_same, T_same = learn_HAT_adagrad(case, same_tensor, 3, 3, 10000, 0.5, dis=True, random_seed = 0)
        H_diff, A_diff, T_diff = learn_HAT_adagrad(case, diff_tensor, 3, 3, 10000, 0.5, dis=True, random_seed = 0)

        # calculate error
        pred_same = multiply_case(H_same, A_same, T_same, case)
        pred_diff = multiply_case(H_diff, A_diff, T_diff, case)

        pred_test_same = pred_same[train.shape[0]:]
        pred_test_diff = pred_diff[train.shape[0]:]

        for i in range(1, 7):
            # error on same year
            error_same = np.sqrt(mean_squared_error(pred_test_same[:, i, :].reshape(1, -1), gt_test_same[:, i, :].reshape(1, -1)))
            error_diff = np.sqrt(mean_squared_error(pred_test_diff[:, i, :].reshape(1, -1), gt_test_diff[:, i, :].reshape(1, -1)))
            errors['same'][selected_appliance[i]][random_seed] = error_same
            errors['diff'][selected_appliance[i]][random_seed] = error_diff
            print(i, error_same, error_diff)
    return errors

In [126]:
err = get_errors(2015, 2014)

0
613.3972522484154 0
75.93347561987106 500
72.05001143168013 1000
71.13842140053833 1500
70.74347830142011 2000
70.3195643600551 2500
69.96023011313915 3000
69.71018917858912 3500
69.56175888549414 4000
69.44636099694536 4500
69.36791642204834 5000
69.29997534469354 5500
69.24045552826692 6000
69.19124606964759 6500
69.14692926600247 7000
69.11197690840164 7500
69.08303139313281 8000
69.05902818948445 8500
69.03927007116674 9000
69.02296911809013 9500
634.5321971245016 0
77.77288757510495 500
73.15866308653585 1000
72.59012127716355 1500
72.36914696482238 2000
72.19440700077153 2500
71.99643501325995 3000
71.79009833502374 3500
71.61244166097927 4000
71.47226096888956 4500
71.36749494298704 5000
71.29306571176507 5500
71.23267109484911 6000
71.18492103174667 6500
71.14600203053944 7000
71.11410347602441 7500
71.08829943572657 8000
71.06799251451154 8500
71.05238640610663 9000
71.04059396851648 9500
1 7861.666053077516 6186.984323426932
2 815.5419827551639 904.349144347079
3 7830.05695

KeyboardInterrupt: 

In [114]:
errors = {}

for year1 in range(2013, 2018):
    print(year1)
    errors[year1] = {}
    for year2 in range(2013, 2018):
        if year1 !=  year2:
            print(year2)
            errors[year1][year2] = get_errors(year1, year2)


2013
2014
0
1
2
3


KeyboardInterrupt: 

In [92]:
pd.Series(errors['same']['air1']).values

array([5225.19009545, 8560.68437719, 8401.18246689, 8545.72809968,
       6845.85357419, 8350.18118934, 6832.886701  , 5403.01555256,
       7145.51436009, 8272.80383058, 5429.16221467, 6884.10410731,
       8196.87813973, 7257.33809912, 7835.23613426, 8097.6175433 ,
       6081.85411389, 7473.34543076, 5387.67168711, 5946.64590686])

In [130]:
errors = np.load("../code/errors.npy", encoding='latin1').item()

In [139]:
pd.Series(errors[2013][2014]['diff']['air1']).values

array([6025.40644899, 5823.60642963, 6136.38675961, 5865.06885915,
       6020.56337346, 6081.0445012 , 5886.63626399, 6140.85733322,
       5950.02461834, 5967.69160191, 5994.03789764, 6090.65370371,
       6138.62723184, 5830.00480675, 5949.13876172, 6200.83392983,
       5923.45486574, 6012.16781266, 6184.19150857, 6128.71588289])

In [145]:
pvalue = {}
for appliance in selected_appliance[1:]:
    pvalue[appliance] = {}
    for year1 in range(2013, 2018):
        pvalue[appliance][year1] = {}
        for year2  in range(2013, 2018):
            if year1 != year2:
                pvalue[appliance][year1][year2] = {}

from scipy import stats
for year1 in range(2013, 2018):
    for year2  in range(2013, 2018):
        if year1 != year2:
            for appliance in selected_appliance[1:]:
                
                statistic, pvalue[appliance][year1][year2] = stats.ttest_rel(pd.Series(errors[year1][year2]['same'][appliance]).values, pd.Series(errors[year1][year2]['diff'][appliance]).values )
                print(year1, year2, appliance)

2013 2014 air1
2013 2014 refrigerator1
2013 2014 furnace1
2013 2014 clotheswasher1
2013 2014 dishwasher1
2013 2014 microwave1
2013 2015 air1
2013 2015 refrigerator1
2013 2015 furnace1
2013 2015 clotheswasher1
2013 2015 dishwasher1
2013 2015 microwave1
2013 2016 air1
2013 2016 refrigerator1
2013 2016 furnace1
2013 2016 clotheswasher1
2013 2016 dishwasher1
2013 2016 microwave1
2013 2017 air1
2013 2017 refrigerator1
2013 2017 furnace1
2013 2017 clotheswasher1
2013 2017 dishwasher1
2013 2017 microwave1
2014 2013 air1
2014 2013 refrigerator1
2014 2013 furnace1
2014 2013 clotheswasher1
2014 2013 dishwasher1
2014 2013 microwave1
2014 2015 air1
2014 2015 refrigerator1
2014 2015 furnace1
2014 2015 clotheswasher1
2014 2015 dishwasher1
2014 2015 microwave1
2014 2016 air1
2014 2016 refrigerator1
2014 2016 furnace1
2014 2016 clotheswasher1
2014 2016 dishwasher1
2014 2016 microwave1
2014 2017 air1
2014 2017 refrigerator1
2014 2017 furnace1
2014 2017 clotheswasher1
2014 2017 dishwasher1
2014 2017 mic

In [147]:
pd.DataFrame(pvalue['air1'])

Unnamed: 0,2013,2014,2015,2016,2017
2013,,9.821887e-13,3.76516e-10,1.949996e-14,5.466986e-15
2014,1.1e-05,,0.0001797171,4.207961e-05,1.916349e-06
2015,0.416393,7.159243e-07,,6.688434e-08,2.59633e-11
2016,0.011734,0.001804721,0.2193509,,1.096362e-07
2017,1e-06,0.9311754,0.002374162,0.1350081,


In [149]:
num = 20
index = np.random.choice(num, 9, False)
index

array([ 6, 10,  4, 16,  5,  7, 17, 19, 11])

In [152]:
for year1 in range(2013, 2018):
    for year2  in range(2013, 2018):
        if year1 != year2:
            print("nohup python cross_years_validate.py {} {} &".format(year1, year2))

nohup python cross_years_validate.py 2013 2014
nohup python cross_years_validate.py 2013 2015
nohup python cross_years_validate.py 2013 2016
nohup python cross_years_validate.py 2013 2017
nohup python cross_years_validate.py 2014 2013
nohup python cross_years_validate.py 2014 2015
nohup python cross_years_validate.py 2014 2016
nohup python cross_years_validate.py 2014 2017
nohup python cross_years_validate.py 2015 2013
nohup python cross_years_validate.py 2015 2014
nohup python cross_years_validate.py 2015 2016
nohup python cross_years_validate.py 2015 2017
nohup python cross_years_validate.py 2016 2013
nohup python cross_years_validate.py 2016 2014
nohup python cross_years_validate.py 2016 2015
nohup python cross_years_validate.py 2016 2017
nohup python cross_years_validate.py 2017 2013
nohup python cross_years_validate.py 2017 2014
nohup python cross_years_validate.py 2017 2015
nohup python cross_years_validate.py 2017 2016
