In [1]:
import pandas as pd
import numpy as np
from scipy.stats import truncnorm, invgamma
import pickle
import math
import time

### Load temperatures data

In [7]:
df_temp=pd.read_csv('data/temp_2016.csv',sep=',')
df_temp=df_temp.drop(df_temp.columns[0],axis=1)
df_temp=df_temp.sort_values(by='date')
#df_temp.head()

Unnamed: 0,date,t,hour
1707,2016-01-01,9.044444,09:00
1710,2016-01-01,11.005556,18:00
1709,2016-01-01,11.437736,15:00
1708,2016-01-01,11.196296,12:00
1711,2016-01-01,10.683333,21:00


In [8]:
#temperatures every day at 3pm
temp_day=df_temp[df_temp.hour=="15:00"]
temp_day.head()
T_h=list(temp_day.t)

### Load electricity data

In [9]:
mypath="data/"
df=pd.read_csv(mypath+"cdc_conso_daytypes.csv",sep=",",encoding='latin-1')
df.index=range(0,len(df))

In [11]:
df=df.sort_values(by=['Date'])
df.index=range(0,len(df))
#Valeurs intermédiaires à partir de 2017

In [12]:
#On se concentre d'abord sur une année afin d'initialiser le filtre
df=df[(df.Date >'2016-01-01') & (df.Date <'2017-01-01')]
df.index=range(0,len(df))

In [17]:
df['Consommation']=df['Consommation (MW)']*10**6
#df.head()

Unnamed: 0.1,Unnamed: 0,Date - Heure,Date,Heure,Consommation (MW),Qualite donnee,daytype0,daytype1,daytype2,daytype3,daytype4,daytype6,daytype5,daytype7,daytype8,daytype,Consommation
0,149461,2016-01-02T09:00:00+01:00,2016-01-02,09:00,52975,DÃ©finitive,False,False,True,False,False,False,False,True,True,8,52975000000
1,149460,2016-01-02T07:00:00+01:00,2016-01-02,07:00,49417,DÃ©finitive,False,False,True,False,False,False,False,True,True,8,49417000000
2,149462,2016-01-02T10:00:00+01:00,2016-01-02,10:00,55737,DÃ©finitive,False,False,True,False,False,False,False,True,True,8,55737000000
3,149463,2016-01-02T11:00:00+01:00,2016-01-02,11:00,57237,DÃ©finitive,False,False,True,False,False,False,False,True,True,8,57237000000
4,149464,2016-01-02T13:00:00+01:00,2016-01-02,13:00,61223,DÃ©finitive,False,False,True,False,False,False,False,True,True,8,61223000000


# Gibbs Sampling

In [19]:
def pf_init_gibbs(temperature_list, daytype_list, consumption_list, init_iterations, nb_particles, sigma2, kappa, u_heat, log):
    """
    Parameters :
    temperature_list: list of temperatures (per day)
    daytype_list: list of daytypes (per day)
    consumption_list: list of electricity consumption (per day)
    init_iterations: number of iterations for the initialization step
    nb_particles: number of particles
    sigma2: initial sigma squared (fixed)
    kappa: 
    u_heat: 
    log: if True returns value of the computed parameters during the sampling
    """
    # Variables initialization
    s = np.zeros((init_iterations, nb_particles)) 
    g_heat = np.zeros((init_iterations, nb_particles))
    #sigma_s and sigma_g are fixed
    sigma_s_star_2 = np.zeros((1, nb_particles)) 
    sigma_g_star_2 = np.zeros((1, nb_particles))

    #Gibbs : Initialization
    s[0,0] = truncnorm.rvs(a = 0,b = math.inf, loc= 0, scale = 10**4, size=1)[0]
    g_heat[0,0] =  truncnorm.rvs(a = - math.inf,b = 0, loc= 0, scale = 10**4, size=1)[0]
    sigma_s_star_2[0, 0] = invgamma.rvs(a=10**(-2), scale=10**(2), size = 1)[0]
    sigma_g_star_2[0, 0] = invgamma.rvs(a=10**(-2), scale=10**(2), size = 1)[0]
    
    #Gibbs : Step 0
    sigma_s_star0 = math.sqrt(sigma_s_star_2[0,0])
    sigma_g_star0 = math.sqrt(sigma_g_star_2[0,0])
    for i in range(1, init_iterations):
        s[i,0] = s[i-1,0] + \
                 truncnorm.rvs(a = -s[i-1,0]/sigma_s_star0,b = math.inf, loc= 0, scale = sigma_s_star0, size=1)[0] #page 18
        g_heat[i,0] = g_heat[i-1,0] + \
                      truncnorm.rvs(a = -math.inf, b=- g_heat[i-1,0]/sigma_g_star0, loc= 0, scale = sigma_g_star0, size=1)[0]
    
    #Gibbs : step t > 0
    for j in range(1, nb_particles):
        if log:
            print("Particle " + str(j) + "/" + str(nb_particles))
        else:
            if(j%10**4==0):
                print("Particle " + str(j) + "/" + str(nb_particles))
        s[:,j] = s[:,j-1]
        g_heat[:,j] = g_heat[:,j-1]
        sigma_s_star_2[:,j] = sigma_s_star_2[:,j-1]
        sigma_g_star_2[:,j] = sigma_g_star_2[:,j-1]
        
        # Simulate s0
        #Compute variance and mean denominator
        denom_s_0 = (10**4)*sigma_s_star_2[0, j]*(kappa[daytype_list[0]]**2) + sigma2*sigma_s_star_2[0, j] + (10**8)*sigma2 
        #Compute mean numerator
        numerator_mean_s_0 = (10**4)*sigma2* s[1,j] + (10**8)*sigma_s_star_2[0, j]*kappa[daytype_list[0]]*consumption_list[0]
        if (u_heat > temperature_list[0]):
            numerator_mean_s_0 -= (10**4)*sigma_s_star_2[0, j]*kappa[daytype_list[0]]*g_heat[0,j]*(temperature_list[0] - u_heat)
        #Compute the final parameters of the truncated normal that simulates from the full conditional of s_0
        #Mean
        mean_s_0 = numerator_mean_s_0 / denom_s_0
        #Variance
        var_s_0 = ((10**8) *sigma2*sigma_s_star_2[0, j]) / denom_s_0
        std_s_0 = math.sqrt(var_s_0)

        a=-mean_s_0/std_s_0
        while True:
            s[0,j] = truncnorm.rvs(a = a, b = math.inf, loc= mean_s_0, scale = std_s_0, size=1)[0]
            if(s[0,j]!=math.inf):
                break
        if log:
            print("s[0,"+str(j)+"]")
            print(s[0,j])
        
        # Simulate s(i), i>0
        for i in range(1, init_iterations):
            denom_s_i = 2*sigma2 + sigma_s_star_2[0, j]*(kappa[daytype_list[i]]**2)
            dependence_next_s = 0
            if (i+1 < init_iterations-1):
                dependence_next_s = s[i+1,j]
            #Compute mean numerator
            numerator_mean_s_i = sigma2*(s[i-1,j] + dependence_next_s) + \
                                 sigma_s_star_2[0, j]*kappa[daytype_list[i]]*(consumption_list[i])
            if (u_heat > temperature_list[i]):
                numerator_mean_s_i = numerator_mean_s_i - \
                                     sigma_s_star_2[0, j]*kappa[daytype_list[i]]*g_heat[i,j]*(temperature_list[i] - u_heat)
            mean_s_i = numerator_mean_s_i / denom_s_i
            var_s_i = (sigma2*sigma_s_star_2[0, j]) / denom_s_i
            std_s_i = math.sqrt(var_s_i)
            
            a=-mean_s_i/std_s_i
            temp=0
            while True:
                s[i,j] = truncnorm.rvs(a = a, b = math.inf, loc= mean_s_i, scale = std_s_i, size=1)[0]
                if(s[i,j]!=math.inf and s[i,j]!=-math.inf):
                    break
                
                temp+=1
                if(temp%5==0):
                    print(temp)
            if log:
                print("s["+str(i)+","+str(j)+"]")
                print(s[i,j])
        
        # Simulate g_heat0
        denom_g_0 = sigma2*sigma_g_star_2[0, j] + (10**8)*sigma2
        numerator_mean_g_0 = (10**8)*sigma2* g_heat[1,j]
        if (u_heat > temperature_list[0]):
            denom_g_0 = denom_g_0 + (10**8)*sigma_g_star_2[0, j]*((temperature_list[0] - u_heat )**2)
            numerator_mean_g_0 = numerator_mean_g_0 + \
                                 (10**8)*sigma_g_star_2[0, j]*(temperature_list[0] - u_heat)*(consumption_list[0] - s[0,j]*kappa[daytype_list[0]])
        #Compute the final parameters of the truncated normal that simulates from the full conditional of g_0
        mean_g_0 = numerator_mean_g_0 / denom_g_0
        var_g_0 = ((10**8) *sigma2*sigma_g_star_2[0, j]) / denom_g_0
        std_g_0 = math.sqrt(var_g_0)

        b=-mean_g_0/std_g_0
        while True:
            g_heat[0,j] =  truncnorm.rvs(a = -math.inf, b = b, loc= mean_g_0, scale = std_g_0, size=1)[0]
            if(g_heat[0,j]!=-math.inf):
                break
        if log:
            print("g_heat["+str(0)+","+str(j)+"]")
            print(g_heat[0,j])

        # Simulate g_heat(i), i>0
        for i in range(1, init_iterations):
            dependence_next_g = 0
            if (i+1 < init_iterations-1):
                dependence_next_g = g_heat[i+1,j]

            denom_g_i = 2*sigma2
            numerator_mean_g_i = sigma2*(g_heat[i-1,j] + dependence_next_g)
            if (u_heat > temperature_list[i]):
                denom_g_i = denom_g_i + sigma_g_star_2[0, j]*((temperature_list[i] - u_heat )**2)
                numerator_mean_g_i = numerator_mean_g_i + \
                                     sigma_g_star_2[0, j]*(temperature_list[i] - u_heat )*(consumption_list[i] - s[i,j]*kappa[daytype_list[i]])

            mean_g_i = numerator_mean_g_i / denom_g_i
            var_g_i = (sigma2*sigma_g_star_2[0, j]) / denom_g_i
            std_g_i = math.sqrt(var_g_i)

            b=-mean_g_i/std_g_i
            while True:
                g_heat[i,j] =  truncnorm.rvs(a = -math.inf, b = b, loc= mean_g_i, scale = std_g_i, size=1)[0]
                if(g_heat[i,j]!=-math.inf):
                    break
            if log:
                print("g_heat["+str(i)+","+str(j)+"]")
                print(g_heat[i,j])

        # Simulate the variances
        shape_variances = 0.01 + ((init_iterations - 1)/2)
        s_lag = np.roll(s[:,j], 1)
        s_lag[0] = s[0,j]
        scale_s = (0.01 + sum((s[:,j] - s_lag)**2))**(-1)
        sigma_s_star_2[0, j] = invgamma.rvs(a=shape_variances, scale=scale_s, size = 1)[0]
        g_lag = np.roll(g_heat[:,j], 1)
        g_lag[0] = g_heat[0,j]
        scale_g = (0.01 + sum((g_heat[:,j] - g_lag)**2))**(-1)
        sigma_g_star_2[0, j] = invgamma.rvs(a=shape_variances, scale=scale_g, size = 1)[0]

        if log:
            print(s[:,j])
            print(g_heat[:,j])

    # Return the initialization of the Particle Filter at date (init_iterations - 1)
    s_init = s[init_iterations-1,]
    g_heat_init = g_heat[init_iterations-1,:]
    sigma_s_init = np.sqrt(sigma_s_star_2[0,nb_particles-1])
    sigma_g_init = np.sqrt(sigma_g_star_2[0,nb_particles-1])

    x_season = kappa[daytype_list[init_iterations-1]]*s_init
    x_heat = np.maximum((temperature_list[init_iterations-1]-u_heat)*g_heat_init,0)
    x_init = x_season + x_heat

    """
    if log:
        return s_init, g_heat_init, sigma_s_init, sigma_g_init, x_init, s, g_heat, sigma_s_star_2, sigma_g_star_2
    else:
        return s_init, g_heat_init, sigma_s_init, sigma_g_init, x_init
    """
    return s_init, g_heat_init, sigma_s_init, sigma_g_init, x_init, s, g_heat, sigma_s_star_2, sigma_g_star_2

## Function parameters required

In [29]:
nb_days = 15
nb_particles = 10**5

consumption = list(df[df['Heure']=='15:00']['Consommation'][:nb_days])
temperature_ts = T_h[:nb_days] #france mean temp in jan 2016
daytype_ts = list(df[df['Heure']=='15:00']['daytype'][:nb_days])

In [20]:
#kappa daytype
N_daytype=9
k_day=npr.dirichlet(np.ones(N_daytype),1)
kappa = k_day[0].tolist()
kappa

[0.1129031529568417,
 0.05344803982865821,
 0.09173007150355728,
 0.01233676171968247,
 0.025330898161188043,
 0.29715592398865215,
 0.07608723983649404,
 0.18879344171012014,
 0.1422144702948058]

In [21]:
#sigma 2
sigma2=invgamma.rvs(a=10**(-2), scale=10**(2), size = 1)[0]
sigma2

7.7149398627504921e+141

In [22]:
#u_heat
u_h=npr.normal(14,1)  #fixed parameter, invariant to time
u_h

13.671220870543294

In [23]:
#Execute Gibbs sampling
start_time = time.time()
s_init, g_heat_init, sigma_s_init, sigma_g_init, x_init, s_evol, g_heat_evol, sigma_s_star_2_evol, sigma_g_star_2_evol = pf_init_gibbs(temperature_ts, daytype_ts, consumption, nb_days, nb_particles, sigma2, kappa, u_h, False)
print("--- %s seconds ---" % (time.time() - start_time))

Particle 10000/100000
Particle 20000/100000
Particle 30000/100000
Particle 40000/100000
Particle 50000/100000
Particle 60000/100000
Particle 70000/100000
Particle 80000/100000
Particle 90000/100000
--- 443.4609091281891 seconds ---


In [24]:
x_init[:20]

array([  1.16093549e+11,   6.10701946e+10,   5.74572969e+09,
         4.75739198e+09,   4.11648732e+09,   3.72364616e+09,
         3.47391174e+09,   3.30766255e+09,   3.19165005e+09,
         3.10675781e+09,   3.04159846e+09,   2.98916605e+09,
         2.94503083e+09,   2.90633069e+09,   2.87118720e+09,
         2.83835551e+09,   2.80700763e+09,   2.77659420e+09,
         2.74675410e+09,   2.71725420e+09])

In [30]:
#Compute w_init
w_init = np.exp(-(np.square(consumption[nb_days-1]-x_init))/(2*sigma2))
w_init[:10]

array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.])

In [31]:
#Write initial parameters from Gibbs sampling in a file
output_file ='data/parameters_init.pkl'
parameters = {
    's_init':s_init,
    'g_heat_init':g_heat_init,
    'sigma_s_init':sigma_s_init,
    'sigma_g_init':sigma_g_init,
    'x_init':x_init,
    'w_init':w_init,
    's_evol':s_evol,
    'g_heat_evol':g_heat_evol,
    'sigma_s_star_2_evol':sigma_s_star_2_evol,
    'sigma_g_star_2_evol':sigma_g_star_2_evol,
    'sigma2': sigma2,
    'u_h':u_h,
    'kappa': kappa
}
output = open(output_file, 'wb')
pickle.dump(parameters, output)
output.close()

In [32]:
#Steps to Load pickle file
input_file ='data/parameters_init.pkl'
pkl_file = open(input_file, 'rb')
parameters_init = pickle.load(pkl_file)
parameters_init

{'g_heat_evol': array([[ -1.23046128e+04,  -1.33567791e+03,  -5.65927717e+09, ...,
          -2.16993786e+00,  -2.05457068e+00,  -2.43550419e+00],
        [ -7.81634030e+08,  -5.65927717e+09,  -1.08156836e+10, ...,
          -1.93927265e+00,  -2.22336706e+00,  -2.18744745e+00],
        [ -1.02590443e+10,  -1.59720900e+10,  -1.53330258e+10, ...,
          -1.87326499e+00,  -2.07957566e+00,  -1.92247607e+00],
        ..., 
        [ -4.24261743e+09,  -1.20360693e+10,  -4.85398311e+09, ...,
          -1.66758199e+00,  -1.14780235e+00,  -9.06093549e-01],
        [ -1.63132145e+10,  -1.68450867e+09,  -2.42699156e+09, ...,
          -1.05285449e+00,  -3.99279081e-01,  -5.35378384e-01],
        [ -2.45189115e+10,  -1.28980009e+10,  -1.21349578e+09, ...,
          -7.74378128e-01,  -7.01308254e-02,  -8.55318383e-02]]),
 'g_heat_init': array([ -2.45189115e+10,  -1.28980009e+10,  -1.21349578e+09, ...,
         -7.74378128e-01,  -7.01308254e-02,  -8.55318383e-02]),
 'kappa': [0.1129031529568417,


### Part 2: regularize weights and x if necessary