In [2]:
import pandas as pd
import numpy as np
import datetime
import time

In [3]:
# Error function: mean squared error
def MSE(y, Y):
    """Return the mean of the squared differences between ``y`` and ``Y``.

    The two operands are subtracted with ordinary ``-``, so when they are
    NumPy arrays of different shapes the usual broadcasting rules apply
    before the mean is taken over every resulting element.
    """
    residual = y - Y
    return np.mean(np.square(residual))

# Build the neural network
class NeuralNetwork(object):
    """Feed-forward network with one sigmoid hidden layer and a linear output.

    The hidden-to-output weights are stored as a 1-D vector of length
    ``hidden_nodes``, so the network produces a single value per record
    regardless of ``output_nodes`` (which is only recorded on the instance).
    """

    def __init__(self, input_nodes, hidden_nodes, output_nodes, learning_rate):
        """Store the layer sizes and draw the initial weights.

        Args:
            input_nodes: number of input features.
            hidden_nodes: number of units in the hidden layer.
            output_nodes: recorded as an attribute; not used to shape weights.
            learning_rate: step size applied to the averaged weight updates.
        """
        # Set number of nodes in input, hidden and output layers.
        self.input_nodes = input_nodes
        self.hidden_nodes = hidden_nodes
        self.output_nodes = output_nodes

        # Initialize weights from zero-mean normals scaled by 1/sqrt(fan-in).
        self.weights_input_to_hidden = np.random.normal(
            0.0, self.input_nodes**-0.5, (self.input_nodes, self.hidden_nodes))
        self.weights_hidden_to_output = np.random.normal(
            0.0, self.hidden_nodes**-0.5, self.hidden_nodes)
        self.lr = learning_rate
        # Sigmoid activation used by the hidden layer.
        self.activation_function = lambda x: 1 / (1 + np.exp(-x))

    def train(self, features, targets, train_perishable_0=None):
        """Run one batch gradient-descent step on the squared error.

        Args:
            features: 2-D array, one row per record.
            targets: one entry per record; the first element of each entry is
                used as the target value (a bare scalar entry also works).
            train_perishable_0: accepted for backward compatibility; it is
                not used by the update.
        """
        n_records = features.shape[0]
        if n_records == 0:
            # Nothing to learn from; also avoids a 0/0 that would turn the
            # weights into NaN.
            return

        delta_weights_i_h = np.zeros(self.weights_input_to_hidden.shape)
        delta_weights_h_o = np.zeros(self.weights_hidden_to_output.shape)

        for X, y in zip(features, targets):
            # Object-dtype rows (e.g. taken from a mixed-dtype DataFrame) are
            # converted once so all later arithmetic runs in float64.
            X = np.asarray(X, dtype=np.float64)

            # Forward pass: sigmoid hidden layer, linear output. This must
            # match run(), which also returns the un-squashed output.
            hidden_input = np.dot(X, self.weights_input_to_hidden)
            hidden_output = self.activation_function(hidden_input)
            final_output = np.dot(hidden_output, self.weights_hidden_to_output)

            # Backward pass. The output activation is the identity, so its
            # derivative is 1 and the output error term is the raw error.
            error = float(np.ravel(y)[0]) - final_output
            output_error_term = error

            hidden_error = output_error_term * self.weights_hidden_to_output
            # The sigmoid derivative belongs in the hidden-layer term.
            hidden_error_term = hidden_error * hidden_output * (1 - hidden_output)

            delta_weights_h_o += output_error_term * hidden_output
            delta_weights_i_h += hidden_error_term * X[:, None]

        # Apply the update averaged over the batch.
        self.weights_hidden_to_output += self.lr * delta_weights_h_o / n_records
        self.weights_input_to_hidden += self.lr * delta_weights_i_h / n_records

    def run(self, features):
        """Return the network output for ``features`` (one value per row)."""
        features = np.asarray(features, dtype=np.float64)
        hidden_inputs = np.dot(features, self.weights_input_to_hidden)
        hidden_outputs = self.activation_function(hidden_inputs)  # signals from hidden layer
        # The output layer is linear: its input is returned unchanged.
        final_outputs = np.dot(hidden_outputs, self.weights_hidden_to_output)
        return final_outputs


In [4]:
# Prepare the store data: replace each categorical column with one-hot columns.
stores = pd.read_csv('data/stores.csv')
stores_fields = ['city', 'state', 'type', 'cluster']
# One indicator frame per categorical column, each prefixed with the column name.
store_indicator_frames = [
    pd.get_dummies(stores[column], prefix=column, drop_first=False)
    for column in stores_fields
]
# Keep the remaining columns first, then append the indicator columns in field order.
stores = pd.concat(
    [stores.drop(stores_fields, axis=1)] + store_indicator_frames,
    axis=1,
)

In [5]:
# Prepare the item data: replace each categorical column with one-hot columns.
items = pd.read_csv('data/items.csv')
items_fields = ['family', 'class', 'perishable']
# One indicator frame per categorical column, each prefixed with the column name.
item_indicator_frames = [
    pd.get_dummies(items[column], prefix=column, drop_first=False)
    for column in items_fields
]
# Keep the remaining columns first, then append the indicator columns in field order.
items = pd.concat(
    [items.drop(items_fields, axis=1)] + item_indicator_frames,
    axis=1,
)

In [6]:
# Prepare the oil data: fill missing prices from the neighbouring days, then
# standardise the price column.
# NOTE(review): dates that are absent from the file altogether are not added
# here; only NaN prices on existing rows are filled.
oil = pd.read_csv('data/oil.csv')

# Statistics are computed from the observed prices only (pandas skips NaN).
mean, std = oil['dcoilwtico'].mean(), oil['dcoilwtico'].std()

# Linear interpolation gives the average of the previous and next day for a
# single missing value, and also handles runs of consecutive NaNs. A NaN on
# the first or last row has only one side to draw from, so it takes the nearest
# observed price (limit_direction='both'). The previous hand-written loop
# compared the index against len(oil), which is never a valid row label, so a
# missing last price raised a KeyError.
oil['dcoilwtico'] = oil['dcoilwtico'].interpolate(method='linear', limit_direction='both')

oil['dcoilwtico'] = (oil['dcoilwtico'] - mean) / std

In [7]:
# Prepare the holiday data: one-hot encode the categorical columns and keep
# a single row of flags per date.
holidays = pd.read_csv('data/holidays_events.csv')
holidays_fields = ['type', 'locale', 'locale_name', 'description','transferred']
for each in holidays_fields:
    dummies = pd.get_dummies(holidays[each], prefix=each, drop_first=False)
    holidays = pd.concat([holidays, dummies], axis=1)
fields_to_drop = ['type', 'locale', 'locale_name', 'description','transferred']
holidays = holidays.drop(fields_to_drop, axis=1)

# This frame is joined onto the training rows by 'date'. If a date carries
# more than one event, the join would emit one copy of every training row per
# event. Collapsing to one row per date (a flag is set when any event on that
# date sets it) keeps the join one-to-one and leaves the columns unchanged.
holidays = holidays.groupby('date', as_index=False).max()

In [8]:
# Open train.csv lazily: with chunksize set, read_csv returns an iterator that
# yields DataFrames of up to 10000 rows instead of loading the whole file.
# NOTE(review): such an iterator can only be consumed once; any code that needs
# several passes over the data must call read_csv again for each pass.
trains = pd.read_csv('data/train.csv',chunksize=10000)


In [11]:
# The data has been cleaned (the oil prices are not merged in yet); now build
# and train the model.
import sys
print ("train_data_begin:"+time.strftime("%H:%M:%S"))
### Set the hyperparameters here ###
iterations = 10
learning_rate = 1
hidden_nodes = 8
output_nodes = 1
chunk_size = 10000   # rows of train.csv processed per gradient step
print_every = 1      # report the loss every this many chunks

drop_list = ['id','date','store_nbr','item_nbr','onpromotion']
target_fields = ['unit_sales']

# The network is created from the first chunk so its input width always
# matches the feature matrix instead of relying on a hard-coded column count.
network = None

losses = {'train':[], 'validation':[]}
for ii in range(iterations):
    # A chunked reader can only be iterated once, so it is reopened for every
    # epoch; otherwise every epoch after the first would see no data.
    trains = pd.read_csv('data/train.csv', chunksize=chunk_size)
    for i, train in enumerate(trains, start=1):
        # Encode the promotion flag into two fixed columns so every chunk has
        # the same layout; a missing flag leaves both columns at 0.
        promotion = train['onpromotion']
        train['onpromotion_False'] = (promotion == False).astype(int)
        train['onpromotion_True'] = (promotion == True).astype(int)

        train = pd.merge(train,stores,on='store_nbr')
        train = pd.merge(train,items,on='item_nbr')
        train = pd.merge(train,holidays,how = 'left',on='date')
        train = train.fillna(0)
        train = train.drop(drop_list, axis=1)
        if train.empty:
            # The inner merges can leave nothing to train on.
            continue

        train_perishable_0 = train['perishable_0']
        train_features,train_targets = train.drop(target_fields, axis=1), train[target_fields]
        # Convert once to float64 so the network never has to work on
        # object-dtype arrays.
        feature_matrix = train_features.values.astype(np.float64)
        target_matrix = train_targets.values.astype(np.float64)   # shape (n, 1)

        if network is None:
            network = NeuralNetwork(feature_matrix.shape[1], hidden_nodes,
                                    output_nodes, learning_rate)

        network.train(feature_matrix, target_matrix, train_perishable_0.values)

        # run() returns one value per row, so compare it with the flattened
        # target column; comparing (n,) against (n, 1) would broadcast to an
        # (n, n) matrix and report a meaningless loss.
        train_loss = MSE(network.run(feature_matrix), target_matrix[:, 0])
        losses['train'].append(train_loss)
        # Printing out the training progress
        if i % print_every == 0:
            print(str(ii) + ':train_loss:' + str(train_loss))
print('----------------end-------------')

train_data_begin:15:42:17


KeyboardInterrupt: 