## 软件包加载

In [1]:
import numpy as np                     # 引入基础软件包numpy
import pandas as pd                    # 引入基础软件包pandas
from collections import OrderedDict    # OrderedDict用于记录模型的specification(声明) 
import pylogit as pl                   # 引入基础软件包logit模型软件包pylogit
import matplotlib.pyplot as plt        # 引入绘图软件包

## 数据读入

In [2]:
# Load the comma-separated data file; its first line is the header row.
data_path = './Data/long_data.csv'
raw_data = pd.read_csv(data_path, header=0)
# Preview the first 8 records, transposed so each column is shown as a row.
raw_data.head(8).T

FileNotFoundError: File b'../Chapters1_Data/long_data.csv' does not exist

## 数据构造

In [None]:
# Columns carried into the model: observation id, alternative id, the choice
# indicator (MODE) and the explanatory variables.
model_columns = ['OBS_ID', 'ALT_ID', 'MODE', 'HINC', 'PSIZE', 'TTME', 'INVC', 'INVT']
model_data = raw_data[model_columns]

## 模型搭建

In [None]:
# Nest structure: each nest name maps to the alternative ids it contains.
nest_membership = OrderedDict([
    ("air_Modes", [0]),
    ("ground_Modes", [1, 2, 3]),
])

# Utility specification, declared once as
# (variable, alternatives it enters, coefficient names).
# A flat list of ids is paired with one coefficient name per id, while a
# nested list is paired with a single name for the whole group.
# Alternative-specific attributes (TTME, INVT) can be assigned flexibly.
# Decision-maker attributes (HINC, PSIZE) are identical across the
# alternatives of one observation, so to keep the model estimable they are
# only allowed to enter a subset of the alternatives.
utility_terms = [
    ("intercept", [0, 1, 2], ['ASC_air', 'ASC_train', 'ASC_bus']),
    ("TTME", [[0, 1, 2]], ['TTME']),
    ("INVT", [[0, 1, 2, 3]], ['INVT']),
    ("HINC", [[1, 2]], ['HINC_train_bus']),
    ("PSIZE", [0], ['PSIZE_air']),
]
basic_specification = OrderedDict(
    (variable, alternatives) for variable, alternatives, _ in utility_terms
)
basic_names = OrderedDict(
    (variable, labels) for variable, _, labels in utility_terms
)

# Build the nested logit model on the long-format data.
nested_logit = pl.create_choice_model(
    data=model_data,
    obs_id_col="OBS_ID",
    alt_id_col="ALT_ID",
    choice_col="MODE",
    model_type="Nested Logit",
    specification=basic_specification,
    names=basic_names,
    nest_spec=nest_membership,
)

# One starting value per estimated parameter: 2 nest parameters plus
# 7 utility coefficients, i.e. 9 zeros.
# NOTE(review): "air_Modes" holds a single alternative; check whether its nest
# parameter was meant to be held fixed (fit_mle's constrained_pos) - confirm.
n_parameters = len(nest_membership) + sum(
    len(labels) for labels in basic_names.values()
)
initial_values = np.zeros(n_parameters)
nested_logit.fit_mle(initial_values)
nested_logit.summary
# | -----------------------------------------------------------------
# |                 parameters   std_err   t_stats   p_values
# | -----------------------------------------------------------------
# | air_Modes        0.0000      NaN       NaN       NaN     
# | ground_Modes     0.8187      0.668     1.225     0.220   
# | ASC_air          4.0002      1.022     3.914     0.000   
# | ASC_train        4.2224      0.744     5.672     0.000   
# | ASC_bus          3.9471      0.747     5.285     0.000   
# | TTME            -0.0787      0.013    -6.180     0.000   
# | INVT            -0.0038      0.001    -4.718     0.000   
# | HINC_train_bus  -0.0364      0.010    -3.513     0.000   
# | PSIZE_air       -0.7535      0.244    -3.088     0.002   
# |==================================================================

In [None]:
# Build the DataFrame used for prediction.
# .copy() makes this an independent frame: columns are assigned on it later,
# and without the copy pandas treats it as a slice of model_data and emits
# SettingWithCopyWarning on those assignments.
prediction_df = model_data[['OBS_ID', 'ALT_ID', 'MODE', 'TTME', 'INVT', 'HINC', 'PSIZE']].copy()
# Name of the column holding the observed choice indicator.
choice_column = "MODE"

# Scenario helper: rescale the in-vehicle time of one alternative (train by default).
def INVT(x, y, target_alt=1, factor=0.8):
    """Return the in-vehicle time to use in the what-if scenario.

    Args:
        x: Alternative id of the row (its ALT_ID value).
        y: In-vehicle time of the row (its INVT value).
        target_alt: Alternative id whose time is rescaled. Defaults to 1,
            the id that used to be hard-coded here.
        factor: Multiplier applied to the time of ``target_alt``. Defaults
            to 0.8, the value that used to be hard-coded here.

    Returns:
        ``y * factor`` when ``x`` equals ``target_alt``, otherwise ``y``
        unchanged.
    """
    return y * factor if x == target_alt else y
# Replace INVT row by row with the scenario value returned by INVT().
def _scenario_invt(row):
    return INVT(row['ALT_ID'], row['INVT'])


prediction_df['INVT'] = prediction_df.apply(_scenario_invt, axis=1)

# predict() is called with its defaults; the returned values are stored
# below as one new column of prediction_df.
prediction_array = nested_logit.predict(prediction_df)

# Keep the predicted probabilities next to the scenario data.
prediction_df["NL_Predictions"] = prediction_array

# Per-alternative averages: the observed choice indicator (original share)
# versus the predicted probability under the modified scenario (new share).
share_by_alt = prediction_df.groupby('ALT_ID')[['MODE', 'NL_Predictions']].mean()
raw_probability = share_by_alt['MODE']
new_probability = share_by_alt['NL_Predictions']

for title, shares in (
    ("--------原概率--------", raw_probability),
    ("--------新概率--------", new_probability),
):
    print(title)
    print(shares)