In [1]:
import pickle
import pandas as pd
import json
import time
from mode_logit import long_form_data, asclogit_pred
import numpy as np
from sklearn.metrics import confusion_matrix
import copy

## Functions 

In [2]:
def check_iia(base_prob, new_prob, alts, groups=(1,)):
    """Print per-alternative probabilities before/after a change, to inspect IIA.

    For every requested case the base probability, the new probability and the
    relative change (in percent) of each alternative are printed. Under IIA the
    relative change is the same for all pre-existing alternatives.

    Parameters
    ----------
    base_prob : array-like, shape (n_cases, n_base_alts)
        Probabilities before the change. If it has fewer columns than ``alts``
        it is right-padded with zeros (alternatives that did not exist yet).
    new_prob : array-like, shape (n_cases, >= len(alts))
        Probabilities after the change.
    alts : dict
        Maps column index -> alternative name; the values are iterated in
        dict order and matched to columns by position.
    groups : iterable of int
        Row indices (case ids) to report.
    """
    base_prob = np.asarray(base_prob, dtype=float)
    new_prob = np.asarray(new_prob, dtype=float)
    n_missing = len(alts) - base_prob.shape[1]
    if n_missing > 0:
        padding = np.zeros((base_prob.shape[0], n_missing))
        base_prob = np.hstack((base_prob, padding))
    for g in groups:
        print('\nCheck for IIA (groupID = {}):\n------------------------------------------'.format(g))
        print('             Base       New       Change')
        for idx, alt in enumerate(alts.values()):
            base, new = base_prob[g, idx], new_prob[g, idx]
            # A zero base probability (e.g. a newly added alternative) legitimately
            # yields inf/nan here; keep that result but do not emit RuntimeWarnings.
            with np.errstate(divide='ignore', invalid='ignore'):
                change = 100 * (new - base) / base
            print('%-8s %10.4f %10.4f %10.2f%%' % (alt, base, new, change))

In [3]:
class modePredictor:
    """Predict the travel mode of OD trips with a random forest or a logit model.

    The predictor works on private deep copies of ``ods`` and ``persons``; after
    any ``*_predict`` call the annotated results are available as ``self.ods``
    and ``self.persons`` (the caller's objects are not modified).

    ``apply_predictions`` reads two module-level names that must be defined
    before a prediction method is called: ``portals`` and ``SPEEDS_MET_S``.
    """

    def __init__(self, ods, persons, rf_model=None, rf_features=None, logit_model=None, logit_features=None):
        """Build the predictor and its feature table.

        Parameters
        ----------
        ods : list of dict
            OD records; each must carry the fields read by ``generate_feature_df``
            and ``apply_predictions``.
        persons : list of dict
            Person records with at least ``'person_id'`` and ``'activity_objs'``.
        rf_model : object or None
            Fitted classifier exposing ``predict_proba`` / ``predict``.
        rf_features : list of str or None
            Column order expected by ``rf_model``.
        logit_model : dict or None
            Logit model dict with key ``'just_point'`` and either ``'params'``
            or (when ``just_point`` is False) a ``'model'`` exposing ``coefs``.
        logit_features : dict or None
            Dict with keys ``'alt_attrs'``, ``'alt_attr_vars'``,
            ``'generic_attrs'`` and ``'constant'``.
        """
        # Private copies: new alternatives and parameter edits must not leak
        # back into the objects owned by the caller.
        logit_model = copy.deepcopy(logit_model)
        logit_features = copy.deepcopy(logit_features) if logit_features is not None else {}
        rf_model = copy.deepcopy(rf_model)
        rf_features = copy.deepcopy(rf_features) if rf_features is not None else []

        if len(logit_features):
            self.logit_alt_attrs = logit_features['alt_attrs']
            self.logit_alt_attr_vars = logit_features['alt_attr_vars']
            self.logit_generic_attrs = logit_features['generic_attrs']
            self.logit_constant = logit_features['constant']
        else:
            self.logit_alt_attrs, self.logit_alt_attr_vars, self.logit_generic_attrs = {}, [], []
            # set_new_alt reads this attribute, so it must always exist.
            self.logit_constant = False
        self.rf_features = rf_features if len(rf_features) else []
        self.rf_model = rf_model
        self.logit_model = logit_model
        if self.logit_model is not None and self.logit_model['just_point'] is False:
            # For convenience, use just_point=True in all cases so that the
            # parameters can be modified easily. The coefficients are taken from
            # the model that was passed in (not from a notebook-level global).
            self.logit_model['just_point'] = True
            coefs = self.logit_model['model'].coefs
            self.logit_model['params'] = dict(zip(list(coefs.index), list(coefs.values)))

        self.base_alts = {0: 'drive', 1: 'cycle', 2: 'walk', 3: 'PT'}
        self.new_alts = []
        self.new_alts_like = {}    # new alternative name -> base alternative it mimics
        self.update_alts()
        self.ods = copy.deepcopy(ods)
        self.persons = copy.deepcopy(persons)
        self.person_lookup = {p['person_id']: p for p in self.persons}
        self.prob = []
        self.v = []     # observed utility for logit
        self.mode = []
        self.generate_feature_df()

    def generate_feature_df(self):
        """Build ``self.feature_df`` (and the pristine ``self.base_feature_df``) from ``self.ods``.

        Adds dummy columns for the categorical person attributes and the
        per-mode travel-time columns extracted from ``'activity_routes'``.
        """
        feature_df = pd.DataFrame(self.ods)
        for feat in ['income', 'age', 'children', 'workers', 'tenure', 'sex',
                     'bach_degree', 'race', 'cars']:
            dummies = pd.get_dummies(feature_df[feat], prefix=feat)
            feature_df = pd.concat([feature_df, dummies], axis=1)
        routes = feature_df['activity_routes']
        # activity_routes is indexed by mode: 0=drive, 1=cycle, 2=walk, 3=PT.
        feature_df['drive_time_minutes'] = routes.map(lambda r: r[0]['route']['driving'])
        feature_df['cycle_time_minutes'] = routes.map(lambda r: r[1]['route']['cycling'])
        feature_df['walk_time_minutes'] = routes.map(lambda r: r[2]['route']['walking'])
        feature_df['PT_time_minutes'] = routes.map(lambda r: r[3]['route']['pt'])
        feature_df['walk_time_PT_minutes'] = routes.map(lambda r: r[3]['route']['walking'])
        feature_df['drive_time_PT_minutes'] = 0
        # Distance is derived from the driving time at 30 distance units per hour
        # (presumably km/h, given the column name).
        feature_df['network_dist_km'] = feature_df['drive_time_minutes'] * 30 / 60
        self.base_feature_df = copy.deepcopy(feature_df)
        self.feature_df = copy.deepcopy(feature_df)

    def set_feature_df(self, feature_df_in):
        """Replace ``self.feature_df`` with a deep copy of ``feature_df_in``."""
        self.feature_df = copy.deepcopy(feature_df_in)

    def get_long_form_data(self):
        """Convert ``self.feature_df`` to long form and store it in ``self.long_data_df``."""
        nalt = len(self.alts)
        long_data_df = long_form_data(self.feature_df, alt_attrs=self.logit_alt_attrs,
            generic_attrs=self.logit_generic_attrs, nalt=nalt, y_true=False)
        self.long_data_df = copy.deepcopy(long_data_df)

    def rf_predict(self, method='random'):
        """Predict modes with the random forest and apply them to ods/persons.

        Parameters
        ----------
        method : {'random', 'max'}
            'random' samples one mode per case from the predicted probabilities;
            'max' uses ``rf_model.predict``.

        Raises
        ------
        ValueError
            If ``method`` is not one of the supported values.
        """
        if self.rf_model is None:
            print('[Error] no rf model')
            return
        if method not in ('random', 'max'):
            raise ValueError("method must be 'random' or 'max', got {!r}".format(method))
        feature_df = self.feature_df.copy()
        for rff in self.rf_features:
            if rff not in feature_df.columns:
                # Dummy level absent from this data set: the feature is all zeros.
                feature_df[rff] = 0
        feature_df = feature_df[self.rf_features]   # reorder columns to match rf model
        mode_probs = self.rf_model.predict_proba(feature_df)
        self.prob = mode_probs
        if method == 'random':
            mode = np.asarray([np.random.choice(range(len(row)), size=1, p=row)[0] for row in mode_probs])
        else:
            mode = self.rf_model.predict(feature_df)
        self.mode = mode
        self.apply_predictions()

    def update_alts(self):
        """Rebuild ``self.alts`` (index -> name) and ``self.alts_reverse`` (name -> index)."""
        alts = self.base_alts.copy()
        for i, new_alt in enumerate(self.new_alts):
            alts[4 + i] = new_alt
        self.alts = alts
        self.alts_reverse = {v: k for k, v in self.alts.items()}

    def mnl_predict(self, method='random', seed=None):
        """Predict modes with the multinomial logit model and apply them.

        Stores the probabilities, chosen modes and utilities returned by
        ``asclogit_pred`` in ``self.prob``, ``self.mode`` and ``self.v``.
        """
        self.update_alts()
        self.get_long_form_data()
        long_data_df = copy.deepcopy(self.long_data_df)
        prob, mode, v = asclogit_pred(long_data_df, self.logit_model, customIDColumnName='group',
            method=method, alts=self.alts, seed=seed)
        self.prob, self.mode, self.v = prob, mode, v
        self.apply_predictions()

    def quasi_nl_predict(self, nests_spec, method='random', n_sample=10, seed=None):
        """Predict modes with a simulated quasi-nested logit and apply them.

        Each nest adds a dummy variable (1 for alternatives in the nest) whose
        coefficient is ``sigma * s`` with ``s`` drawn as ``-|N(0, 1)|``; the
        probabilities are averaged over ``n_sample`` draws.

        Parameters
        ----------
        nests_spec : list of dict
            e.g. ``[{'name': 'cycle_like', 'alts': ['cycle', 'dockless'], 'sigma': 0.5}]``.
            ``'name'`` is optional and defaults to the alternatives joined by '_'.
            The dicts are not modified.
        method : {'random', 'max'}
            How the outcome is derived from the averaged probabilities.
        n_sample : int
            Number of simulation draws (must be >= 1).
        seed : int or None
            Seed for NumPy's global RNG; ``0`` is a valid seed.

        Raises
        ------
        ValueError
            If ``method`` is unsupported or ``n_sample`` < 1.

        Note: ``self.v`` is not updated by this method.
        """
        if method not in ('random', 'max'):
            raise ValueError("method must be 'random' or 'max', got {!r}".format(method))
        if n_sample < 1:
            raise ValueError('n_sample must be >= 1, got {!r}'.format(n_sample))
        self.update_alts()
        self.get_long_form_data()
        long_data_df = copy.deepcopy(self.long_data_df)
        logit_model_tmp = copy.deepcopy(self.logit_model)
        nest_names = []
        for nest in nests_spec:
            nest_name = nest['name'] if 'name' in nest else '_'.join(nest['alts'])
            nest_names.append(nest_name)
            long_data_df[nest_name] = 0
            idx_alts_in_nest = [self.alts_reverse[x] for x in nest['alts']]
            long_data_df.loc[long_data_df['alt'].isin(idx_alts_in_nest), nest_name] = 1

        # mxlogit prediction by simulation
        if seed is not None:
            np.random.seed(seed)
        std_normal_samples = np.random.randn(n_sample, len(nests_spec))
        std_normal_samples = -np.abs(std_normal_samples)
        prob = np.zeros(long_data_df.shape[0]).reshape(-1, len(self.alts))
        for sample_row in std_normal_samples:
            for s, nest, nest_name in zip(sample_row, nests_spec, nest_names):
                logit_model_tmp['params'][nest_name] = nest['sigma'] * s
            sample_prob, sample_mode, sample_v = asclogit_pred(long_data_df, logit_model_tmp,
                customIDColumnName='group', method='none', alts=self.alts, seed=seed)
            prob += np.asarray(sample_prob)
        prob /= n_sample
        if method == 'random':
            mode = np.asarray([np.random.choice(list(self.alts.keys()), size=1, p=row)[0] for row in prob])
        else:
            mode = prob.argmax(axis=1)
        self.mode, self.prob = mode, prob
        self.apply_predictions()

    def set_logit_model_params(self, params=None):
        """Set (or add) logit parameters; ``params`` maps variable name -> value."""
        for v, p in (params or {}).items():   # v=varname, p=parameter
            self.logit_model['params'][v] = p

    def set_new_alt(self, new_alt_spec):
        """Register a new alternative (mode) in the logit model.

        new_alt_spec = {'name': 'dockless',
                        'attrs': {'time_minutes': {'copy': 'cycle', 'operation': '-5', 'min': 0.1, 'max': None},
                                  'walk_time_PT_minutes': 'p-5',
                                  'drive_time_PT_minutes': 0},   # a number (np.nan allowed)
                        'copy': 'drive',
                        'params': {'ASC for dockless': 3}
                       }

        * ``'attrs'``: per alternative-specific attribute either a number, a
          dict (``'copy'`` source mode, ``'operation'`` appended to the source
          values, optional ``'min'`` / ``'max'`` bounds, ``None`` = no bound) or
          a short string such as ``'c*0.7'`` (first char d/c/w/p selects
          drive/cycle/walk/PT, the rest is the operation, lower bound 0).
          Attributes known to the model but missing here are set to 0.
          Attributes unknown to the model are added for this alternative only
          and must be numeric.
        * ``'copy'``: mode whose alternative-specific parameters (and ASC) are
          copied; parameters that do not exist for that mode are set to 0.
        * ``'params'``: overrides for existing parameter names.

        Raises
        ------
        ValueError
            If an alternative with the same name already exists.
        TypeError
            If an attribute specification has an unsupported type.
        """
        name = new_alt_spec['name']
        if name in self.base_alts.values() or name in self.new_alts:
            # A duplicate name would make alts_reverse ambiguous.
            raise ValueError('alternative {!r} already exists'.format(name))
        new_alt_attrs = new_alt_spec.get('attrs', {})
        new_alt_generic_params = new_alt_spec.get('params', {})
        self.new_alts.append(name)
        self.update_alts()

        # alternative specific attributes
        alias = {'d': 'drive', 'c': 'cycle', 'w': 'walk', 'p': 'PT'}
        for alt_attr in self.logit_alt_attrs:
            new_col = '{}_{}'.format(name, alt_attr)
            if alt_attr in new_alt_attrs:
                attr_info = new_alt_attrs[alt_attr]
                if isinstance(attr_info, (int, float, np.number)):
                    self.feature_df[new_col] = attr_info
                elif isinstance(attr_info, (dict, str)):
                    if isinstance(attr_info, str):
                        attr_info = {'copy': alias[attr_info[0]], 'operation': attr_info[1:], 'min': 0}
                    source_col = '{}_{}'.format(attr_info['copy'], alt_attr)
                    # Float copy: never writes through to the source column and
                    # lets fractional bounds be applied to integer columns.
                    tmp = np.array(self.feature_df[source_col], dtype=float)
                    # SECURITY: eval executes the operation string as Python code;
                    # only pass trusted specifications.
                    tmp = eval('tmp' + attr_info.get('operation', ''))
                    lower, upper = attr_info.get('min'), attr_info.get('max')
                    if lower is not None:
                        tmp[np.where(tmp < lower)] = lower
                    if upper is not None:
                        tmp[np.where(tmp > upper)] = upper
                    self.feature_df[new_col] = tmp
                else:
                    raise TypeError('unsupported specification for attribute {!r}: {!r}'.format(
                        alt_attr, attr_info))
            else:
                self.feature_df[new_col] = 0
                print('[warning] no information for {}_{}, set to 0'.format(name, alt_attr))
            self.logit_alt_attrs[alt_attr].append(new_col)
        # attributes that appear for the first time and exist only for this new alternative
        for alt_attr in new_alt_attrs:
            if alt_attr not in self.logit_alt_attrs:
                new_col = '{}_{}'.format(name, alt_attr)
                self.feature_df[new_col] = new_alt_attrs[alt_attr]   # only numerical values are valid
                # 'nan' placeholders for all earlier alternatives, real column for this one.
                self.logit_alt_attrs[alt_attr] = ['nan' for i in range(len(self.alts) - 1)] + [new_col]

        # logit model coefficients for this alternative
        params = self.logit_model['params']
        copy_from = new_alt_spec.get('copy') if 'copy' in new_alt_spec else None
        for g_attr in self.logit_generic_attrs:
            if 'copy' in new_alt_spec:
                # if not found, the source is the reference level, hence 0
                params['{} for {}'.format(g_attr, name)] = params.get('{} for {}'.format(g_attr, copy_from), 0)
            else:
                params['{} for {}'.format(g_attr, name)] = 0
        if self.logit_constant:
            if 'copy' in new_alt_spec:
                params['ASC for {}'.format(name)] = params.get('ASC for {}'.format(copy_from), 0)
            else:
                params['ASC for {}'.format(name)] = 0
        for p in new_alt_generic_params:
            if p in params:
                params[p] = new_alt_generic_params[p]
            else:
                print('[warning] invalid parameter name: {}'.format(p))

        if 'copy' in new_alt_spec:
            self.new_alts_like[name] = new_alt_spec['copy']

    def show_agg_prob(self):
        """Print the share of each alternative implied by the predicted probabilities."""
        print('\nAggregated prob: \n----------------')
        ag_prob = np.asarray(self.prob).sum(axis=0)
        ag_prob = ag_prob / ag_prob.sum()
        for m, p in zip(self.alts.values(), ag_prob):
            print('{}: {:4.4f}'.format(m, p))

    def show_agg_outcome(self):
        """Print count and percentage share of each alternative among predicted outcomes."""
        print('\nAggregated outcome: \n----------------')
        mode = np.asarray(self.mode)
        ncs = len(mode)
        if ncs == 0:
            # Nothing predicted yet: avoid dividing by zero.
            return
        for idx, m in self.alts.items():
            this_ncs = len(np.where(mode == idx)[0])
            print('{}: {}, {:4.2f}%'.format(m, this_ncs, this_ncs / ncs * 100))

    def apply_predictions(self):
        """Write the predicted modes and derived route info into ``self.ods`` / ``self.persons``.

        Trips predicted as a new alternative are routed like the alternative it
        copies (``self.new_alts_like``), or like drive (index 0) when no copy
        was specified. Uses the module-level ``portals`` and ``SPEEDS_MET_S``.
        """
        mode = np.array(self.mode)   # private copy; also accepts a plain list
        for m in self.new_alts:
            this_mode_idx = self.alts_reverse[m]
            if m in self.new_alts_like:
                replace_mode_idx = self.alts_reverse[self.new_alts_like[m]]
            else:
                replace_mode_idx = 0
            mode[np.where(mode == this_mode_idx)] = replace_mode_idx
        for i, od in enumerate(self.ods):
            # Plain int so that the stored value stays JSON-serialisable.
            chosen_mode = int(mode[i])
            od['mode'] = chosen_mode
            o_type, d_type = od['o_loc']['type'], od['d_loc']['type']
            # NOTE(review): there is no branch for other origin/destination type
            # combinations (e.g. portal -> portal); such a record reuses the values
            # of the previous iteration (or fails on the first one) - confirm that
            # these records cannot occur in ods.
            if o_type == 'geogrid' and d_type == 'geogrid':
                internal_route_mode = od['activity_routes'][chosen_mode]['route']
                external_time_sec = 0
                # NOTE(review): unlike the portal branches, the three lookups below
                # are not indexed by chosen_mode - verify against the data schema.
                node_path = od['activity_routes']['node_path']
                cum_dist = od['activity_routes']['cum_dist']
                coords = od['activity_routes']['coords']
                time_to_enter_site = 0
            elif o_type == 'portal' and d_type == 'geogrid':     # travel in
                leg = od['activity_routes'][chosen_mode]
                internal_route_mode = leg['internal_route']['route']
                external_time_sec = leg['external_time']
                node_path = leg['node_path']
                od['o_loc']['ind'] = leg['portal']
                od['o_loc']['ll'] = portals['features'][leg['portal']]['properties']['centroid']
                cum_dist = leg['cum_dist']
                coords = leg['coords']
                # The external part is travelled first, before entering the site.
                time_to_enter_site = leg['external_time']
            elif o_type == 'geogrid' and d_type == 'portal':     # travel out
                leg = od['activity_routes'][chosen_mode]
                internal_route_mode = leg['internal_route']['route']
                external_time_sec = leg['external_time']   # or use external_time_sec=0?
                node_path = leg['node_path']
                cum_dist = leg['cum_dist']
                coords = leg['coords']
                time_to_enter_site = 0
                od['d_loc']['ind'] = leg['portal']
                od['d_loc']['ll'] = portals['features'][leg['portal']]['properties']['centroid']

            # Route times are read as minutes and stored as whole seconds.
            if chosen_mode == 0:
                internal_time_sec = int(internal_route_mode['driving'] * 60)
            elif chosen_mode == 1:
                internal_time_sec = int(internal_route_mode['cycling'] * 60)
            elif chosen_mode == 2:
                internal_time_sec = int(internal_route_mode['walking'] * 60)
            elif chosen_mode == 3:
                internal_time_sec = int((internal_route_mode['pt'] + internal_route_mode['walking']) * 60)

            od['internal_time_sec'] = internal_time_sec
            od['external_time_sec'] = external_time_sec

            if od['person_id'] in self.person_lookup:
                # The trip is recorded on the activity following od_id.
                activity = self.person_lookup[od['person_id']]['activity_objs'][od['od_id'] + 1]
                activity['mode'] = chosen_mode
                activity['internal_time_sec'] = internal_time_sec
                activity['external_time_sec'] = external_time_sec
                activity['node_path'] = node_path
                # cum_dist itself is not stored; only the cumulative times derived from it.
                activity['coords'] = coords
                speed = SPEEDS_MET_S[chosen_mode]
                activity['cum_time_from_act_start'] = [time_to_enter_site] + [
                    time_to_enter_site + cd / speed for cd in cum_dist]


##  Load Pre-saved Data

In [4]:
# Speed per base mode index (0=drive, 1=cycle, 2=walk, 3=PT); each value is a
# speed divided by 3.6 (km/h -> m/s, going by the constant's name).
SPEEDS_MET_S = {
    0: 30/3.6,
    1: 15/3.6,
    2: 4.8/3.6,
    3: 20/3.6,
}


def _load_pickle(path):
    """Load one pickle file, closing the file handle afterwards.

    SECURITY: unpickling can execute arbitrary code; only load trusted files.
    """
    with open(path, 'rb') as fh:
        return pickle.load(fh)


def _load_json(path):
    """Load one JSON file, closing the file handle afterwards."""
    with open(path, 'r') as fh:
        return json.load(fh)


ods = _load_pickle('ods.p')
persons = _load_pickle('persons.p')
modelDict = _load_pickle('trip_mode_logit.p')
featuresDict = _load_json('logit_features.json')
mode_rf = _load_pickle('trip_mode_rf.p')
rf_features = _load_json('rf_features.json')
portals = _load_json('portals.json')



## Test for Predictions 

Use mp = modePredictor(ods, persons, rf_model, rf_features, logit_model, logit_features) to get an instance of modePredictor.  
It can make predictions using RF, standard logit (mnl), or quasi-nested logit  
After any one of the 3 methods:  
- method = 'random' (default) will predict outcomes by random choices according to predicted probabilities  
- method = 'max' will predict outcomes by argmax(probabilities) 
- mp.prob will return the predicted probabilities
- mp.mode will return the predicted outcomes
- mp.show_agg_prob() will display the aggregated probability for each mode according to predicted case-specific probabilities  
- mp.show_agg_outcome() will display the aggregated count and share for each mode according to predicted outcomes
    

### RF Predictions

When rf_model and rf_features are loaded to mp, use mp.rf_predict() to obtain RF predictions

In [5]:
# Fresh predictor: it deep-copies ods/persons, so the loaded data stay untouched.
mp = modePredictor(ods=ods, persons=persons, rf_model=mode_rf, rf_features=rf_features, 
    logit_model=modelDict, logit_features=featuresDict)
# 'random' samples one mode per trip from the RF probabilities (unseeded, so
# the aggregated outcome varies between runs).
mp.rf_predict(method='random')
mp.show_agg_prob()
mp.show_agg_outcome()


Aggregated prob: 
----------------
drive: 0.5942
cycle: 0.0329
walk: 0.0565
PT: 0.3164

Aggregated outcome: 
----------------
drive: 706, 61.18%
cycle: 40, 3.47%
walk: 74, 6.41%
PT: 334, 28.94%


### Logit Predictions

When logit_model and logit_features are loaded to mp, use mp.mnl_predict() or mp.quasi_nl_predict() to obtain logit predictions

In [6]:
# Fresh predictor with the base alternatives only (drive, cycle, walk, PT).
mp = modePredictor(ods=ods, persons=persons, rf_model=mode_rf, rf_features=rf_features, 
    logit_model=modelDict, logit_features=featuresDict)

# Standard multinomial logit; no seed is passed, so sampled outcomes vary between runs.
mp.mnl_predict(method='random')
mp.show_agg_prob()
mp.show_agg_outcome()
base_prob = mp.prob  #for further comparison (check for IIA)


Aggregated prob: 
----------------
drive: 0.5911
cycle: 0.1087
walk: 0.0822
PT: 0.2180

Aggregated outcome: 
----------------
drive: 676, 58.58%
cycle: 117, 10.14%
walk: 89, 7.71%
PT: 272, 23.57%


### Introducing New Mode 

We can introduce a new mode by mp.set_new_alt(new_alt_spec)  
The key argument, new_alt_spec, is a dict with information of the name of this mode (key='name'), how alternative specific attributes (such as time_minutes) are specified (key='attrs'), how alternative specific parameters for generic attributes (such as gender, ASC) are set (key='copy' & 'params').  
- key='name': the name of this mode.
- key='attrs': the value of this key is another dict with key=attribute varname, and value=how to specify values for this var  
For example: 'attrs': {'time_minutes': 5, 'walk_time_PT_minutes': 1} will set time_minutes for this new mode to 5, and walk_time_PT_minutes to 1  
If the attribute of this new mode can be derived from another mode, we can use a dict with keys=('copy','operation','min','max') to set it.  
For example: 'attrs': {'time_minutes': {'copy':'drive', 'operation':'+3', 'min':0, 'max':20}} will set the 'time_minutes' of the new mode to the 'time_minutes' of 'drive' plus 3, and the results will be no less than 0 and no larger than 20  
An alternative way is to use a short string, like 'd+3': the first char must be one of 'd','c','w','p', representing 'drive','cycle','walk','PT'; the second char must be one of '+','-','*','/', followed by the operand. With this short form the result is bounded below by 0.  
- key='copy': all alternative specific parameters for generic attributes for this new mode will copy from the value-mode  
For example, if we are introducing uber and assume that people have very similar preference for uber and drive, we can set 'copy':'drive', then the model will set 'ASC for uber' = 'ASC for drive', 'income_gt_50 for uber' = 'income_gt_50 for drive'  
- key='params': a dict with key=varname in logit model, and value=new value for this parameter

For example, the following code will add a new mode named "dockless"  
Its "time_minutes" is copied from "cycle" and then reduced by 20, with a lower bound of 50  
Its "walk_time_PT_minutes" would be set as nan  
A new attribute, "wait_time" is added for it, with value set to 2  
Since "drive_time_PT_minutes" is included in logit_features['alt_attrs'] and is not explicitly specified, it will be set to 0

In [7]:
mp = modePredictor(ods=ods, persons=persons, rf_model=mode_rf, rf_features=rf_features, 
    logit_model=modelDict, logit_features=featuresDict)
# time_minutes: cycle time minus 20, floored at 50; walk_time_PT_minutes: NaN;
# wait_time: attribute not yet known to the model, added for dockless only.
mp.set_new_alt({'name': 'dockless', 
                'attrs':{'time_minutes':{'copy':'cycle', 'operation':'-20', 'min':50}, 
                         'walk_time_PT_minutes':np.nan, 
                         'wait_time':2},
                'copy': 'cycle'
               })
# Inspect the columns that set_new_alt added to mp.feature_df.
mp.feature_df[['drive_time_minutes', 'cycle_time_minutes', 'walk_time_minutes', 'PT_time_minutes', 'dockless_time_minutes',
              'dockless_walk_time_PT_minutes', 'dockless_drive_time_PT_minutes', 'dockless_wait_time']].head()



Unnamed: 0,drive_time_minutes,cycle_time_minutes,walk_time_minutes,PT_time_minutes,dockless_time_minutes,dockless_walk_time_PT_minutes,dockless_drive_time_PT_minutes,dockless_wait_time
0,37.255139,74.510277,232.968037,46.064967,54.510277,,0,2
1,37.289025,74.57805,233.179825,46.115796,54.57805,,0,2
2,30.096178,60.192356,188.124196,33.784225,50.0,,0,2
3,31.297005,62.59401,195.616127,33.714849,50.0,,0,2
4,30.096178,60.192356,188.124196,33.784225,50.0,,0,2


A more flexible way to set these attributes is to retrieve the feature_df, modify it, and then load it back to mp with set_feature_df()  
For example, the following codes will set "dockless_time_minutes" to 20

In [8]:
# mp.feature_df is the predictor's own DataFrame object (not a copy), so the
# assignment below modifies it in place.
feature_df = mp.feature_df
feature_df['dockless_time_minutes'] = 20
mp.set_feature_df(feature_df)
# Inspect the result through mp.feature_df.
mp.feature_df[['drive_time_minutes', 'cycle_time_minutes', 'walk_time_minutes', 'PT_time_minutes', 'dockless_time_minutes',
              'dockless_walk_time_PT_minutes', 'dockless_drive_time_PT_minutes', 'dockless_wait_time']].head()

Unnamed: 0,drive_time_minutes,cycle_time_minutes,walk_time_minutes,PT_time_minutes,dockless_time_minutes,dockless_walk_time_PT_minutes,dockless_drive_time_PT_minutes,dockless_wait_time
0,37.255139,74.510277,232.968037,46.064967,20,,0,2
1,37.289025,74.57805,233.179825,46.115796,20,,0,2
2,30.096178,60.192356,188.124196,33.784225,20,,0,2
3,31.297005,62.59401,195.616127,33.714849,20,,0,2
4,30.096178,60.192356,188.124196,33.784225,20,,0,2


check for logit long form data. Since "wait_time" is just for "dockless" (alt=4), it is set to 0 for all other modes

In [9]:
mp.get_long_form_data() # not needed for prediction: mnl_predict and quasi_nl_predict call it themselves
# One row per (group, alt) pair; show the first two cases (5 alternatives each).
mp.long_data_df[['group', 'alt', 'time_minutes', 'walk_time_PT_minutes', 'drive_time_PT_minutes', 'wait_time']].head(10)

Unnamed: 0,group,alt,time_minutes,walk_time_PT_minutes,drive_time_PT_minutes,wait_time
0,0,0,37.255139,0.0,0,0
1,0,1,74.510277,0.0,0,0
2,0,2,232.968037,0.0,0,0
3,0,3,46.064967,5.699505,0,0
4,0,4,20.0,,0,2
5,1,0,37.289025,0.0,0,0
6,1,1,74.57805,0.0,0,0
7,1,2,233.179825,0.0,0,0
8,1,3,46.115796,5.699505,0,0
9,1,4,20.0,,0,2


Since we set "copy": "cycle", the alternative specific parameters for generic attributes for dockless are copied from cycle, as shown below

In [10]:
# List every logit parameter as "name: value", in the order stored in the model.
logit_params = mp.logit_model['params']
for param_name in logit_params:
    print('{}: {}'.format(param_name, logit_params[param_name]))

time_minutes: -0.0038899608846546803
drive_time_PT_minutes: 0.33714538076606515
income_gt100 for cycle: -0.23232330284548558
income_gt100 for walk: 0.29046598373117344
income_gt100 for PT: -0.03744839885809208
income_gt35-lt100 for cycle: -0.014737820374736352
income_gt35-lt100 for walk: 0.06921757817127394
income_gt35-lt100 for PT: -0.36851611169523874
income_lt35 for cycle: 0.03042728098125199
income_lt35 for walk: -0.10720014127908965
income_lt35 for PT: 0.021843414900332515
age_19 and under for cycle: 1.3317274337731626
age_19 and under for walk: 0.6556711525346325
age_19 and under for PT: 1.650895800141563
age_20 to 35 for cycle: 0.03262820253787115
age_20 to 35 for walk: -0.07981233909250676
age_20 to 35 for PT: -0.5526182598973802
age_35 to 60 for cycle: -0.32882613278399225
age_35 to 60 for walk: -0.1090351291247397
age_35 to 60 for PT: -0.7091992505962217
age_above 60 for cycle: -1.2521633445887543
age_above 60 for walk: -0.21434026405983464
age_above 60 for PT: -0.77319937502

### Logit Predictions with New Mode

In [11]:
mp = modePredictor(ods=ods, persons=persons, rf_model=mode_rf, rf_features=rf_features, 
    logit_model=modelDict, logit_features=featuresDict)
# Add "dockless" with cycle's alternative-specific parameters.
mp.set_new_alt({'name': 'dockless', 
                'attrs':{'time_minutes': 'c*0.7'},  # dockless_time_minutes = cycle_time_minutes * 0.7
                'copy': 'cycle'
               })
# Standard logit over the five alternatives.
mp.mnl_predict(method='random')
mp.show_agg_outcome()
mp.show_agg_prob()
new_prob_mnl = mp.prob   #for further comparison (check for IIA)


Aggregated outcome: 
----------------
drive: 633, 54.85%
cycle: 112, 9.71%
walk: 85, 7.37%
PT: 223, 19.32%
dockless: 101, 8.75%

Aggregated prob: 
----------------
drive: 0.5469
cycle: 0.0883
walk: 0.0748
PT: 0.1986
dockless: 0.0915


### Quasi-NL Predictions with New Mode

mp.quasi_nl_predict(nests_spec) will make quasi-nl predictions; nests_spec is a list, and each element is a dict describing one nest.  
The dict has keys = 'name' & 'alts' & 'sigma':  
- key='name': the name of this nest, optional, only useful for data debug
- key='alts': a list of modes in this nest
- key='sigma': a positive float defining our assumption on how strongly the modes in this nest are correlated with each other. A larger sigma yields a stronger within-nest substitution pattern, while a smaller sigma makes the substitution pattern more proportional.

The following cell will set "cycle" and "dockless" into a nest named "cycle_like", and use a small sigma=0.1 to represent low correlation

In [12]:
mp = modePredictor(ods=ods, persons=persons, rf_model=mode_rf, rf_features=rf_features, 
    logit_model=modelDict, logit_features=featuresDict)
mp.set_new_alt({'name': 'dockless', 
                'attrs':{'time_minutes': 'c*0.7'},  # dockless_time_minutes = cycle_time_minutes * 0.7
                'copy': 'cycle'
               })
# One nest holding cycle and dockless; sigma=0.1 scales the simulated nest coefficient.
mp.quasi_nl_predict(nests_spec=[{'name': 'cycle_like', 'alts':['cycle','dockless'], 'sigma':0.1}])
mp.show_agg_outcome()
mp.show_agg_prob()
new_prob_nl_small_sigma = mp.prob  #for further comparison (check for IIA)


Aggregated outcome: 
----------------
drive: 635, 55.03%
cycle: 92, 7.97%
walk: 106, 9.19%
PT: 225, 19.50%
dockless: 96, 8.32%

Aggregated prob: 
----------------
drive: 0.5522
cycle: 0.0841
walk: 0.0757
PT: 0.2009
dockless: 0.0871


The following cell will keep the same nest, but use a large sigma=0.9 to represent high correlation

In [13]:
mp = modePredictor(ods=ods, persons=persons, rf_model=mode_rf, rf_features=rf_features, 
    logit_model=modelDict, logit_features=featuresDict)
mp.set_new_alt({'name': 'dockless', 
                'attrs':{'time_minutes': 'c*0.7'},  # dockless_time_minutes = cycle_time_minutes * 0.7
                'copy': 'cycle'
               })
# Same nest as before, now with sigma=0.9.
mp.quasi_nl_predict(nests_spec=[{'name': 'cycle_like', 'alts':['cycle','dockless'], 'sigma':0.9}])
mp.show_agg_outcome()
mp.show_agg_prob()
new_prob_nl_large_sigma = mp.prob  #for further comparison (check for IIA)


Aggregated outcome: 
----------------
drive: 687, 59.53%
cycle: 66, 5.72%
walk: 82, 7.11%
PT: 253, 21.92%
dockless: 66, 5.72%

Aggregated prob: 
----------------
drive: 0.5920
cycle: 0.0526
walk: 0.0825
PT: 0.2183
dockless: 0.0545


In [14]:
import warnings

alts = mp.alts
groups = [0]
# The new alternative has base probability 0, so its relative change divides by
# zero. Silence the resulting warnings for this comparison only, instead of
# disabling all warnings for the rest of the session.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    print('\n[check IIA for mnl] ')
    check_iia(base_prob, new_prob_mnl, alts, groups=groups)
    print('\n[check IIA for quasi-nl (nest: cycle & dockless, sigma=0.1)] ')
    check_iia(base_prob, new_prob_nl_small_sigma, alts, groups=groups)
    print('\n[check IIA for quasi-nl (nest: cycle & dockless, sigma=0.9)] ')
    check_iia(base_prob, new_prob_nl_large_sigma, alts, groups=groups)


[check IIA for mnl] 

Check for IIA (groupID = 0):
------------------------------------------
             Base       New       Change
drive        0.0153     0.0152      -0.76%
cycle        0.0070     0.0069      -0.76%
walk         0.0000     0.0000      -0.76%
PT           0.9777     0.9703      -0.76%
dockless     0.0000     0.0076        inf%

[check IIA for quasi-nl (nest: cycle & dockless, sigma=0.1)] 

Check for IIA (groupID = 0):
------------------------------------------
             Base       New       Change
drive        0.0153     0.0152      -0.66%
cycle        0.0070     0.0065      -7.64%
walk         0.0000     0.0000      -0.66%
PT           0.9777     0.9713      -0.66%
dockless     0.0000     0.0070        inf%

[check IIA for quasi-nl (nest: cycle & dockless, sigma=0.9)] 

Check for IIA (groupID = 0):
------------------------------------------
             Base       New       Change
drive        0.0153     0.0153      -0.04%
cycle        0.0070     0.0036     -4

### Multiple New Modes and New Attributes 

We can include multiple new modes by repeatedly calling mp.set_new_alt(), and define multiple nests for similar modes

In [15]:
mp = modePredictor(ods=ods, persons=persons, rf_model=mode_rf, rf_features=rf_features, 
    logit_model=modelDict, logit_features=featuresDict)
# First new mode: dockless, parameters copied from cycle.
mp.set_new_alt({'name': 'dockless', 
                'attrs':{'time_minutes': 'c*0.7'},  # dockless_time_minutes = cycle_time_minutes * 0.7
                'copy': 'cycle'
               })
# Second new mode: uber, parameters copied from drive.
mp.set_new_alt({'name': 'uber', 
                'attrs':{'time_minutes': 'd+0', # uber_time_minutes = drive_time_minutes
                         'wait_time': 5         # new attr: uber_wait_time=5
                        },  
                'copy': 'drive'
               })
# Two nests, one per pair of similar modes.
mp.quasi_nl_predict(nests_spec=[
    {'name': 'cycle_like', 'alts':['cycle','dockless'], 'sigma':0.5},
    {'name': 'drive_like', 'alts':['drive', 'uber'], 'sigma': 0.5}
])
mp.show_agg_outcome()
mp.show_agg_prob()


Aggregated outcome: 
----------------
drive: 379, 32.84%
cycle: 62, 5.37%
walk: 70, 6.07%
PT: 208, 18.02%
dockless: 72, 6.24%
uber: 363, 31.46%

Aggregated prob: 
----------------
drive: 0.3166
cycle: 0.0546
walk: 0.0685
PT: 0.1872
dockless: 0.0565
uber: 0.3166


Although "wait_time" is included as a new attribute for uber, it won't work unless its parameter is set.  
We can set parameters in logit model using mp.set_logit_model_params({...}), the only argument is a dict with key=varname and value=parameter.  
If the varname already exists, its parameter value is changed; if the varname does not exist, a new parameter is added for the new attribute (like "wait_time" here) so that it takes effect in predictions.  

The following cell compares the effects of "wait_time". Before it is included in the model, the share of uber almost equals that of drive, as they have the same "time_minutes" and ASC-like parameters. After it is included with a strongly negative parameter, the share of uber drops sharply. Comparing the utilities of the first 3 cases shows that the utility of uber decreases by 25 (= 5 * 5, i.e. wait_time of 5 times a parameter of -5)

In [16]:
# Reuses the predictor from the previous cell (dockless and uber already added).
print('================================\nBefore Adding Wait Time\n================================')
mp.mnl_predict()
mp.show_agg_outcome()
mp.show_agg_prob()
# mp.v holds the utilities returned by the logit prediction, one row per case.
print('\nUtilites for the first 3 cases: \n----------------\n{}\n'.format(np.round(mp.v[:3],4)))

print('================================\nAfter Adding Wait Time\n================================')
# Give the so-far unused wait_time attribute a coefficient of -5.
mp.set_logit_model_params({'wait_time': -5})
mp.mnl_predict()
mp.show_agg_outcome()
mp.show_agg_prob()
print('\nUtilites for the first 3 cases: \n----------------\n{}\n'.format(np.round(mp.v[:3],4)))

Before Adding Wait Time

Aggregated outcome: 
----------------
drive: 385, 33.36%
cycle: 71, 6.15%
walk: 71, 6.15%
PT: 187, 16.20%
dockless: 67, 5.81%
uber: 373, 32.32%

Aggregated prob: 
----------------
drive: 0.3261
cycle: 0.0647
walk: 0.0566
PT: 0.1594
dockless: 0.0670
uber: 0.3261

Utilites for the first 3 cases: 
----------------
[[ -0.1449  -0.9279 -14.3544   4.0126  -0.841   -0.1449]
 [ -0.1451  -0.9298 -14.37     4.0125  -0.8428  -0.1451]
 [ -0.1171  -4.5322 -13.5018  -3.6975  -4.4619  -0.1171]]

After Adding Wait Time

Aggregated outcome: 
----------------
drive: 641, 55.55%
cycle: 86, 7.45%
walk: 85, 7.37%
PT: 231, 20.02%
dockless: 111, 9.62%
uber: 0, 0.00%

Aggregated prob: 
----------------
drive: 0.5468
cycle: 0.0882
walk: 0.0748
PT: 0.1985
dockless: 0.0914
uber: 0.0003

Utilites for the first 3 cases: 
----------------
[[ -0.1449  -0.9279 -14.3544   4.0126  -0.841  -25.1449]
 [ -0.1451  -0.9298 -14.37     4.0125  -0.8428 -25.1451]
 [ -0.1171  -4.5322 -13.5018  -3.6975  -

### Problems after Modes are Predicted 

As before, we will modify some other information for ods and persons after mode predictions, such as internal_time_sec, external_time_sec, node_path, cum_time_from_act_start, etc. However, problems arise when there are new modes, because functions like internal_route_costs() and external_route_costs() only work for mode=0,1,2,3. Do we need to re-write these functions? That seems a little troublesome. Besides, ext_route_costs is a precooked file that also only works for mode=0,1,2,3.  

Currently I used a hacking way: if the key of "copy" of a new mode is specified, the codes will use the copied mode to get above information. For example, if dockless is copied from cycle, when a trip is predicted as using dockless micro mobility, then it will have the same node_path, external_time_sec, etc. as if it is using cycle. If "copy" is not specified, the codes will use "drive" to get these information.  

As persons and ods are NOT modified in place like before, we need to use the ods and persons returned by mp after mode predictions, as shown below.

In [17]:
# The predictor works on deep copies, so the loaded `persons` still show the
# state before prediction while `mp.persons` carries the predicted results.
persons_before = persons
persons_after = mp.persons

activity_objs_ex_before = persons_before[0]['activity_objs']
activity_objs_ex_after = persons_after[0]['activity_objs']

print('Example of activity_objs for the first person, before mode predictions:')
print('-------------------------------------------\n{}'.format(activity_objs_ex_before))
# Label corrected: this block shows the data after prediction.
print('\nExample of activity_objs for the first person, after mode predictions:')
print('-------------------------------------------\n{}'.format(activity_objs_ex_after))

Example of activity_objs for the first person, before mode predictions:
-------------------------------------------
[{'t': 2397, 'activity': 'H', 'place_sim': {'type': 'portal', 'geo_id': '261635001001'}}, {'t': 33991, 'activity': 'W', 'place_sim': {'type': 'geogrid', 'ind': 1734}}, {'t': 56535, 'activity': 'H', 'place_sim': {'type': 'portal', 'geo_id': '261635001001'}}, {'t': 67397, 'activity': 'Z', 'place_sim': {'type': 'portal', 'geo_id': '261635211002'}}, {'t': 74859, 'activity': 'H', 'place_sim': {'type': 'portal', 'geo_id': '261635001001'}}]

Example of activity_objs for the first person, before mode predictions:
-------------------------------------------
[{'t': 2397, 'activity': 'H', 'place_sim': {'type': 'portal', 'geo_id': '261635001001'}}, {'t': 33991, 'activity': 'W', 'place_sim': {'type': 'geogrid', 'ind': 1734}, 'mode': 3, 'internal_time_sec': 422, 'external_time_sec': 2754, 'node_path': ['5830991248', '5830991247', '5830991246', '5830991237', '5830991236', '5830991242', 