In [2]:
import pandas  as pd
import numpy as np
import matplotlib.pyplot as plt

import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.models as models
import biogeme.expressions as exp
import biogeme.tools as tools
import biogeme.distributions as dist

---
---

# Auxiliary functions

In [3]:
def qbus_update_globals_bgm(pd_df):
    """Publish the columns of ``pd_df`` as module-level Biogeme variables.

    A scratch ``db.Database`` named ``'tmp_bg_bgm_for_glob'`` is built from
    ``pd_df`` and its ``variables`` mapping is merged into this module's
    globals, so utility expressions can then refer to the columns by name.

    Parameters
    ----------
    pd_df : pandas.DataFrame
        Data handed to ``db.Database``.
    """
    scratch_db = db.Database('tmp_bg_bgm_for_glob', pd_df)
    # Side effect: existing globals with the same names are overwritten.
    globals().update(scratch_db.variables)


def qbus_estimate_bgm(V, pd_df, tgtvar_name, modelname='bgmdef'):
    """Estimate a logit model with Biogeme where every alternative is available.

    Parameters
    ----------
    V : dict
        Maps each alternative id to its utility expression.
    pd_df : pandas.DataFrame
        Estimation data; wrapped in a ``db.Database`` named
        ``modelname + '_db'``.
    tgtvar_name : str
        Name of the database variable holding the chosen alternative.
    modelname : str, optional
        Prefix for the database name.

    Returns
    -------
    tuple
        ``(bgm_model, results)`` where ``results`` is whatever
        ``bgm_model.estimate()`` returns. A copy of ``V`` is attached to the
        model as ``utility_dic`` for later simulation.
    """
    # Availability is fixed to 1 for every alternative listed in V.
    availability = {alt: 1 for alt in V}

    database = db.Database(modelname + '_db', pd_df)
    # Side effect: the database variables become module globals.
    globals().update(database.variables)

    chosen = database.variables[tgtvar_name]
    log_likelihood = models.loglogit(V, availability, chosen)

    estimator = bio.BIOGEME(database, log_likelihood)
    estimator.utility_dic = V.copy()
    estimation = estimator.estimate()
    return estimator, estimation



def qbus_simulate_bgm(qbus_bgm_model, betas, pred_pd_df):
    """Simulate per-alternative probabilities for a model built by the helpers in this file.

    The kind of model is detected from the attributes the estimation helpers
    attach to it:

    * ``nest_tuple`` present  -> ``models.nested`` is used for every alternative;
    * else ``ord_probs`` present -> the stored probability expressions are
      simulated unchanged;
    * otherwise               -> ``models.logit`` is used for every alternative.

    Parameters
    ----------
    qbus_bgm_model
        Model object carrying ``utility_dic`` and optionally ``ord_probs`` /
        ``nest_tuple``.
    betas
        Parameter values forwarded to ``simulate``.
    pred_pd_df : pandas.DataFrame
        Data to simulate on; wrapped in a ``db.Database`` named ``'simul'``.

    Returns
    -------
    The object returned by ``bio.BIOGEME(...).simulate(betas)``.
    """
    is_ordered = hasattr(qbus_bgm_model, 'ord_probs')
    is_nested = hasattr(qbus_bgm_model, 'nest_tuple')

    # The alternative ids come from the ordered probabilities when available,
    # otherwise from the utility dictionary.
    template = qbus_bgm_model.ord_probs if is_ordered else qbus_bgm_model.utility_dic
    targets = template.copy()
    # All alternatives are treated as available.
    av_auto = {alt: 1 for alt in template}

    if is_nested:
        for alt in list(targets):
            targets[alt] = models.nested(
                qbus_bgm_model.utility_dic, av_auto, qbus_bgm_model.nest_tuple, alt
            )
    elif not is_ordered:
        for alt in list(targets):
            targets[alt] = models.logit(qbus_bgm_model.utility_dic, av_auto, alt)
    # Ordered, non-nested models: targets already holds the expressions to simulate.

    sim_db = db.Database('simul', pred_pd_df)
    # Side effect: the simulation database variables become module globals.
    globals().update(sim_db.variables)

    simulator = bio.BIOGEME(sim_db, targets)
    return simulator.simulate(betas)



def qbus_calc_accu_confusion(sim_probs, pd_df, choice_var):
    """Compute accuracy and a confusion matrix from simulated probabilities.

    The predicted alternative for each row is the column label of
    ``sim_probs`` with the largest value in that row.

    Parameters
    ----------
    sim_probs : pandas.DataFrame
        One column per alternative, one row per observation.
    pd_df : pandas.DataFrame
        Data containing the observed choices.
    choice_var : str
        Column of ``pd_df`` holding the observed choice.

    Returns
    -------
    tuple
        ``(accuracy, confusion_matrix)``; the confusion matrix is a
        ``pd.crosstab`` with rows named ``'Actual'`` and columns named
        ``'Predicted'``.
    """
    predicted = sim_probs.idxmax(axis=1)
    observed = pd_df[choice_var]

    paired = pd.DataFrame(
        {'y_Actual': observed, 'y_Predicted': predicted},
        columns=['y_Actual', 'y_Predicted'],
    )
    confusion_matrix = pd.crosstab(
        paired['y_Actual'],
        paired['y_Predicted'],
        rownames=['Actual'],
        colnames=['Predicted'],
    )

    # Share of rows where the most probable alternative equals the observed one.
    hits = predicted == observed
    accu = np.mean(hits)
    return accu, confusion_matrix



def qbus_likeli_ratio_test_bgm(results_complex, results_reference, signif_level):
    """Run a likelihood-ratio test between two estimation results.

    Each result contributes the pair ``(data.logLike, data.nparam)``; the two
    pairs and ``signif_level`` are forwarded to
    ``tools.likelihood_ratio_test``, whose return value is passed through.

    Parameters
    ----------
    results_complex
        Estimation results of the model passed first (the "complex" one).
    results_reference
        Estimation results of the model passed second (the "reference" one).
    signif_level
        Significance level forwarded unchanged.
    """
    complex_stats = (results_complex.data.logLike, results_complex.data.nparam)
    reference_stats = (results_reference.data.logLike, results_reference.data.nparam)
    return tools.likelihood_ratio_test(complex_stats, reference_stats, signif_level)



In [None]:
def qbus_estimate_ordered_bgm(V, ord_alt_ids, pd_df, tgtvar_name, modelname='ord_bgm'):
    """Estimate an ordered logit model with Biogeme.

    Thresholds are parameterised as ``tau1`` plus non-negative increments
    ``delta_2``, ``delta_3``, ... so that they stay ordered. The probability
    of each alternative is the difference of logistic CDFs evaluated at
    consecutive thresholds minus the utility ``V``.

    Parameters
    ----------
    V
        A single utility expression shared by all ordered alternatives (it is
        subtracted from every threshold). Previously the body read a global
        named ``V_ord`` and ignored this argument.
    ord_alt_ids : sequence
        Alternative ids in increasing order; at least two are required.
    pd_df : pandas.DataFrame
        Estimation data; wrapped in a ``db.Database`` named
        ``modelname + '_db'``.
    tgtvar_name : str
        Name of the database variable holding the chosen alternative.
    modelname : str, optional
        Prefix for the database name.

    Returns
    -------
    tuple
        ``(bgm_model, results)`` where ``results`` is whatever
        ``bgm_model.estimate()`` returns. The model carries ``utility_dic``
        (``V`` itself) and ``ord_probs`` (a copy of the probability map).

    Raises
    ------
    ValueError
        If fewer than two alternatives are supplied.
    """
    n_alts = len(ord_alt_ids)
    if n_alts < 2:
        raise ValueError('ord_alt_ids must contain at least two ordered alternatives')

    bgm_db = db.Database(modelname + '_db', pd_df)
    # Side effect: the database variables become module globals.
    globals().update(bgm_db.variables)

    first_id = ord_alt_ids[0]
    last_id = ord_alt_ids[-1]

    # One threshold per alternative except the last; each is the previous
    # threshold plus an increment bounded below by 0.
    taus_map = {first_id: exp.Beta('tau1', -1, None, None, 0)}
    for pos in range(1, n_alts - 1):
        increment = exp.Beta('delta_' + str(pos + 1), pos, 0, None, 0)
        taus_map[ord_alt_ids[pos]] = taus_map[ord_alt_ids[pos - 1]] + increment

    # Lowest category: everything below the first threshold.
    alt_probs_map = {first_id: dist.logisticcdf(taus_map[first_id] - V)}

    # Interior categories: mass between two consecutive thresholds.
    for pos in range(1, n_alts - 1):
        upper = taus_map[ord_alt_ids[pos]]
        lower = taus_map[ord_alt_ids[pos - 1]]
        alt_probs_map[ord_alt_ids[pos]] = (
            dist.logisticcdf(upper - V) - dist.logisticcdf(lower - V)
        )

    # Highest category: everything above the last threshold.
    alt_probs_map[last_id] = 1 - dist.logisticcdf(taus_map[ord_alt_ids[-2]] - V)

    logprob = exp.log(exp.Elem(alt_probs_map, bgm_db.variables[tgtvar_name]))

    bgm_model = bio.BIOGEME(bgm_db, logprob)
    bgm_model.utility_dic = V
    bgm_model.ord_probs = alt_probs_map.copy()
    return bgm_model, bgm_model.estimate()

def qbus_estimate_mixed_bgm(V, pd_df, tgtvar_name, panelvar_name=None, n_draws=50, seed=1, modelname='bgmdef'):
    """Estimate a logit model whose likelihood is integrated by Monte Carlo draws.

    Parameters
    ----------
    V : dict
        Maps each alternative id to its utility expression.
    pd_df : pandas.DataFrame
        Estimation data; wrapped in a ``db.Database`` named
        ``modelname + '_db'``.
    tgtvar_name : str
        Name of the database variable holding the chosen alternative.
    panelvar_name : str or None, optional
        When given, the database is declared as panel data on this column and
        the per-observation probability is wrapped in
        ``exp.PanelLikelihoodTrajectory``.
    n_draws : int, optional
        Forwarded to ``bio.BIOGEME`` as ``numberOfDraws``.
    seed : int, optional
        Forwarded to ``bio.BIOGEME`` as ``seed``.
    modelname : str, optional
        Prefix for the database name.

    Returns
    -------
    tuple
        ``(bgm_model, results)`` where ``results`` is whatever
        ``bgm_model.estimate()`` returns. A copy of ``V`` is attached to the
        model as ``utility_dic``.
    """
    use_panel = panelvar_name is not None

    # Availability is fixed to 1 for every alternative listed in V.
    availability = {alt: 1 for alt in V}

    database = db.Database(modelname + '_db', pd_df)
    if use_panel:
        database.panel(panelvar_name)

    # Side effect: the database variables become module globals.
    globals().update(database.variables)

    obs_prob = models.logit(V, availability, database.variables[tgtvar_name])
    cond_prob = exp.PanelLikelihoodTrajectory(obs_prob) if use_panel else obs_prob
    log_likelihood = exp.log(exp.MonteCarlo(cond_prob))

    estimator = bio.BIOGEME(database, log_likelihood, numberOfDraws=n_draws, seed=seed)
    estimator.utility_dic = V.copy()
    estimation = estimator.estimate()
    return estimator, estimation




def qbus_estimate_nested_bgm(V, pd_df, nests,  tgtvar_name, modelname='bgmdef'):
    """Estimate a nested logit model with Biogeme where every alternative is available.

    Parameters
    ----------
    V : dict
        Maps each alternative id to its utility expression.
    pd_df : pandas.DataFrame
        Estimation data; wrapped in a ``db.Database`` named
        ``modelname + '_db'``.
    nests
        Nest specification forwarded unchanged to ``models.lognested``.
    tgtvar_name : str
        Name of the database variable holding the chosen alternative.
    modelname : str, optional
        Prefix for the database name.

    Returns
    -------
    tuple
        ``(bgm_model, results)`` where ``results`` is whatever
        ``bgm_model.estimate()`` returns. The model carries ``utility_dic``
        (a copy of ``V``) and ``nest_tuple`` (``nests``) for later simulation.
    """
    # Availability is fixed to 1 for every alternative listed in V.
    availability = {alt: 1 for alt in V}

    database = db.Database(modelname + '_db', pd_df)
    # Side effect: the database variables become module globals.
    globals().update(database.variables)

    chosen = database.variables[tgtvar_name]
    log_likelihood = models.lognested(V, availability, nests, chosen)

    estimator = bio.BIOGEME(database, log_likelihood)
    estimator.utility_dic = V.copy()
    estimator.nest_tuple = nests
    estimation = estimator.estimate()
    return estimator, estimation

In [None]:
def calc_mnl_cov(design_m, cprobs, num_alt, attrs_per_alt):
    """Compute a covariance matrix for a design from its choice probabilities.

    Steps, all on plain NumPy arrays:

    1. repeat each probability column ``attrs_per_alt`` times so it lines up
       with the design columns;
    2. subtract from every design entry the row-wise sum of
       ``design * probabilities``;
    3. form ``Z.T @ (P * Z)`` and take its Moore-Penrose pseudo-inverse.

    Parameters
    ----------
    design_m : pandas.DataFrame
        Design matrix with ``num_alt * attrs_per_alt`` columns.
    cprobs : pandas.DataFrame
        Choice probabilities with ``num_alt`` columns and the same number of
        rows as ``design_m``.
    num_alt : int
        Number of alternatives.
    attrs_per_alt : int
        Number of attribute columns per alternative.

    Returns
    -------
    numpy.ndarray
        The pseudo-inverse when its determinant is non-zero; otherwise
        ``1000 * I`` of the same size as a penalty value. The result is always
        an ndarray, matching what the fallback branch already returned.
    """
    X = design_m.to_numpy()
    # The per-column repeat counts also validate that cprobs has num_alt columns.
    P_rep = np.repeat(cprobs.to_numpy(), np.repeat(attrs_per_alt, num_alt), axis=1)

    # Row-wise probability-weighted sum, broadcast against every design column.
    row_expectation = (X * P_rep).sum(axis=1, keepdims=True)
    Z = X - row_expectation

    ZPZ = Z.T @ (P_rep * Z)
    covMNL = np.linalg.pinv(ZPZ)

    # NOTE(review): this is an exact comparison with zero, as in the original;
    # a nearly singular matrix with a tiny non-zero determinant is still returned.
    if np.linalg.det(covMNL) != 0:
        return covMNL
    return np.eye(covMNL.shape[0]) * 1000

def d_effic(covMAT):
    """Return ``det(covMAT)`` raised to the power ``1 / (n + 1)``.

    ``n`` is the number of rows of ``covMAT``.

    Parameters
    ----------
    covMAT : array-like
        Square matrix, e.g. the output of ``calc_mnl_cov``.
    """
    # NOTE(review): D-error is often defined with exponent 1/n; confirm the
    # extra +1 in the denominator is intended.
    n_rows = covMAT.shape[0]
    determinant = np.linalg.det(covMAT)
    exponent = 1 / (n_rows + 1)
    return np.power(determinant, exponent)