In [4]:
import sys
sys.path.append('../')

In [5]:
import game.agent as ag
from game.run import run
from game.simulator import load_simulator_yarn, GraphSimulator
from pathlib import Path
from tqdm.auto import trange
import pandas as pd
# import mpire
import torch

In [6]:
class color:
    """ANSI escape sequences used to style text printed by this notebook."""

    # Foreground colours
    PURPLE = '\033[95m'
    CYAN = '\033[96m'
    DARKCYAN = '\033[36m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'

    # Text attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Resets every attribute set by the codes above
    END = '\033[0m'

## Description

This game models a single personality trait: safety (the absence of risk).

Therefore, a label of 1 denotes a safe (risk-free) action, and a label of 0 denotes a risky action.

In [7]:
# Number of simulated games per agent (use a small value such as 100 for a quick trial).
n_runs = 100_000
# Random seed setting for the notebook (currently unset).
seed = None

## Simulator

In [8]:
# Build the game simulator from the yarn scripts directory.
# NOTE(review): graph_file_sfx presumably selects the pickled graph files by suffix;
# pickle files should only be loaded from trusted sources — confirm.
simulator = load_simulator_yarn(
    '../yarnScripts',
    graph_file_sfx='_graph_transformed.pickle',
)

## Agents

In [9]:
# label_predictor=ag.TorchLabelPredictor(
#             model_path=Path('./saved_bert/200_[20]_[30]_1_False_bert-base-multilingual-cased_0.001_adamw_8_max_val_mcc_False_False_100'),
#             use_cpu=False,
#         )

def get_linear_agent(a: torch.Tensor):
    """
    Build a LabelDecisorAgent whose BehavioralDecisionMaker uses a linear function.

    The single weight function multiplies its input element-wise by ``a`` and
    sums the product. The decision maker's ``memory_steps`` is the number of
    rows of ``a`` minus one; it is created unseeded and non-deterministic.

    :param torch.Tensor a: matrix A that characterizes the behavior profile
    :return: the configured ``ag.LabelDecisorAgent``
    """
    def linear_weight(x):
        # Element-wise product with the profile matrix, summed over all entries.
        return (a * x).sum()

    predictor = ag.GraphLabelLoader(simulator=simulator)
    decision_maker = ag.BehavioralDecisionMaker(
        weight_funcs=[linear_weight],
        memory_steps=a.shape[0] - 1,
        seed=None,
        deterministic=False,
    )
    return ag.LabelDecisorAgent(
        label_predictor=predictor,
        decision_maker=decision_maker,
    )

In [10]:
def get_random_agent():
    """Return a new ``ag.RandomAgent`` built with ``None`` as its only argument."""
    # NOTE(review): the positional None is presumably the random seed — confirm against RandomAgent.
    return ag.RandomAgent(None)


# def get_r_agent():
#     return ag.LabelDecisorAgent(
#         label_predictor=ag.GraphLabelLoader(
#             simulator=simulator,
#         ),
#         decision_maker=ag.RDecisionMaker(
#             rand=0.4,
#             seed=None,
#         )
#     )


def get_risky_agent():
    """
    Build the 'risky' linear agent: a 1x1 float profile matrix holding +2.

    NOTE(review): the notebook description says label 1 is the safe action;
    confirm that a positive weight really corresponds to risk-seeking behaviour.
    """
    profile = torch.tensor([[2.0]], dtype=torch.float)
    return get_linear_agent(profile)


def get_risk_aversion_agent():
    """Build the 'risk aversion' linear agent: a 1x1 float profile matrix holding -2."""
    profile = torch.tensor([[-2.0]], dtype=torch.float)
    return get_linear_agent(profile)


def get_risky_ntm_agent():
    """Build the 'risky but not so much' linear agent: a 3x1 float profile (8, -8, -6)."""
    rows = [[8], [-8], [-6]]
    return get_linear_agent(torch.as_tensor(rows, dtype=torch.float))

def get_ultra_risky_ntm_agent():
    """Build the 'ultra risky ntm' linear agent: a 3x1 float profile (-8, 8, 6)."""
    rows = [[-8], [8], [6]]
    return get_linear_agent(torch.as_tensor(rows, dtype=torch.float))


# def get_mc_agent():
#     return ag.LabelDecisorAgent(
#         label_predictor=ag.GraphLabelLoader(
#             simulator=simulator,
#         ),
#         decisor=ag.MarkovChainAgent(
#             rand=0.4,
#             seed=None,
#         )
#     )

In [11]:
# def get_random_agent():
#     return ag.RandomAgent(None)


# # def get_r_agent():
# #     return ag.LabelDecisorAgent(
# #         label_predictor=ag.GraphLabelLoader(
# #             simulator=simulator,
# #         ),
# #         decision_maker=ag.RDecisionMaker(
# #             rand=0.4,
# #             seed=None,
# #         )
# #     )


# def get_risky_agent():
#     return ag.LabelDecisorAgent(
#         label_predictor=ag.GraphLabelLoader(
#             simulator=simulator,
#         ),
#         decision_maker=ag.BehavioralDecisionMaker(
#             weight_funcs=[
#                 lambda x: x
#             ],
#             memory_steps=0,
#             seed=None,
#         )
#     )


# def get_risky_agent():
#     return ag.LabelDecisorAgent(
#         label_predictor=ag.GraphLabelLoader(
#             simulator=simulator,
#         ),
#         decision_maker=ag.BehavioralDecisionMaker(
#             weight_funcs=[
#                 lambda x: x
#             ],
#             memory_steps=0,
#             seed=None,
#         )
#     )


# def get_risky_ntm_agent():
#     return ag.LabelDecisorAgent(
#         label_predictor=ag.GraphLabelLoader(
#             simulator=simulator,
#         ),
#         decision_maker=ag.BehavioralDecisionMaker(
#             weight_funcs=[
#                 lambda x: 0.5*x,
#                 lambda x: -0.3*x,
#                 lambda x: -0.3*x,
#             ],
#             memory_steps=3,
#             seed=None,
#         )
#     )


# # def get_mc_agent():
# #     return ag.LabelDecisorAgent(
# #         label_predictor=ag.GraphLabelLoader(
# #             simulator=simulator,
# #         ),
# #         decisor=ag.MarkovChainAgent(
# #             rand=0.4,
# #             seed=None,
# #         )
# #     )

## Run

In [12]:
# Per-agent feature tables are collected here, one DataFrame per agent.
data = []
# Integer class id assigned to each agent type.
models = {
    name: class_id
    for class_id, name in enumerate(
        ('rand', 'risky', 'no_risky', 'risky_ntm', 'u_risky_ntm')
    )
}

### Random Agent

In [13]:
# Run the simulation for the random agent: run() receives the agent factory,
# the simulator and the number of runs.
df_r = run(get_random_agent, simulator, n_runs)

100%|██████████| 100000/100000 [00:17<00:00, 5689.41it/s]


In [14]:
# Alias the random-agent results as `df` for the inspection cells that follow.
df = df_r

In [15]:
# Ending reached in each run (columns: run, title, kind).
df['endings']

Unnamed: 0,run,title,kind
0,0,MaloSilencio,0
1,1,MaloPadres,0
2,2,MaloAmigos,0
3,3,MaloPadres,0
4,4,MaloAmanda,0
...,...,...,...
99995,99995,MaloSilencio,0
99996,99996,MaloPadres,0
99997,99997,MaloSilencio,0
99998,99998,MaloPadres,0


In [16]:
# Per-question (qid) means of the decision columns.
df['decisions'].groupby(['qid']).mean()
# The difference between these values and the others tells us whether the agent has a preference for a label or not.

Unnamed: 0_level_0,run,num,choice,label
qid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
-1.0,50001.051487,11.963135,0.0,
1.0,49999.5,0.0,0.50092,
2.0,49999.5,1.0,0.50024,0.50024
3.0,49999.5,3.0,0.49959,
4.0,49999.5,5.0,0.50044,0.49956
5.0,49999.5,6.0,0.49993,0.49993
6.0,50063.313744,7.0,0.49989,0.49989
7.0,49999.5,7.49993,0.49804,0.49804
8.0,50146.97189,8.498795,0.502008,0.502008
9.0,49999.5,9.99797,0.50317,0.49683


In [17]:
# Drop the rows whose qid is -1 and keep every column.
dec = df['decisions'][df['decisions']['qid'].ne(-1)]
dec

Unnamed: 0,run,num,qid,choice,label
0,0,0,1.0,1,
1,0,1,2.0,0,0.0
3,0,3,3.0,1,
5,0,5,4.0,0,1.0
6,0,6,5.0,1,1.0
...,...,...,...,...,...
1987304,99999,10,10.0,0,1.0
1987305,99999,11,11.0,1,0.0
1987307,99999,13,12.0,0,0.0
1987309,99999,15,14.0,1,0.0


In [18]:
# Reshape to one row per run and one column per qid holding the label;
# missing run/qid combinations and missing labels are encoded as -1.
dec = (
    dec[['run', 'qid', 'label']]
    .pivot(index='run', columns='qid', values='label')
    .fillna(-1)
    .assign(m=models['rand'])  # class id of this agent
)
data.append(dec)
dec

qid,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,14.0,15.0,17.0,m
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,-1.0,0.0,-1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,-1.0,-1.0,0.0,0
1,-1.0,0.0,-1.0,1.0,1.0,1.0,0.0,-1.0,0.0,1.0,1.0,0.0,-1.0,-1.0,1.0,0
2,-1.0,1.0,-1.0,1.0,0.0,-1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,-1.0,-1.0,0
3,-1.0,0.0,-1.0,1.0,0.0,-1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,-1.0,1.0,0
4,-1.0,0.0,-1.0,1.0,0.0,-1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,-1.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,-1.0,0.0,-1.0,1.0,1.0,1.0,0.0,-1.0,0.0,1.0,0.0,0.0,-1.0,-1.0,0.0,0
99996,-1.0,0.0,-1.0,1.0,0.0,-1.0,0.0,-1.0,0.0,1.0,0.0,0.0,-1.0,-1.0,1.0,0
99997,-1.0,1.0,-1.0,0.0,0.0,-1.0,0.0,-1.0,1.0,1.0,1.0,0.0,0.0,-1.0,0.0,0
99998,-1.0,1.0,-1.0,1.0,1.0,0.0,0.0,-1.0,1.0,1.0,1.0,0.0,0.0,-1.0,1.0,0


### Risky Agent

In [19]:
# Run the simulation for the risky agent: run() receives the agent factory,
# the simulator and the number of runs.
df_risk = run(get_risky_agent, simulator, n_runs)

100%|██████████| 100000/100000 [05:37<00:00, 296.43it/s]


In [20]:
# Alias the risky-agent results as `df` for the inspection cells that follow.
df = df_risk

In [21]:
# Ending reached in each run (columns: run, title, kind).
df['endings']

Unnamed: 0,run,title,kind
0,0,MaloAmanda,0
1,1,MaloAmanda,0
2,2,MaloAmanda,0
3,3,BuenoPadres,1
4,4,MaloAmanda,0
...,...,...,...
99995,99995,BuenoAmanda,1
99996,99996,MaloPadres,0
99997,99997,MaloSilencio,0
99998,99998,MaloPadres,0


In [22]:
# Per-question (qid) means of the decision columns.
df['decisions'].groupby(['qid']).mean()
# The difference between these values and the others tells us whether the agent has a preference for a label or not.

Unnamed: 0_level_0,run,num,choice,label
qid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
-1.0,49992.982221,11.925234,0.0,
1.0,49999.5,0.0,0.22464,
2.0,49999.5,1.0,0.78674,0.78674
3.0,49999.5,3.0,0.50295,
4.0,49999.5,5.0,0.13364,0.86636
5.0,49999.5,6.0,0.75089,0.75089
6.0,50090.953828,7.0,0.758873,0.758873
7.0,49999.5,7.75089,0.73284,0.73284
8.0,49986.791633,8.751023,0.743737,0.743737
9.0,49999.5,10.48373,0.18835,0.81165


In [23]:
# Drop the rows whose qid is -1 and keep every column.
dec = df['decisions'][df['decisions']['qid'].ne(-1)]
dec

Unnamed: 0,run,num,qid,choice,label
0,0,0,1.0,0,
1,0,1,2.0,1,1.0
3,0,3,3.0,1,
5,0,5,4.0,0,1.0
6,0,6,5.0,1,1.0
...,...,...,...,...,...
2015728,99999,9,8.0,1,1.0
2015730,99999,11,9.0,0,1.0
2015731,99999,12,10.0,0,1.0
2015732,99999,13,11.0,0,1.0


In [24]:
# Reshape to one row per run and one column per qid holding the label;
# missing run/qid combinations and missing labels are encoded as -1.
dec = (
    dec[['run', 'qid', 'label']]
    .pivot(index='run', columns='qid', values='label')
    .fillna(-1)
    .assign(m=models['risky'])  # class id of this agent
)
data.append(dec)
dec

qid,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,14.0,15.0,17.0,m
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,-1.0,1.0,-1.0,1.0,1.0,1.0,0.0,-1.0,1.0,1.0,1.0,0.0,1.0,1.0,-1.0,1
1,-1.0,1.0,-1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,-1.0,1
2,-1.0,0.0,-1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,-1.0,1
3,-1.0,1.0,-1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,-1.0,-1.0,-1.0,1
4,-1.0,1.0,-1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,-1.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,-1.0,1.0,-1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.0,-1.0,-1.0,1
99996,-1.0,1.0,-1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,-1.0,-1.0,1.0,1
99997,-1.0,1.0,-1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,1
99998,-1.0,1.0,-1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,-1.0,1.0,1


### Risk Aversion Agent

In [25]:
# Run the simulation for the risk-aversion agent: run() receives the agent factory,
# the simulator and the number of runs.
df_no_risky = run(get_risk_aversion_agent, simulator, n_runs)

100%|██████████| 100000/100000 [05:25<00:00, 307.33it/s]


In [26]:
# Alias the risk-aversion results as `df` for the inspection cells that follow.
df = df_no_risky

In [27]:
# Ending reached in each run (columns: run, title, kind).
df['endings']

Unnamed: 0,run,title,kind
0,0,MaloSilencio,0
1,1,MaloSilencio,0
2,2,MaloSilencio,0
3,3,MaloSilencio,0
4,4,MaloSilencio,0
...,...,...,...
99995,99995,MaloSilencio,0
99996,99996,MaloSilencio,0
99997,99997,MaloSilencio,0
99998,99998,MaloSilencio,0


In [28]:
# Per-question (qid) means of the decision columns.
df['decisions'].groupby(['qid']).mean()
# The difference between these values and the others tells us whether the agent has a preference for a label or not.

Unnamed: 0_level_0,run,num,choice,label
qid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
-1.0,49999.570091,11.484976,0.0,
1.0,49999.5,0.0,0.77062,
2.0,49999.5,1.0,0.21274,0.21274
3.0,49999.5,3.0,0.50175,
4.0,49999.5,5.0,0.86603,0.13397
5.0,49999.5,6.0,0.24505,0.24505
6.0,50126.536544,7.0,0.236686,0.236686
7.0,49999.5,7.24505,0.26479,0.26479
8.0,49945.52857,8.244345,0.25794,0.25794
9.0,49999.5,9.50984,0.80851,0.19149


In [29]:
# Drop the rows whose qid is -1 and keep every column.
dec = df['decisions'][df['decisions']['qid'].ne(-1)]
dec

Unnamed: 0,run,num,qid,choice,label
0,0,0,1.0,1,
1,0,1,2.0,0,0.0
3,0,3,3.0,0,
5,0,5,4.0,1,0.0
6,0,6,5.0,0,0.0
...,...,...,...,...,...
1882987,99999,8,8.0,0,0.0
1882989,99999,10,9.0,1,0.0
1882990,99999,11,10.0,1,0.0
1882991,99999,12,11.0,1,0.0


In [30]:
# Reshape to one row per run and one column per qid holding the label;
# missing run/qid combinations and missing labels are encoded as -1.
dec = (
    dec[['run', 'qid', 'label']]
    .pivot(index='run', columns='qid', values='label')
    .fillna(-1)
    .assign(m=models['no_risky'])  # class id of this agent
)
data.append(dec)
dec

qid,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,14.0,15.0,17.0,m
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,-1.0,0.0,-1.0,0.0,0.0,-1.0,0.0,-1.0,1.0,0.0,0.0,0.0,0.0,-1.0,-1.0,2
1,-1.0,0.0,-1.0,1.0,0.0,-1.0,1.0,0.0,0.0,0.0,1.0,0.0,-1.0,-1.0,-1.0,2
2,-1.0,1.0,-1.0,0.0,0.0,-1.0,0.0,-1.0,0.0,1.0,0.0,0.0,-1.0,-1.0,0.0,2
3,-1.0,0.0,-1.0,0.0,0.0,-1.0,0.0,-1.0,0.0,1.0,0.0,0.0,-1.0,-1.0,0.0,2
4,-1.0,1.0,-1.0,0.0,0.0,-1.0,0.0,-1.0,0.0,1.0,1.0,0.0,-1.0,-1.0,0.0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,-1.0,0.0,-1.0,0.0,0.0,-1.0,0.0,-1.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,2
99996,-1.0,0.0,-1.0,0.0,1.0,0.0,0.0,-1.0,1.0,1.0,0.0,0.0,0.0,-1.0,0.0,2
99997,-1.0,0.0,-1.0,0.0,0.0,-1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,-1.0,-1.0,2
99998,-1.0,0.0,-1.0,0.0,0.0,-1.0,0.0,-1.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,2


### Risky But Not So Much Agent

In [31]:
# Run the simulation for the 'risky but not so much' agent: run() receives the
# agent factory, the simulator and the number of runs.
df_risk_ntm = run(get_risky_ntm_agent, simulator, n_runs)

100%|██████████| 100000/100000 [06:16<00:00, 265.30it/s]


In [32]:
# Alias the 'risky but not so much' results as `df` for the inspection cells that follow.
df = df_risk_ntm

In [33]:
# Ending reached in each run (columns: run, title, kind).
df['endings']

Unnamed: 0,run,title,kind
0,0,MaloPadres,0
1,1,MaloSilencio,0
2,2,MaloPadres,0
3,3,MaloAmigos,0
4,4,MaloAmigos,0
...,...,...,...
99995,99995,MaloAmigos,0
99996,99996,MaloSilencio,0
99997,99997,MaloPadres,0
99998,99998,MaloAmigos,0


In [34]:
# Per-question (qid) means of the decision columns.
df['decisions'].groupby(['qid']).mean()
# The difference between these values and the others tells us whether the agent has a preference for a label or not.

Unnamed: 0_level_0,run,num,choice,label
qid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
-1.0,49997.773047,12.114985,0.0,
1.0,49999.5,0.0,0.00765,
2.0,49999.5,1.0,0.86004,0.86004
3.0,49999.5,3.0,0.50158,
4.0,49999.5,5.0,0.63635,0.36365
5.0,49999.5,6.0,0.60434,0.60434
6.0,50130.16537,7.0,0.752358,0.752358
7.0,49999.5,7.60434,0.50488,0.50488
8.0,49899.339546,8.448958,0.639102,0.639102
9.0,49999.5,10.10922,0.44651,0.55349


In [35]:
# Drop the rows whose qid is -1 and keep every column.
dec = df['decisions'][df['decisions']['qid'].ne(-1)]
dec

Unnamed: 0,run,num,qid,choice,label
0,0,0,1.0,0,
1,0,1,2.0,1,1.0
3,0,3,3.0,1,
5,0,5,4.0,0,1.0
6,0,6,5.0,0,0.0
...,...,...,...,...,...
2001468,99999,10,9.0,0,1.0
2001469,99999,11,10.0,1,0.0
2001470,99999,12,11.0,1,0.0
2001472,99999,14,12.0,0,0.0


In [36]:
# Reshape to one row per run and one column per qid holding the label;
# missing run/qid combinations and missing labels are encoded as -1.
dec = (
    dec[['run', 'qid', 'label']]
    .pivot(index='run', columns='qid', values='label')
    .fillna(-1)
    .assign(m=models['risky_ntm'])  # class id of this agent
)
data.append(dec)
dec

qid,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,14.0,15.0,17.0,m
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,-1.0,1.0,-1.0,1.0,0.0,-1.0,0.0,-1.0,0.0,1.0,0.0,0.0,-1.0,-1.0,1.0,3
1,-1.0,1.0,-1.0,1.0,0.0,-1.0,0.0,-1.0,0.0,0.0,1.0,0.0,-1.0,-1.0,-1.0,3
2,-1.0,1.0,-1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,-1.0,-1.0,1.0,3
3,-1.0,1.0,-1.0,1.0,0.0,-1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,-1.0,-1.0,3
4,-1.0,1.0,-1.0,1.0,0.0,-1.0,0.0,-1.0,1.0,0.0,1.0,0.0,1.0,-1.0,-1.0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,-1.0,1.0,-1.0,1.0,0.0,-1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,-1.0,-1.0,3
99996,-1.0,0.0,-1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,-1.0,-1.0,0.0,3
99997,-1.0,0.0,-1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,-1.0,-1.0,1.0,3
99998,-1.0,1.0,-1.0,1.0,0.0,-1.0,0.0,-1.0,1.0,0.0,1.0,0.0,1.0,-1.0,-1.0,3


### Plus Risky Not so much

In [37]:
# Run the simulation for the 'ultra risky ntm' agent.
# NOTE(review): this reuses the name `df_risk_ntm` from the previous section, so the
# earlier agent's raw results are no longer reachable afterwards; consider a distinct
# name such as `df_u_risky_ntm` (the alias cell right after would need the same rename).
df_risk_ntm = run(get_ultra_risky_ntm_agent, simulator, n_runs)

100%|██████████| 100000/100000 [06:15<00:00, 266.27it/s]


In [38]:
# Alias the most recent simulation results as `df` for the inspection cells that follow.
df = df_risk_ntm

In [39]:
# Ending reached in each run (columns: run, title, kind).
df['endings']

Unnamed: 0,run,title,kind
0,0,BuenoAmanda,1
1,1,MaloSilencio,0
2,2,BuenoAmanda,1
3,3,MaloSilencio,0
4,4,MaloSilencio,0
...,...,...,...
99995,99995,MaloSilencio,0
99996,99996,MaloSilencio,0
99997,99997,MaloSilencio,0
99998,99998,MaloSilencio,0


In [40]:
# Per-question (qid) means of the decision columns.
df['decisions'].groupby(['qid']).mean()
# The difference between these values and the others tells us whether the agent has a preference for a label or not.

Unnamed: 0_level_0,run,num,choice,label
qid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
-1.0,49998.690824,11.749827,0.0,
1.0,49999.5,0.0,0.99228,
2.0,49999.5,1.0,0.00744,0.00744
3.0,49999.5,3.0,0.50201,
4.0,49999.5,5.0,0.36242,0.63758
5.0,49999.5,6.0,0.61014,0.61014
6.0,50099.207051,7.0,0.826417,0.826417
7.0,49999.5,7.61014,0.44102,0.44102
8.0,49943.854043,8.924221,0.648905,0.648905
9.0,49999.5,10.05116,0.57618,0.42382


In [41]:
# Drop the rows whose qid is -1 and keep every column.
dec = df['decisions'][df['decisions']['qid'].ne(-1)]
dec

Unnamed: 0,run,num,qid,choice,label
0,0,0,1.0,1,
1,0,1,2.0,0,0.0
3,0,3,3.0,1,
5,0,5,4.0,0,1.0
6,0,6,5.0,1,1.0
...,...,...,...,...,...
1937371,99999,9,8.0,1,1.0
1937373,99999,11,9.0,0,1.0
1937374,99999,12,10.0,0,1.0
1937375,99999,13,11.0,0,1.0


In [42]:
# Reshape to one row per run and one column per qid holding the label;
# missing run/qid combinations and missing labels are encoded as -1.
dec = (
    dec[['run', 'qid', 'label']]
    .pivot(index='run', columns='qid', values='label')
    .fillna(-1)
    .assign(m=models['u_risky_ntm'])  # class id of this agent
)
data.append(dec)
dec

qid,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,14.0,15.0,17.0,m
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,-1.0,0.0,-1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.0,-1.0,-1.0,4
1,-1.0,0.0,-1.0,1.0,0.0,-1.0,0.0,-1.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,4
2,-1.0,0.0,-1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.0,-1.0,-1.0,4
3,-1.0,0.0,-1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,4
4,-1.0,0.0,-1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,-1.0,0.0,-1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,4
99996,-1.0,0.0,-1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,4
99997,-1.0,0.0,-1.0,0.0,0.0,-1.0,0.0,-1.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,4
99998,-1.0,0.0,-1.0,0.0,1.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,4


## Data Analysis

In [43]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
# from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
# from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
# from sklearn.inspection import DecisionBoundaryDisplay

In [44]:
# Stack the per-agent tables into one frame with a fresh index; any cell left
# empty by the concatenation is filled with -1.
df = (
    pd.concat(data, ignore_index=True)
    .fillna(-1)
)
# Persist the dataset and reload it from disk.
# NOTE(review): after the CSV round trip the qid column labels come back as strings
# rather than floats — confirm this is intended.
df.to_csv('results.csv', index=False)
df = pd.read_csv('results.csv')
df

Unnamed: 0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,14.0,15.0,17.0,m
0,-1.0,0.0,-1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,-1.0,-1.0,0.0,0
1,-1.0,0.0,-1.0,1.0,1.0,1.0,0.0,-1.0,0.0,1.0,1.0,0.0,-1.0,-1.0,1.0,0
2,-1.0,1.0,-1.0,1.0,0.0,-1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,-1.0,-1.0,0
3,-1.0,0.0,-1.0,1.0,0.0,-1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,-1.0,1.0,0
4,-1.0,0.0,-1.0,1.0,0.0,-1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,-1.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
499995,-1.0,0.0,-1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,4
499996,-1.0,0.0,-1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,4
499997,-1.0,0.0,-1.0,0.0,0.0,-1.0,0.0,-1.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,4
499998,-1.0,0.0,-1.0,0.0,1.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,4


In [None]:
# Number of distinct agent classes present in the target column.
n_classes = df['m'].nunique(dropna=False)

In [None]:
# Hold out 30% of the rows for testing; features are every column except the
# class column 'm'. random_state is tied to the notebook's `seed` setting so the
# split is reproducible whenever a seed is configured (seed=None keeps the
# previous unseeded behaviour).
x_train, x_test, y_train, y_test = train_test_split(
    df.drop(columns='m'),
    df['m'],
    test_size=0.3,
    random_state=seed,
)

In [None]:
# Class proportions in the training split.
y_train.value_counts(normalize=True)

In [None]:
# Class proportions in the test split.
y_test.value_counts(normalize=True)

In [None]:
# Display the training features.
x_train

In [None]:
# Convert the splits to plain NumPy arrays. np.asarray accepts both pandas objects
# and arrays, so re-running this cell does not fail the way calling .to_numpy()
# on an already-converted ndarray would.
x_train = np.asarray(x_train)
y_train = np.asarray(y_train)
x_test = np.asarray(x_test)
y_test = np.asarray(y_test)

In [None]:
# Baseline decision tree. random_state follows the notebook's `seed` setting so the
# fitted tree is reproducible whenever a seed is configured (seed=None keeps the
# previous unseeded behaviour).
dt = DecisionTreeClassifier(random_state=seed)
dt.fit(x_train, y_train)

# Baseline logistic regression with a raised iteration cap.
lr = LogisticRegression(max_iter=500)
lr.fit(x_train, y_train)

In [None]:
# Score of the decision tree on the held-out test split.
dt.score(x_test,y_test)

In [None]:
# Detailed evaluation of the decision tree on the test split.
y_pred = dt.predict(x_test)
print(metrics.classification_report(y_true=y_test, y_pred=y_pred, digits=3))
# Row-normalised confusion matrix (normalize='true').
print(metrics.confusion_matrix(y_true=y_test, y_pred=y_pred, normalize='true').round(3))
# Built-in round() works whether the metric comes back as a Python float or a
# NumPy scalar; calling .round() on the result fails for a plain float.
print(round(metrics.matthews_corrcoef(y_true=y_test, y_pred=y_pred), 3))

In [None]:
# Score of the logistic regression on the held-out test split.
lr.score(x_test,y_test)

In [None]:
# https://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html
# https://scikit-learn.org/stable/auto_examples/neighbors/plot_nca_dim_reduction.html

In [None]:
# Candidate models compared in the evaluation loop. The estimators that draw random
# numbers during fitting take random_state from the notebook's `seed` setting, so
# results are reproducible whenever a seed is configured (seed=None keeps the
# previous unseeded behaviour).
# NOTE(review): SVC training cost grows steeply with the number of samples; on the
# full dataset the two SVC entries may be very slow — consider subsampling.
classifiers = [
    DecisionTreeClassifier(random_state=seed),
    LogisticRegression(max_iter=500),
    AdaBoostClassifier(random_state=seed),
    RandomForestClassifier(random_state=seed),
    SVC(kernel='linear'),
    SVC(),
    KNeighborsClassifier(n_neighbors=n_classes),
]

In [None]:
# Fit every candidate classifier on the training split and print its test metrics.
for c in classifiers:
    print(f"{color.BLUE}{color.BOLD}Classifier {c}{color.END}")
    # train model
    c.fit(x_train, y_train)

    # TODO: plot using pca

    # get evaluation info
    y_pred = c.predict(x_test)
    print(metrics.classification_report(y_true=y_test, y_pred=y_pred, digits=3))
    print(metrics.confusion_matrix(y_true=y_test, y_pred=y_pred, normalize='true').round(3))
    # Built-in round() works whether the metric comes back as a Python float or a
    # NumPy scalar; calling .round() on the result fails for a plain float.
    print(round(metrics.f1_score(y_true=y_test, y_pred=y_pred, average='weighted'), 3))
    print(round(metrics.matthews_corrcoef(y_true=y_test, y_pred=y_pred), 3))