In [1]:
import sys
sys.path.append('../')

In [2]:
import game.agent as ag
from game.run import run
from game.simulator import load_simulator_yarn, GraphSimulator
from pathlib import Path
from tqdm.auto import trange
import pandas as pd
import torch

In [3]:
class color:
   PURPLE = '\033[95m'
   CYAN = '\033[96m'
   DARKCYAN = '\033[36m'
   BLUE = '\033[94m'
   GREEN = '\033[92m'
   YELLOW = '\033[93m'
   RED = '\033[91m'
   BOLD = '\033[1m'
   UNDERLINE = '\033[4m'
   END = '\033[0m'

## Description

This game only has one personality trait, which is safety (no risk).

Therefore, a 1 implies a safe action (without risk), and a 0 implies a risky action.



In [4]:
seed = None

## Simulator

In [5]:
simulator = load_simulator_yarn('../yarnScripts', graph_file_sfx='_graph_transformed.pickle')

## Agents

In [6]:
# label_predictor=ag.TorchLabelPredictor(
#             model_path=Path('./saved_bert/200_[20]_[30]_1_False_bert-base-multilingual-cased_0.001_adamw_8_max_val_mcc_False_False_100'),
#             use_cpu=False,
#         )

def get_linear_agent(a: torch.Tensor, inverse:bool = False):
    """
    Gets an agent with a BehavioralDecisionMaker that has a linear relation with the memory matrix

    :param torch.Tensor a: matrix A that characterizes the behavior profile
    :param bool inverse: if true, it multiplies A by (1-M) instead of M (first row will still be 1)
    """
    if not inverse:
        dec_maker =  ag.BehavioralDecisionMaker(
            weight_funcs=[
                lambda m,s: (a*m).sum()
            ],
            memory_steps=a.shape[0]-1,
            seed=None,
            deterministic=False,
        )
    else:
        t = torch.zeros(a.shape[0], 1)
        t[0]=1
        dec_maker = ag.BehavioralDecisionMaker(
            weight_funcs=[
                lambda m,s: (a*(s-m+t)).sum()
            ],
            memory_steps=a.shape[0]-1,
            seed=None,
            deterministic=False,
        )

    return ag.LabelDecisorAgent(
        label_predictor=ag.GraphLabelLoader(
            simulator=simulator,
        ),
        decision_maker=dec_maker,
    )

Values greater than 0 imply favoring safety

Values lower than 0 imply not favoring safety (favoring risk)

In [7]:
def get_random_agent():
    return ag.RandomAgent(None)


# def get_r_agent():
#     return ag.LabelDecisorAgent(
#         label_predictor=ag.GraphLabelLoader(
#             simulator=simulator,
#         ),
#         decision_maker=ag.RDecisionMaker(
#             rand=0.4,
#             seed=None,
#         )
#     )


def get_risky_agent():
    return get_linear_agent(torch.as_tensor([
        [-2]
    ], dtype=torch.float))


def get_safe_agent():
    return get_linear_agent(torch.as_tensor([
        [2]
    ], dtype=torch.float))


def get_risky_ntm_agent():
    return get_linear_agent(torch.as_tensor([
        [-8],
        [8],
        [6],
    ], dtype=torch.float), inverse=True)

def get_safe_ntm_agent():
    return get_linear_agent(torch.as_tensor([
        [8],
        [-8],
        [-6],
    ], dtype=torch.float))


# def get_mc_agent():
#     return ag.LabelDecisorAgent(
#         label_predictor=ag.GraphLabelLoader(
#             simulator=simulator,
#         ),
#         decisor=ag.MarkovChainAgent(
#             rand=0.4,
#             seed=None,
#         )
#     )

## Run

In [8]:
data = []
models = {
    'Random':0,
    'Risky':1,
    'Safe':2,
    'Risky NTM':3,
    'Safe NTM':4,
}

### Random Agent

In [9]:
df_r = run(get_random_agent, simulator, 1000)

100%|██████████| 1000/1000 [00:00<00:00, 5951.64it/s]


In [10]:
df = df_r

In [11]:
df['endings']

Unnamed: 0,run,title,kind
0,0,MaloSilencio,0
1,1,MaloSilencio,0
2,2,MaloSilencio,0
3,3,MaloPadres,0
4,4,MaloAmigos,0
...,...,...,...
995,995,MaloSilencio,0
996,996,MaloSilencio,0
997,997,MaloSilencio,0
998,998,BuenoPadres,1


In [12]:
df['decisions'].groupby(['qid']).mean()
# diferencia entre estos valores y los otros nos dice si tiene predilección por label o no

Unnamed: 0_level_0,run,num,choice,label
qid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
-1.0,499.317539,12.00556,0.0,
1.0,499.5,0.0,0.493,
2.0,499.5,1.0,0.482,0.482
3.0,499.5,3.0,0.507,
4.0,499.5,5.0,0.466,0.534
5.0,499.5,6.0,0.532,0.532
6.0,486.109023,7.0,0.530075,0.530075
7.0,499.5,7.532,0.53,0.53
8.0,501.490566,8.509434,0.449057,0.449057
9.0,499.5,10.062,0.522,0.478


In [13]:
dec = df['decisions'].loc[df['decisions']['qid'] != -1,:]
dec

Unnamed: 0,run,num,qid,choice,label
0,0,0,1.0,1,
1,0,1,2.0,0,0.0
3,0,3,3.0,0,
5,0,5,4.0,0,1.0
6,0,6,5.0,1,1.0
...,...,...,...,...,...
19900,999,9,9.0,1,0.0
19901,999,10,10.0,0,1.0
19902,999,11,11.0,1,0.0
19904,999,13,12.0,0,0.0


In [14]:
dec = dec[['run','qid','choice']].pivot(index='run', columns='qid', values='choice').fillna(-1)
dec['m'] = models['Random']
data.append(dec)
dec

qid,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,14.0,15.0,17.0,m
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,-1.0,-1.0,-1.0,0
1,1.0,1.0,1.0,0.0,0.0,-1.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,-1.0,-1.0,0
2,1.0,1.0,1.0,1.0,0.0,-1.0,0.0,-1.0,1.0,1.0,0.0,0.0,-1.0,-1.0,-1.0,0
3,1.0,1.0,1.0,1.0,1.0,1.0,0.0,-1.0,1.0,0.0,0.0,0.0,-1.0,-1.0,0.0,0
4,0.0,1.0,0.0,0.0,0.0,-1.0,0.0,-1.0,0.0,1.0,1.0,0.0,0.0,-1.0,-1.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,0.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,-1.0,1.0,0
996,0.0,0.0,0.0,1.0,0.0,-1.0,0.0,-1.0,0.0,0.0,1.0,0.0,1.0,-1.0,1.0,0
997,1.0,0.0,1.0,1.0,0.0,-1.0,1.0,0.0,1.0,1.0,0.0,0.0,-1.0,-1.0,-1.0,0
998,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,-1.0,-1.0,-1.0,0


### Risky Agent

In [15]:
df_risk = run(get_risky_agent, simulator, 1000)

100%|██████████| 1000/1000 [00:03<00:00, 324.32it/s]


In [16]:
df = df_risk

In [17]:
df['endings']

Unnamed: 0,run,title,kind
0,0,MaloSilencio,0
1,1,MaloAmigos,0
2,2,MaloSilencio,0
3,3,MaloSilencio,0
4,4,MaloSilencio,0
...,...,...,...
995,995,MaloSilencio,0
996,996,MaloSilencio,0
997,997,MaloSilencio,0
998,998,MaloSilencio,0


In [18]:
df['decisions'].groupby(['qid']).mean()
# diferencia entre estos valores y los otros nos dice si tiene predilección por label o no

Unnamed: 0_level_0,run,num,choice,label
qid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
-1.0,499.637489,11.490059,0.0,
1.0,499.5,0.0,0.78,
2.0,499.5,1.0,0.201,0.201
3.0,499.5,3.0,0.49,
4.0,499.5,5.0,0.868,0.132
5.0,499.5,6.0,0.236,0.236
6.0,514.491525,7.0,0.224576,0.224576
7.0,499.5,7.236,0.279,0.279
8.0,508.648746,8.243728,0.315412,0.315412
9.0,499.5,9.515,0.803,0.197


In [19]:
dec = df['decisions'].loc[df['decisions']['qid'] != -1,:]
dec

Unnamed: 0,run,num,qid,choice,label
0,0,0,1.0,1,
1,0,1,2.0,0,0.0
3,0,3,3.0,1,
5,0,5,4.0,1,0.0
6,0,6,5.0,1,1.0
...,...,...,...,...,...
18830,999,9,8.0,0,0.0
18832,999,11,9.0,1,0.0
18833,999,12,10.0,1,0.0
18834,999,13,11.0,0,1.0


In [20]:
dec = dec[['run','qid','choice']].pivot(index='run', columns='qid', values='choice').fillna(-1)
dec['m'] = models['Risky']
data.append(dec)
dec

qid,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,14.0,15.0,17.0,m
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,-1.0,1.0,1.0,1.0,0.0,-1.0,-1.0,-1.0,1
1,1.0,0.0,1.0,1.0,0.0,-1.0,0.0,-1.0,0.0,1.0,1.0,0.0,0.0,-1.0,-1.0,1
2,1.0,0.0,0.0,1.0,0.0,-1.0,0.0,-1.0,1.0,1.0,1.0,0.0,-1.0,-1.0,-1.0,1
3,0.0,0.0,0.0,1.0,0.0,-1.0,1.0,1.0,1.0,1.0,1.0,0.0,-1.0,-1.0,-1.0,1
4,1.0,0.0,1.0,1.0,1.0,0.0,0.0,-1.0,1.0,1.0,1.0,0.0,-1.0,-1.0,-1.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,1.0,0.0,0.0,1.0,0.0,-1.0,0.0,-1.0,1.0,1.0,1.0,0.0,-1.0,-1.0,-1.0,1
996,1.0,0.0,1.0,1.0,1.0,0.0,0.0,-1.0,1.0,1.0,1.0,0.0,-1.0,-1.0,-1.0,1
997,1.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,-1.0,-1.0,-1.0,1
998,1.0,1.0,1.0,1.0,0.0,-1.0,0.0,-1.0,1.0,1.0,1.0,0.0,-1.0,-1.0,-1.0,1


### Risk Aversion (Safe) Agent

In [21]:
df_no_risky = run(get_safe_agent, simulator, 1_000_000)

100%|██████████| 1000000/1000000 [54:28<00:00, 305.95it/s] 


In [22]:
df = df_no_risky

In [23]:
df['endings']

Unnamed: 0,run,title,kind
0,0,MaloSilencio,0
1,1,MaloAmigos,0
2,2,MaloAmigos,0
3,3,MaloAmanda,0
4,4,MaloSilencio,0
...,...,...,...
999995,999995,BuenoAmanda,1
999996,999996,MaloAmanda,0
999997,999997,MaloPadres,0
999998,999998,MaloAmigos,0


In [24]:
df['decisions'].groupby(['qid']).mean()
# diferencia entre estos valores y los otros nos dice si tiene predilección por label o no

Unnamed: 0_level_0,run,num,choice,label
qid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
-1.0,500046.47329,11.929874,0.0,
1.0,499999.5,0.0,0.228251,
2.0,499999.5,1.0,0.787537,0.787537
3.0,499999.5,3.0,0.50014,
4.0,499999.5,5.0,0.132352,0.867648
5.0,499999.5,6.0,0.753508,0.753508
6.0,499975.366924,7.0,0.759587,0.759587
7.0,499999.5,7.753508,0.735003,0.735003
8.0,500042.86289,8.753435,0.741315,0.741315
9.0,499999.5,10.488511,0.189359,0.810641


In [25]:
dec = df['decisions'].loc[df['decisions']['qid'] != -1,:]
dec

Unnamed: 0,run,num,qid,choice,label
0,0,0,1.0,0,
1,0,1,2.0,1,1.0
3,0,3,3.0,1,
5,0,5,4.0,1,0.0
6,0,6,5.0,0,0.0
...,...,...,...,...,...
20160449,999999,9,8.0,1,1.0
20160451,999999,11,9.0,0,1.0
20160452,999999,12,10.0,0,1.0
20160453,999999,13,11.0,0,1.0


In [26]:
dec = dec[['run','qid','choice']].pivot(index='run', columns='qid', values='choice').fillna(-1)
dec['m'] = models['Safe']
data.append(dec)
dec

qid,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,14.0,15.0,17.0,m
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,0.0,1.0,1.0,1.0,0.0,-1.0,1.0,1.0,1.0,0.0,0.0,0.0,-1.0,-1.0,1.0,2
1,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,-1.0,2
2,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,-1.0,-1.0,2
3,0.0,1.0,0.0,0.0,0.0,-1.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,2
4,0.0,1.0,1.0,0.0,1.0,0.0,0.0,-1.0,0.0,0.0,1.0,0.0,1.0,-1.0,1.0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
999995,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,-1.0,-1.0,-1.0,2
999996,0.0,0.0,0.0,0.0,0.0,-1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,2
999997,0.0,1.0,0.0,0.0,0.0,-1.0,1.0,1.0,1.0,0.0,0.0,0.0,-1.0,-1.0,0.0,2
999998,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,-1.0,-1.0,2


### Risky But Not Too Much Agent

In [27]:
df_risk_ntm = run(get_risky_ntm_agent, simulator, 100_000)

100%|██████████| 100000/100000 [06:47<00:00, 245.62it/s]


In [28]:
df = df_risk_ntm

In [29]:
df['endings']

Unnamed: 0,run,title,kind
0,0,MaloSilencio,0
1,1,MaloPadres,0
2,2,MaloSilencio,0
3,3,MaloSilencio,0
4,4,MaloSilencio,0
...,...,...,...
99995,99995,MaloSilencio,0
99996,99996,MaloSilencio,0
99997,99997,MaloAmigos,0
99998,99998,MaloSilencio,0


In [30]:
df['decisions'].groupby(['qid']).mean()
# diferencia entre estos valores y los otros nos dice si tiene predilección por label o no

Unnamed: 0_level_0,run,num,choice,label
qid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
-1.0,49999.336158,12.044524,0.0,
1.0,49999.5,0.0,0.99209,
2.0,49999.5,1.0,0.42756,0.42756
3.0,49999.5,3.0,0.49896,
4.0,49999.5,5.0,0.92015,0.07985
5.0,49999.5,6.0,0.67321,0.67321
6.0,49979.460777,7.0,0.548016,0.548016
7.0,49999.5,7.67321,0.46671,0.46671
8.0,49987.700649,8.439352,0.4593,0.4593
9.0,49999.5,10.13992,0.69183,0.30817


In [31]:
dec = df['decisions'].loc[df['decisions']['qid'] != -1,:]
dec

Unnamed: 0,run,num,qid,choice,label
0,0,0,1.0,1,
1,0,1,2.0,0,0.0
3,0,3,3.0,1,
5,0,5,4.0,1,0.0
6,0,6,5.0,0,0.0
...,...,...,...,...,...
1974812,99999,10,9.0,0,1.0
1974813,99999,11,10.0,1,0.0
1974814,99999,12,11.0,0,1.0
1974816,99999,14,12.0,0,0.0


In [32]:
dec = dec[['run','qid','choice']].pivot(index='run', columns='qid', values='choice').fillna(-1)
dec['m'] = models['Risky NTM']
data.append(dec)
dec

qid,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,14.0,15.0,17.0,m
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,1.0,0.0,1.0,1.0,0.0,-1.0,1.0,0.0,1.0,1.0,0.0,0.0,-1.0,-1.0,-1.0,3
1,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,-1.0,-1.0,0.0,3
2,1.0,0.0,1.0,1.0,1.0,1.0,0.0,-1.0,1.0,1.0,0.0,0.0,-1.0,-1.0,-1.0,3
3,1.0,1.0,0.0,1.0,1.0,1.0,0.0,-1.0,1.0,1.0,0.0,0.0,-1.0,-1.0,-1.0,3
4,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,-1.0,-1.0,1.0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,1.0,1.0,0.0,1.0,1.0,1.0,0.0,-1.0,1.0,1.0,0.0,0.0,-1.0,-1.0,-1.0,3
99996,1.0,1.0,0.0,1.0,1.0,1.0,0.0,-1.0,1.0,0.0,1.0,0.0,-1.0,-1.0,1.0,3
99997,1.0,0.0,0.0,1.0,0.0,-1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,-1.0,-1.0,3
99998,1.0,1.0,1.0,1.0,1.0,1.0,0.0,-1.0,1.0,1.0,0.0,0.0,-1.0,-1.0,-1.0,3


### Safe But Not Too Much Agent

In [33]:
df_safe_ntm = run(get_safe_ntm_agent, simulator, 100_000)

100%|██████████| 100000/100000 [06:36<00:00, 252.08it/s]


In [34]:
df = df_safe_ntm

In [35]:
df['endings']

Unnamed: 0,run,title,kind
0,0,MaloAmigos,0
1,1,MaloAmigos,0
2,2,MaloAmigos,0
3,3,MaloPadres,0
4,4,MaloPadres,0
...,...,...,...
99995,99995,MaloAmigos,0
99996,99996,MaloAmigos,0
99997,99997,MaloPadres,0
99998,99998,MaloAmigos,0


In [36]:
df['decisions'].groupby(['qid']).mean()
# diferencia entre estos valores y los otros nos dice si tiene predilección por label o no

Unnamed: 0_level_0,run,num,choice,label
qid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
-1.0,50002.396556,12.114823,0.0,
1.0,49999.5,0.0,0.0075,
2.0,49999.5,1.0,0.85928,0.85928
3.0,49999.5,3.0,0.50155,
4.0,49999.5,5.0,0.63769,0.36231
5.0,49999.5,6.0,0.6071,0.6071
6.0,49847.131197,7.0,0.751408,0.751408
7.0,49999.5,7.6071,0.5032,0.5032
8.0,50167.091037,8.45314,0.638056,0.638056
9.0,49999.5,10.1103,0.44703,0.55297


In [37]:
dec = df['decisions'].loc[df['decisions']['qid'] != -1,:]
dec

Unnamed: 0,run,num,qid,choice,label
0,0,0,1.0,1,
1,0,1,2.0,1,1.0
3,0,3,3.0,1,
5,0,5,4.0,1,0.0
6,0,6,5.0,1,1.0
...,...,...,...,...,...
2001308,99999,10,9.0,1,0.0
2001309,99999,11,10.0,0,1.0
2001310,99999,12,11.0,1,0.0
2001312,99999,14,12.0,0,0.0


In [38]:
dec = dec[['run','qid','choice']].pivot(index='run', columns='qid', values='choice').fillna(-1)
dec['m'] = models['Safe NTM']
data.append(dec)
dec

qid,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,14.0,15.0,17.0,m
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,-1.0,0.0,1.0,0.0,0.0,0.0,-1.0,-1.0,4
1,0.0,1.0,1.0,0.0,0.0,-1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,-1.0,-1.0,4
2,0.0,1.0,1.0,1.0,1.0,1.0,0.0,-1.0,0.0,1.0,0.0,0.0,0.0,-1.0,-1.0,4
3,0.0,0.0,0.0,1.0,1.0,1.0,0.0,-1.0,1.0,0.0,1.0,0.0,-1.0,-1.0,0.0,4
4,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,-1.0,-1.0,0.0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,-1.0,-1.0,4
99996,0.0,1.0,0.0,1.0,1.0,1.0,0.0,-1.0,0.0,0.0,1.0,0.0,0.0,1.0,-1.0,4
99997,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,-1.0,-1.0,0.0,4
99998,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,-1.0,-1.0,4


## Data Analysis

In [39]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
# from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
# from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
# from sklearn.inspection import DecisionBoundaryDisplay
from tqdm.auto import tqdm
from sklearn.neural_network import MLPClassifier

In [40]:
df = pd.concat(data, ignore_index=True).fillna(-1)
df.to_csv('results.csv', index=False)
df = pd.read_csv('results.csv')
df

Unnamed: 0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,14.0,15.0,17.0,m
0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,-1.0,-1.0,-1.0,0
1,1.0,1.0,1.0,0.0,0.0,-1.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,-1.0,-1.0,0
2,1.0,1.0,1.0,1.0,0.0,-1.0,0.0,-1.0,1.0,1.0,0.0,0.0,-1.0,-1.0,-1.0,0
3,1.0,1.0,1.0,1.0,1.0,1.0,0.0,-1.0,1.0,0.0,0.0,0.0,-1.0,-1.0,0.0,0
4,0.0,1.0,0.0,0.0,0.0,-1.0,0.0,-1.0,0.0,1.0,1.0,0.0,0.0,-1.0,-1.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1201995,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,-1.0,-1.0,4
1201996,0.0,1.0,0.0,1.0,1.0,1.0,0.0,-1.0,0.0,0.0,1.0,0.0,0.0,1.0,-1.0,4
1201997,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,-1.0,-1.0,0.0,4
1201998,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,-1.0,-1.0,4


In [41]:
n_classes = len(df['m'].unique())

In [42]:
x_train, x_test, y_train, y_test = train_test_split(df.loc[:,df.columns!='m'], df['m'], test_size=0.3)

In [43]:
y_train.value_counts(normalize=True)

2    0.831894
4    0.083365
3    0.083063
1    0.000840
0    0.000838
Name: m, dtype: float64

In [44]:
y_test.value_counts(normalize=True)

2    0.832069
3    0.083502
4    0.082798
0    0.000818
1    0.000813
Name: m, dtype: float64

In [45]:
x_train = x_train.to_numpy()
y_train = y_train.to_numpy()
x_test = x_test.to_numpy()
y_test = y_test.to_numpy()

In [46]:
c_names = [
    'DT',
    'LR',
    'Ada',
    'RF',
    'KNN',
    'MLP',
]

classifiers= [
    lambda : DecisionTreeClassifier(),
    lambda : LogisticRegression(max_iter=500),
    lambda : AdaBoostClassifier(),
    lambda : RandomForestClassifier(),
    lambda : KNeighborsClassifier(n_neighbors=n_classes),
    lambda : MLPClassifier(),
]

In [47]:
m = []
for cf in tqdm(classifiers):
    # print(f"{color.BLUE}{color.BOLD}Classifier {c}{color.END}")
    
    # train model
    c = cf()
    c.fit(x_train, y_train)
    y_pred = c.predict(x_test)

    # f1 scores
    rep = metrics.classification_report(y_true=y_test,y_pred=y_pred, output_dict=True)
    f1 = [rep[str(k)]['f1-score'] for k in models.values()] + [metrics.f1_score(y_true=y_test,y_pred=y_pred, average='weighted')]

    # accuracy
    cm = metrics.confusion_matrix(y_true=y_test,y_pred=y_pred, normalize='true')
    acc = cm.diagonal().tolist() + [rep['accuracy']]

    m.append(pd.DataFrame(data=[f1, acc], columns=list(models.keys()) + ['Global'], index=['F1', 'Accuracy']).round(4))
    
    # # get evaluation info
    # y_pred = c.predict(x_test)
    # print(metrics.classification_report(y_true=y_test,y_pred=y_pred, digits=3))
    # print(metrics.confusion_matrix(y_true=y_test,y_pred=y_pred, normalize='true').round(3))
    # print(metrics.f1_score(y_true=y_test,y_pred=y_pred, average='weighted').round(3))
    # print(metrics.matthews_corrcoef(y_true=y_test,y_pred=y_pred).round(3))

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 6/6 [07:47<00:00, 77.95s/it] 


In [48]:
df_m = pd.concat(m, keys=c_names)
df_m.to_excel('results_metrics.xlsx')
df_m

Unnamed: 0,Unnamed: 1,Random,Risky,Safe,Risky NTM,Safe NTM,Global
DT,F1,0.0,0.4867,0.9754,0.9539,0.7885,0.9569
DT,Accuracy,0.0,0.4061,0.9753,0.9636,0.7861,0.9574
LR,F1,0.0,0.446,0.9633,0.9403,0.6391,0.9333
LR,Accuracy,0.0,0.3242,0.9723,0.9516,0.5761,0.9365
Ada,F1,0.0,0.1108,0.9096,0.9264,0.5229,0.8776
Ada,Accuracy,0.0,0.0648,0.8507,0.9406,0.8597,0.8576
RF,F1,0.0,0.4867,0.9754,0.9539,0.7886,0.9569
RF,Accuracy,0.0,0.4061,0.9753,0.9636,0.7859,0.9574
KNN,F1,0.0063,0.4602,0.9729,0.9478,0.7667,0.9525
KNN,Accuracy,0.0034,0.3549,0.9736,0.956,0.7598,0.9531


### Prediction example

In [49]:
c = MLPClassifier()
c.fit(x_train, y_train)

In [50]:
c.predict_proba(x_test[1800][None]).round(4).tolist()

[[0.0005, 0.0, 0.9995, 0.0, 0.0]]

In [51]:
y_test[1800]

2

In [52]:
models

{'Random': 0, 'Risky': 1, 'Safe': 2, 'Risky NTM': 3, 'Safe NTM': 4}