In [1]:
import sys
sys.path.append('../')

In [2]:
import game.agent as ag
from game.run import run
from game.simulator import load_simulator_yarn, GraphSimulator
from pathlib import Path
from tqdm.auto import trange
import pandas as pd
import torch

In [3]:
class color:
    """ANSI escape sequences for styling text printed to a terminal or notebook.

    Prefix a string with one or more of these codes and finish it with ``END``
    to reset the styling.
    """

    # Foreground colours
    PURPLE = '\033[95m'
    CYAN = '\033[96m'
    DARKCYAN = '\033[36m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'

    # Text attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Reset all styling
    END = '\033[0m'

## Description

This game models a single personality trait: safety (risk aversion).

Accordingly, a label of 1 marks a safe (risk-free) action and a label of 0 marks a risky one.



In [4]:
# Number of runs passed to `run` for each agent type.
n_runs = int(1e5)
# n_runs = int(100)  # smaller alternative for a quick trial
# Random-seed setting; None means unseeded.
# NOTE(review): get_linear_agent below hard-codes seed=None instead of reading this value.
seed = None

## Simulator

In [5]:
# Build the shared game simulator from the Yarn scripts in ../yarnScripts.
# NOTE(review): graph_file_sfx presumably selects the pickled graph files by suffix — confirm in game.simulator.
simulator = load_simulator_yarn('../yarnScripts', graph_file_sfx='_graph_transformed.pickle')

## Agents

In [6]:
# label_predictor=ag.TorchLabelPredictor(
#             model_path=Path('./saved_bert/200_[20]_[30]_1_False_bert-base-multilingual-cased_0.001_adamw_8_max_val_mcc_False_False_100'),
#             use_cpu=False,
#         )

def get_linear_agent(a: torch.Tensor, inverse:bool = False):
    """
    Build a LabelDecisorAgent whose BehavioralDecisionMaker uses a single weight
    function that is linear in the memory matrix.

    :param torch.Tensor a: matrix A that characterizes the behavior profile; the
        decision maker is given ``a.shape[0] - 1`` memory steps
    :param bool inverse: if False the weight is ``(a * m).sum()``; if True it is
        ``(a * (s - m + t)).sum()``, where ``t`` is a column vector with 1 in its
        first row and 0 elsewhere
    :return: agent that takes labels from a GraphLabelLoader over the module-level
        ``simulator`` and decides with the unseeded, non-deterministic decision
        maker built here
    """
    if inverse:
        # Column vector that is 1 in the first row only
        first_row = torch.zeros(a.shape[0], 1)
        first_row[0] = 1

        def weight(m, s):
            return (a * (s - m + first_row)).sum()
    else:
        def weight(m, s):
            return (a * m).sum()

    # Both variants share every setting except the weight function
    decision_maker = ag.BehavioralDecisionMaker(
        weight_funcs=[weight],
        memory_steps=a.shape[0] - 1,
        seed=None,
        deterministic=False,
    )

    label_loader = ag.GraphLabelLoader(simulator=simulator)
    return ag.LabelDecisorAgent(
        label_predictor=label_loader,
        decision_maker=decision_maker,
    )

Values (entries of the matrix `a`) greater than 0 favor safety.

Values lower than 0 disfavor safety (i.e. they favor risk).

In [7]:
def get_random_agent():
    """Create a new ``ag.RandomAgent``, passing ``None`` as its only argument."""
    # NOTE(review): the None is presumably the seed — confirm against RandomAgent's signature
    agent = ag.RandomAgent(None)
    return agent


# def get_r_agent():
#     return ag.LabelDecisorAgent(
#         label_predictor=ag.GraphLabelLoader(
#             simulator=simulator,
#         ),
#         decision_maker=ag.RDecisionMaker(
#             rand=0.4,
#             seed=None,
#         )
#     )


def get_risky_agent():
    """Linear agent with the 1x1 profile ``[[-2]]`` (a negative value, i.e. favoring risk)."""
    profile = torch.as_tensor([[-2]], dtype=torch.float)
    return get_linear_agent(profile)


def get_safe_agent():
    """Linear agent with the 1x1 profile ``[[2]]`` (a positive value, i.e. favoring safety)."""
    profile = torch.as_tensor([[2]], dtype=torch.float)
    return get_linear_agent(profile)


def get_risky_ntm_agent():
    """"Risky but not too much" agent: 3x1 profile ``[-8, 8, 6]`` used in inverse mode.

    Three rows give the decision maker two memory steps.
    """
    profile = torch.as_tensor([[-8], [8], [6]], dtype=torch.float)
    return get_linear_agent(profile, inverse=True)

def get_safe_ntm_agent():
    """"Safe but not too much" agent: 3x1 profile ``[8, -8, -6]`` in the default (non-inverse) mode.

    Three rows give the decision maker two memory steps.
    """
    profile = torch.as_tensor([[8], [-8], [-6]], dtype=torch.float)
    return get_linear_agent(profile)


# def get_mc_agent():
#     return ag.LabelDecisorAgent(
#         label_predictor=ag.GraphLabelLoader(
#             simulator=simulator,
#         ),
#         decisor=ag.MarkovChainAgent(
#             rand=0.4,
#             seed=None,
#         )
#     )

## Run

In [8]:
# Per-agent choice tables are appended here by the run sections below
data = []
# Agent-type name -> integer class id, numbered in listing order
models = {
    name: class_id
    for class_id, name in enumerate(
        ('Random', 'Risky', 'Safe', 'Risky NTM', 'Safe NTM')
    )
}

### Random Agent

In [9]:
# Simulate n_runs games with the random agent. `run` receives the factory itself, not an
# agent instance (presumably to build a fresh agent per game — confirm in game.run).
df_r = run(get_random_agent, simulator, n_runs)

100%|██████████| 100000/100000 [00:16<00:00, 5966.67it/s]


In [10]:
# Point the shared `df` name at the random-agent results for the inspection cells below.
df = df_r

In [11]:
# Show the 'endings' table of the current results.
df['endings']

Unnamed: 0,run,title,kind
0,0,MaloSilencio,0
1,1,MaloSilencio,0
2,2,MaloSilencio,0
3,3,MaloAmigos,0
4,4,MaloSilencio,0
...,...,...,...
99995,99995,MaloSilencio,0
99996,99996,MaloSilencio,0
99997,99997,MaloSilencio,0
99998,99998,MaloPadres,0


In [12]:
# Mean of every 'decisions' column per question id (qid).
df['decisions'].groupby(['qid']).mean()
# The difference between these values and the others tells us whether it has a preference for the label or not

Unnamed: 0_level_0,run,num,choice,label
qid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
-1.0,49994.378823,11.966649,0.0,
1.0,49999.5,0.0,0.50163,
2.0,49999.5,1.0,0.50151,0.50151
3.0,49999.5,3.0,0.50074,
4.0,49999.5,5.0,0.50138,0.49862
5.0,49999.5,6.0,0.5008,0.5008
6.0,50028.556969,7.0,0.49998,0.49998
7.0,49999.5,7.5008,0.49975,0.49975
8.0,50175.523402,8.50037,0.495168,0.495168
9.0,49999.5,10.00055,0.4996,0.5004


In [13]:
# Drop the rows whose qid is -1 (presumably steps that are not questions — confirm in game.run).
dec = df['decisions'].loc[df['decisions']['qid'] != -1,:]
dec

Unnamed: 0,run,num,qid,choice,label
0,0,0,1.0,1,
1,0,1,2.0,1,1.0
3,0,3,3.0,1,
5,0,5,4.0,0,1.0
6,0,6,5.0,0,0.0
...,...,...,...,...,...
1988018,99999,11,9.0,1,0.0
1988019,99999,12,10.0,0,1.0
1988020,99999,13,11.0,0,1.0
1988022,99999,15,12.0,0,0.0


In [14]:
# Reshape to one row per run and one column per qid holding the choice made
# (-1 where a run has no decision for that qid), then tag every row with the
# class id of the agent type that produced it.
dec = (
    dec[['run', 'qid', 'choice']]
    .pivot(index='run', columns='qid', values='choice')
    .fillna(-1)
    .assign(m=models['Random'])
)
data.append(dec)
dec

qid,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,14.0,15.0,17.0,m
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,1.0,1.0,1.0,0.0,0.0,-1.0,1.0,1.0,1.0,1.0,0.0,0.0,-1.0,-1.0,-1.0,0
1,1.0,1.0,0.0,0.0,0.0,-1.0,1.0,1.0,1.0,0.0,0.0,0.0,-1.0,-1.0,1.0,0
2,1.0,0.0,0.0,1.0,0.0,-1.0,1.0,1.0,1.0,1.0,0.0,0.0,-1.0,-1.0,-1.0,0
3,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,-1.0,0.0,1.0,0.0,0.0,0.0,-1.0,-1.0,0
4,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,-1.0,-1.0,-1.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,-1.0,-1.0,1.0,0
99996,0.0,0.0,0.0,1.0,0.0,-1.0,1.0,0.0,1.0,1.0,1.0,0.0,-1.0,-1.0,-1.0,0
99997,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,-1.0,-1.0,-1.0,0
99998,0.0,1.0,0.0,0.0,1.0,1.0,0.0,-1.0,0.0,0.0,1.0,0.0,1.0,-1.0,0.0,0


### Risky Agent

In [15]:
# Simulate n_runs games with the risky agent. `run` receives the factory itself, not an
# agent instance (presumably to build a fresh agent per game — confirm in game.run).
df_risk = run(get_risky_agent, simulator, n_runs)

100%|██████████| 100000/100000 [05:16<00:00, 315.81it/s]


In [16]:
# Point the shared `df` name at the risky-agent results for the inspection cells below.
df = df_risk

In [17]:
# Show the 'endings' table of the current results.
df['endings']

Unnamed: 0,run,title,kind
0,0,MaloSilencio,0
1,1,MaloSilencio,0
2,2,MaloSilencio,0
3,3,MaloSilencio,0
4,4,MaloSilencio,0
...,...,...,...
99995,99995,MaloAmigos,0
99996,99996,MaloSilencio,0
99997,99997,MaloSilencio,0
99998,99998,MaloSilencio,0


In [18]:
# Mean of every 'decisions' column per question id (qid).
df['decisions'].groupby(['qid']).mean()
# The difference between these values and the others tells us whether it has a preference for the label or not

Unnamed: 0_level_0,run,num,choice,label
qid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
-1.0,49998.965998,11.487514,0.0,
1.0,49999.5,0.0,0.77252,
2.0,49999.5,1.0,0.21335,0.21335
3.0,49999.5,3.0,0.50041,
4.0,49999.5,5.0,0.86891,0.13109
5.0,49999.5,6.0,0.24617,0.24617
6.0,50013.021855,7.0,0.239266,0.239266
7.0,49999.5,7.24617,0.2679,0.2679
8.0,50038.971295,8.248339,0.261478,0.261478
9.0,49999.5,9.51407,0.81095,0.18905


In [19]:
# Drop the rows whose qid is -1 (presumably steps that are not questions — confirm in game.run).
dec = df['decisions'].loc[df['decisions']['qid'] != -1,:]
dec

Unnamed: 0,run,num,qid,choice,label
0,0,0,1.0,1,
1,0,1,2.0,0,0.0
3,0,3,3.0,1,
5,0,5,4.0,1,0.0
6,0,6,5.0,0,0.0
...,...,...,...,...,...
1883237,99999,7,7.0,0,0.0
1883239,99999,9,9.0,1,0.0
1883240,99999,10,10.0,1,0.0
1883241,99999,11,11.0,1,0.0


In [20]:
# Reshape to one row per run and one column per qid holding the choice made
# (-1 where a run has no decision for that qid), then tag every row with the
# class id of the agent type that produced it.
dec = (
    dec[['run', 'qid', 'choice']]
    .pivot(index='run', columns='qid', values='choice')
    .fillna(-1)
    .assign(m=models['Risky'])
)
data.append(dec)
dec

qid,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,14.0,15.0,17.0,m
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,1.0,0.0,1.0,1.0,0.0,-1.0,1.0,1.0,1.0,1.0,1.0,0.0,-1.0,-1.0,-1.0,1
1,1.0,0.0,0.0,1.0,1.0,0.0,0.0,-1.0,1.0,1.0,1.0,0.0,-1.0,-1.0,-1.0,1
2,1.0,0.0,1.0,1.0,0.0,-1.0,0.0,-1.0,1.0,1.0,1.0,0.0,-1.0,-1.0,-1.0,1
3,1.0,0.0,1.0,1.0,0.0,-1.0,0.0,-1.0,1.0,1.0,1.0,0.0,-1.0,-1.0,-1.0,1
4,1.0,0.0,1.0,1.0,1.0,0.0,0.0,-1.0,1.0,1.0,1.0,0.0,-1.0,-1.0,-1.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,0.0,0.0,1.0,1.0,0.0,-1.0,0.0,-1.0,0.0,1.0,1.0,0.0,0.0,-1.0,-1.0,1
99996,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,-1.0,-1.0,-1.0,1
99997,1.0,0.0,0.0,1.0,0.0,-1.0,1.0,0.0,1.0,1.0,0.0,0.0,-1.0,-1.0,-1.0,1
99998,1.0,0.0,1.0,1.0,0.0,-1.0,0.0,-1.0,1.0,1.0,1.0,0.0,-1.0,-1.0,-1.0,1


### Risk Aversion (Safe) Agent

In [21]:
# Simulate n_runs games with the safe (risk-averse) agent. `run` receives the factory itself,
# not an agent instance (presumably to build a fresh agent per game — confirm in game.run).
df_no_risky = run(get_safe_agent, simulator, n_runs)

100%|██████████| 100000/100000 [05:39<00:00, 294.23it/s]


In [22]:
# Point the shared `df` name at the safe-agent results for the inspection cells below.
df = df_no_risky

In [23]:
# Show the 'endings' table of the current results.
df['endings']

Unnamed: 0,run,title,kind
0,0,BuenoPadres,1
1,1,MaloAmanda,0
2,2,BuenoAmanda,1
3,3,MaloAmanda,0
4,4,MaloAmanda,0
...,...,...,...
99995,99995,MaloPadres,0
99996,99996,MaloAmigos,0
99997,99997,BuenoAmanda,1
99998,99998,MaloAmanda,0


In [24]:
# Mean of every 'decisions' column per question id (qid).
df['decisions'].groupby(['qid']).mean()
# The difference between these values and the others tells us whether it has a preference for the label or not

Unnamed: 0_level_0,run,num,choice,label
qid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
-1.0,49987.730488,11.931889,0.0,
1.0,49999.5,0.0,0.23032,
2.0,49999.5,1.0,0.79033,0.79033
3.0,49999.5,3.0,0.49732,
4.0,49999.5,5.0,0.13308,0.86692
5.0,49999.5,6.0,0.75363,0.75363
6.0,49974.417207,7.0,0.758529,0.758529
7.0,49999.5,7.75363,0.73679,0.73679
8.0,50137.141886,8.752535,0.740971,0.740971
9.0,49999.5,10.49042,0.19095,0.80905


In [25]:
# Drop the rows whose qid is -1 (presumably steps that are not questions — confirm in game.run).
dec = df['decisions'].loc[df['decisions']['qid'] != -1,:]
dec

Unnamed: 0,run,num,qid,choice,label
0,0,0,1.0,0,
1,0,1,2.0,1,1.0
3,0,3,3.0,0,
5,0,5,4.0,0,1.0
6,0,6,5.0,1,1.0
...,...,...,...,...,...
2016321,99999,9,8.0,1,1.0
2016323,99999,11,9.0,0,1.0
2016324,99999,12,10.0,0,1.0
2016325,99999,13,11.0,0,1.0


In [26]:
# Reshape to one row per run and one column per qid holding the choice made
# (-1 where a run has no decision for that qid), then tag every row with the
# class id of the agent type that produced it.
dec = (
    dec[['run', 'qid', 'choice']]
    .pivot(index='run', columns='qid', values='choice')
    .fillna(-1)
    .assign(m=models['Safe'])
)
data.append(dec)
dec

qid,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,14.0,15.0,17.0,m
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,-1.0,-1.0,-1.0,2
1,0.0,1.0,0.0,0.0,1.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,2
2,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,-1.0,-1.0,-1.0,2
3,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,2
4,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,-1.0,-1.0,0.0,2
99996,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,-1.0,2
99997,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,-1.0,-1.0,-1.0,2
99998,0.0,1.0,1.0,0.0,1.0,1.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,2


### Risky But Not Too Much Agent

In [27]:
# Simulate n_runs games with the "risky but not too much" agent. `run` receives the factory itself,
# not an agent instance (presumably to build a fresh agent per game — confirm in game.run).
df_risk_ntm = run(get_risky_ntm_agent, simulator, n_runs)

100%|██████████| 100000/100000 [07:04<00:00, 235.49it/s]


In [28]:
# Point the shared `df` name at the risky-NTM results for the inspection cells below.
df = df_risk_ntm

In [29]:
# Show the 'endings' table of the current results.
df['endings']

Unnamed: 0,run,title,kind
0,0,MaloSilencio,0
1,1,MaloAmigos,0
2,2,MaloSilencio,0
3,3,MaloSilencio,0
4,4,MaloSilencio,0
...,...,...,...
99995,99995,MaloSilencio,0
99996,99996,MaloSilencio,0
99997,99997,MaloSilencio,0
99998,99998,MaloSilencio,0


In [30]:
# Mean of every 'decisions' column per question id (qid).
df['decisions'].groupby(['qid']).mean()
# The difference between these values and the others tells us whether it has a preference for the label or not

Unnamed: 0_level_0,run,num,choice,label
qid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
-1.0,50000.007201,12.044365,0.0,
1.0,49999.5,0.0,0.99254,
2.0,49999.5,1.0,0.42696,0.42696
3.0,49999.5,3.0,0.50272,
4.0,49999.5,5.0,0.91979,0.08021
5.0,49999.5,6.0,0.66993,0.66993
6.0,50082.700819,7.0,0.55364,0.55364
7.0,49999.5,7.66993,0.46953,0.46953
8.0,49932.430707,8.435904,0.459864,0.459864
9.0,49999.5,10.13946,0.69215,0.30785


In [31]:
# Drop the rows whose qid is -1 (presumably steps that are not questions — confirm in game.run).
dec = df['decisions'].loc[df['decisions']['qid'] != -1,:]
dec

Unnamed: 0,run,num,qid,choice,label
0,0,0,1.0,1,
1,0,1,2.0,1,1.0
3,0,3,3.0,0,
5,0,5,4.0,1,0.0
6,0,6,5.0,1,1.0
...,...,...,...,...,...
1974827,99999,10,9.0,1,0.0
1974828,99999,11,10.0,0,1.0
1974829,99999,12,11.0,1,0.0
1974831,99999,14,12.0,0,0.0


In [32]:
# Reshape to one row per run and one column per qid holding the choice made
# (-1 where a run has no decision for that qid), then tag every row with the
# class id of the agent type that produced it.
dec = (
    dec[['run', 'qid', 'choice']]
    .pivot(index='run', columns='qid', values='choice')
    .fillna(-1)
    .assign(m=models['Risky NTM'])
)
data.append(dec)
dec

qid,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,14.0,15.0,17.0,m
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,-1.0,0.0,1.0,0.0,0.0,1.0,-1.0,-1.0,3
1,1.0,1.0,0.0,1.0,1.0,1.0,0.0,-1.0,0.0,1.0,1.0,0.0,0.0,-1.0,-1.0,3
2,1.0,0.0,0.0,1.0,1.0,0.0,0.0,-1.0,1.0,1.0,0.0,0.0,-1.0,-1.0,-1.0,3
3,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,-1.0,-1.0,-1.0,3
4,1.0,1.0,1.0,1.0,1.0,1.0,0.0,-1.0,0.0,1.0,1.0,0.0,1.0,-1.0,-1.0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,-1.0,-1.0,3
99996,1.0,0.0,1.0,1.0,0.0,-1.0,1.0,1.0,1.0,1.0,0.0,0.0,-1.0,-1.0,-1.0,3
99997,1.0,0.0,1.0,1.0,0.0,-1.0,1.0,0.0,1.0,1.0,0.0,0.0,-1.0,-1.0,-1.0,3
99998,1.0,1.0,1.0,0.0,0.0,-1.0,1.0,1.0,1.0,1.0,0.0,0.0,-1.0,-1.0,-1.0,3


### Safe But Not Too Much Agent

In [33]:
# Simulate n_runs games with the "safe but not too much" agent. `run` receives the factory itself,
# not an agent instance (presumably to build a fresh agent per game — confirm in game.run).
df_safe_ntm = run(get_safe_ntm_agent, simulator, n_runs)

100%|██████████| 100000/100000 [06:51<00:00, 242.92it/s]


In [34]:
# Point the shared `df` name at the safe-NTM results for the inspection cells below.
df = df_safe_ntm

In [35]:
# Show the 'endings' table of the current results.
df['endings']

Unnamed: 0,run,title,kind
0,0,MaloSilencio,0
1,1,MaloSilencio,0
2,2,MaloAmigos,0
3,3,MaloAmigos,0
4,4,MaloPadres,0
...,...,...,...
99995,99995,MaloAmigos,0
99996,99996,MaloAmigos,0
99997,99997,MaloPadres,0
99998,99998,MaloSilencio,0


In [36]:
# Mean of every 'decisions' column per question id (qid).
df['decisions'].groupby(['qid']).mean()
# The difference between these values and the others tells us whether it has a preference for the label or not

Unnamed: 0_level_0,run,num,choice,label
qid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
-1.0,49999.342613,12.11485,0.0,
1.0,49999.5,0.0,0.00736,
2.0,49999.5,1.0,0.85685,0.85685
3.0,49999.5,3.0,0.50006,
4.0,49999.5,5.0,0.63785,0.36215
5.0,49999.5,6.0,0.6059,0.6059
6.0,49963.321522,7.0,0.755587,0.755587
7.0,49999.5,7.6059,0.50267,0.50267
8.0,49968.505859,8.449281,0.635785,0.635785
9.0,49999.5,10.10857,0.44477,0.55523


In [37]:
# Drop the rows whose qid is -1 (presumably steps that are not questions — confirm in game.run).
dec = df['decisions'].loc[df['decisions']['qid'] != -1,:]
dec

Unnamed: 0,run,num,qid,choice,label
0,0,0,1.0,0,
1,0,1,2.0,1,1.0
3,0,3,3.0,0,
5,0,5,4.0,1,0.0
6,0,6,5.0,1,1.0
...,...,...,...,...,...
2001241,99999,10,9.0,0,1.0
2001242,99999,11,10.0,1,0.0
2001243,99999,12,11.0,0,1.0
2001245,99999,14,12.0,0,0.0


In [38]:
# Reshape to one row per run and one column per qid holding the choice made
# (-1 where a run has no decision for that qid), then tag every row with the
# class id of the agent type that produced it.
dec = (
    dec[['run', 'qid', 'choice']]
    .pivot(index='run', columns='qid', values='choice')
    .fillna(-1)
    .assign(m=models['Safe NTM'])
)
data.append(dec)
dec

qid,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,14.0,15.0,17.0,m
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,-1.0,1.0,1.0,0.0,0.0,-1.0,-1.0,-1.0,4
1,0.0,1.0,1.0,1.0,1.0,1.0,0.0,-1.0,0.0,1.0,0.0,0.0,1.0,-1.0,-1.0,4
2,0.0,1.0,1.0,0.0,0.0,-1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,-1.0,-1.0,4
3,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,-1.0,-1.0,4
4,0.0,0.0,0.0,0.0,0.0,-1.0,1.0,1.0,1.0,0.0,0.0,0.0,-1.0,-1.0,0.0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,0.0,1.0,1.0,1.0,1.0,1.0,0.0,-1.0,0.0,0.0,1.0,0.0,0.0,1.0,-1.0,4
99996,0.0,1.0,1.0,1.0,1.0,1.0,0.0,-1.0,0.0,1.0,0.0,0.0,0.0,-1.0,-1.0,4
99997,0.0,1.0,0.0,0.0,0.0,-1.0,1.0,1.0,1.0,0.0,1.0,0.0,-1.0,-1.0,0.0,4
99998,0.0,1.0,0.0,0.0,0.0,-1.0,1.0,1.0,1.0,0.0,1.0,0.0,-1.0,-1.0,1.0,4


## Data Analysis

In [39]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
# from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
# from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
# from sklearn.inspection import DecisionBoundaryDisplay
from tqdm.auto import tqdm
from sklearn.neural_network import MLPClassifier

In [41]:
# Load the combined choice table (one row per run, one column per qid, plus the
# agent class id `m`) from the cached CSV. If the cache is missing, rebuild it
# from the per-agent frames collected in `data`, save it, and read it back so
# both paths produce the same frame (same column labels and dtypes).
try:
    df = pd.read_csv('results.csv')
except FileNotFoundError:
    pd.concat(data, ignore_index=True).fillna(-1).to_csv('results.csv', index=False)
    df = pd.read_csv('results.csv')
df

Unnamed: 0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,14.0,15.0,17.0,m
0,1.0,1.0,1.0,0.0,0.0,-1.0,1.0,1.0,1.0,1.0,0.0,0.0,-1.0,-1.0,-1.0,0
1,1.0,1.0,0.0,0.0,0.0,-1.0,1.0,1.0,1.0,0.0,0.0,0.0,-1.0,-1.0,1.0,0
2,1.0,0.0,0.0,1.0,0.0,-1.0,1.0,1.0,1.0,1.0,0.0,0.0,-1.0,-1.0,-1.0,0
3,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,-1.0,0.0,1.0,0.0,0.0,0.0,-1.0,-1.0,0
4,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,-1.0,-1.0,-1.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
499995,0.0,1.0,1.0,1.0,1.0,1.0,0.0,-1.0,0.0,0.0,1.0,0.0,0.0,1.0,-1.0,4
499996,0.0,1.0,1.0,1.0,1.0,1.0,0.0,-1.0,0.0,1.0,0.0,0.0,0.0,-1.0,-1.0,4
499997,0.0,1.0,0.0,0.0,0.0,-1.0,1.0,1.0,1.0,0.0,1.0,0.0,-1.0,-1.0,0.0,4
499998,0.0,1.0,0.0,0.0,0.0,-1.0,1.0,1.0,1.0,0.0,1.0,0.0,-1.0,-1.0,1.0,4


In [42]:
# Number of distinct agent class ids present in the data.
n_classes = len(df['m'].unique())

In [43]:
# Hold out 30% of the rows for testing. Features are every column except `m`;
# the target is the agent class id `m`.
# random_state is wired to the notebook-level `seed`, so the split becomes
# reproducible once a seed is set (seed=None keeps the previous unseeded behaviour).
x_train, x_test, y_train, y_test = train_test_split(
    df.loc[:, df.columns != 'm'],
    df['m'],
    test_size=0.3,
    random_state=seed,
)

In [44]:
# Class balance of the training labels, as fractions.
y_train.value_counts(normalize=True)

1    0.200500
3    0.200454
2    0.200066
4    0.199894
0    0.199086
Name: m, dtype: float64

In [45]:
# Class balance of the test labels, as fractions.
y_test.value_counts(normalize=True)

0    0.202133
4    0.200247
2    0.199847
3    0.198940
1    0.198833
Name: m, dtype: float64

In [46]:
# Convert the splits to plain NumPy arrays. np.asarray yields the same array as
# .to_numpy() for a DataFrame/Series but also accepts an ndarray, so re-running
# this cell no longer fails with AttributeError.
x_train = np.asarray(x_train)
y_train = np.asarray(y_train)
x_test = np.asarray(x_test)
y_test = np.asarray(y_test)

In [47]:
# Short display names, one per classifier factory below. The two lists are
# parallel, so their order must be kept in sync.
c_names = [
    'DT',
    'LR',
    'Ada',
    'RF',
    'KNN',
    'MLP',
]

# Zero-argument factories so each evaluation starts from a fresh, unfitted estimator.
classifiers= [
    lambda : DecisionTreeClassifier(),
    lambda : LogisticRegression(max_iter=500),
    lambda : AdaBoostClassifier(),
    lambda : RandomForestClassifier(),
    # k is set to the number of classes
    lambda : KNeighborsClassifier(n_neighbors=n_classes),
    lambda : MLPClassifier(),
]

In [48]:
# One metrics table per classifier, in the same order as `classifiers`
m = []
for cf in tqdm(classifiers):
    # Build a fresh estimator, fit it on the training split, predict the test split
    c = cf()
    c.fit(x_train, y_train)
    y_pred = c.predict(x_test)

    # Per-class F1 (the report is keyed by the class id as a string), then the weighted F1
    rep = metrics.classification_report(y_true=y_test, y_pred=y_pred, output_dict=True)
    f1 = [rep[str(k)]['f1-score'] for k in models.values()]
    f1.append(metrics.f1_score(y_true=y_test, y_pred=y_pred, average='weighted'))

    # Diagonal of the row-normalised confusion matrix: the fraction of each true
    # class predicted correctly; the overall accuracy goes last
    cm = metrics.confusion_matrix(y_true=y_test, y_pred=y_pred, normalize='true')
    acc = cm.diagonal().tolist()
    acc.append(rep['accuracy'])

    m.append(
        pd.DataFrame(
            data=[f1, acc],
            columns=list(models.keys()) + ['Global'],
            index=['F1', 'Accuracy'],
        ).round(4)
    )

100%|██████████| 6/6 [03:31<00:00, 35.23s/it]


In [71]:
# Stack the per-classifier metric tables under their short names (outer index level)
# and export the combined table to an Excel file.
df_m = pd.concat(m, keys=c_names)
df_m.to_excel('results_metrics.xlsx')
df_m

Unnamed: 0,Unnamed: 1,Random,Risky,Safe,Risky NTM,Safe NTM,Global
DT,F1,0.5826,0.8187,0.8416,0.8616,0.8805,0.7965
DT,Accuracy,0.5268,0.8096,0.8245,0.9284,0.9256,0.8023
LR,F1,0.5056,0.8049,0.8295,0.8091,0.8205,0.7533
LR,Accuracy,0.4441,0.8151,0.8111,0.8576,0.88,0.7608
Ada,F1,0.4875,0.4658,0.7648,0.6707,0.7618,0.63
Ada,Accuracy,0.4263,0.3326,0.6806,0.9124,0.8855,0.6471
RF,F1,0.5808,0.8188,0.8416,0.8614,0.8804,0.796
RF,Accuracy,0.5222,0.8098,0.8259,0.9288,0.9275,0.8021
KNN,F1,0.5482,0.784,0.8199,0.8413,0.8614,0.7704
KNN,Accuracy,0.5172,0.7887,0.8193,0.8595,0.8877,0.7738


### Prediction example

In [50]:
# Fit a default-parameter MLP for the prediction example (rebinds `c` from the loop above).
c = MLPClassifier()
c.fit(x_train, y_train)

In [68]:
# Class probabilities for one test sample; indexing with [None] adds a leading axis
# so the single row is passed as a one-sample batch.
c.predict_proba(x_test[1800][None]).round(4).tolist()

[[0.0691, 0.0832, 0.0009, 0.8461, 0.0007]]

In [70]:
# True class id of the same test sample.
y_test[1800]

3

In [53]:
# Agent-type name -> class id mapping, for decoding the ids above.
models

{'Random': 0, 'Risky': 1, 'Safe': 2, 'Risky NTM': 3, 'Safe NTM': 4}