# Example notebook - get relevance and valence scores for all values, rights, and duties in ValuePrism for a given sentence

In [1]:
import pandas as pd
from datasets import load_dataset
import numpy as np
from tqdm import tqdm
# add dir above to access KaleidoSys
import sys
sys.path.append("..")
from KaleidoSys import KaleidoSys

# ValuePrism is gated/private: authenticate first with `huggingface-cli login`
# and request access at https://huggingface.co/datasets/allenai/ValuePrism
dataset = load_dataset("allenai/ValuePrism", name='full')['train'].to_pandas()
# Open question: why aren't there more rows in the dataset?
print(dataset.shape[0])

218408


In [2]:
# One row per distinct (text, vrd) combination, with rows missing either field removed
unique = dataset[['text', 'vrd']].drop_duplicates().dropna()

# Parallel lists: texts[i] is the statement and vrds[i] is its category label
texts = unique['text'].tolist()
vrds = unique['vrd'].tolist()
unique.head()

Unnamed: 0,text,vrd
0,Preservation of life,Value
1,Minimizing harm,Value
2,Autonomy,Value
3,Right to life,Right
4,Right to safety,Right


In [3]:
from KaleidoSys import KaleidoSys
# Request access to the model here: https://huggingface.co/allenai/kaleido-small
# `system` is the notebook-level KaleidoSys instance that the scoring functions below call.
system = KaleidoSys(model_name='allenai/kaleido-small') # sizes: small, base, large, xl, xxl

In [4]:
# Example input; it is scored against every unique (vrd, text) pair at the end of this cell.
sentence = 'I like to speed in traffic'
def get_scores_sentence(sentence, vrds, texts, model=None):
    """Score one sentence against every (vrd, text) pair.

    Runs one valence pass and one relevance pass over all pairs and collects
    the results into a single table with one row per pair, in input order.

    Args:
        sentence: The sentence to evaluate.
        vrds: Sequence of category labels, parallel to ``texts`` (the
            notebook's data uses 'Value', 'Right' and 'Duty').
        texts: Sequence of value/right/duty statements, parallel to ``vrds``.
        model: Object exposing ``get_valence(sentences, vrds, texts)`` and
            ``get_relevance(sentences, vrds, texts)``. When omitted, the
            notebook-level ``system`` is used, so existing calls keep working.

    Returns:
        pandas.DataFrame with columns ``sentence``, ``vrd``, ``text``,
        ``p_relevant``, ``p_irrelevant``, ``p_support``, ``p_oppose`` and
        ``p_either``.

    Raises:
        ValueError: If ``vrds`` and ``texts`` differ in length.
    """
    # Fail fast: a length mismatch would otherwise only surface after both
    # (slow) inference passes have already run.
    if len(vrds) != len(texts):
        raise ValueError(
            f"vrds and texts must have the same length, got {len(vrds)} and {len(texts)}"
        )
    if model is None:
        # Fall back to the notebook-level model instance.
        model = system

    # Plain lists keep the DataFrame on a fresh 0..n-1 index even when the
    # caller hands in pandas Series carrying their own index.
    vrds = list(vrds)
    texts = list(texts)

    # Build the repeated-sentence list once and reuse it everywhere.
    repeated = [sentence] * len(vrds)

    valences = model.get_valence(repeated, vrds, texts)
    relevances = model.get_relevance(repeated, vrds, texts)

    # Column order assumed from the notebook's notes (TODO confirm against KaleidoSys):
    #   valence   -> (support, oppose, either)
    #   relevance -> (relevant, irrelevant)
    return pd.DataFrame({
        'sentence': repeated,
        'vrd': vrds,
        'text': texts,
        'p_relevant': relevances[:, 0],
        'p_irrelevant': relevances[:, 1],
        'p_support': valences[:, 0],
        'p_oppose': valences[:, 1],
        'p_either': valences[:, 2],
    })
# Score the example sentence against every unique (vrd, text) pair and display the result.
# In the recorded run below, each of the two inference passes took roughly two minutes.
df = get_scores_sentence(sentence, vrds, texts)
df

Loading model...


Downloading config.json:   0%|          | 0.00/2.05k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/308M [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

Downloading spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Inference: 100%|██████████| 2695/2695 [02:08<00:00, 20.91it/s]
Inference: 100%|██████████| 2695/2695 [02:16<00:00, 19.71it/s]


Unnamed: 0,sentence,vrd,text,p_relevant,p_irrelevant,p_support,p_oppose,p_either
0,I like to speed in traffic,Value,Preservation of life,0.453398,0.546602,0.283359,0.687095,0.029546
1,I like to speed in traffic,Value,Minimizing harm,0.564977,0.435023,0.342308,0.574129,0.083563
2,I like to speed in traffic,Value,Autonomy,0.757150,0.242850,0.298649,0.486385,0.214967
3,I like to speed in traffic,Right,Right to life,0.770135,0.229865,0.132152,0.845755,0.022093
4,I like to speed in traffic,Right,Right to safety,0.960761,0.039239,0.349980,0.612865,0.037155
...,...,...,...,...,...,...,...,...
21548,I like to speed in traffic,Duty,Duty to develop skills,0.712709,0.287291,0.282167,0.554812,0.163020
21549,I like to speed in traffic,Duty,Duty to maintain a positive learning environment,0.002467,0.997533,0.216002,0.668490,0.115508
21550,I like to speed in traffic,Duty,Duty to provide efficient transportation,0.980792,0.019208,0.516533,0.417625,0.065842
21551,I like to speed in traffic,Duty,Duty to follow family's wishes,0.013476,0.986524,0.141503,0.777858,0.080639


In [11]:
# Several example inputs; each one is scored against every unique (vrd, text) pair below.
sentences = [
    'I like to speed in traffic',
    'I attend a protest',
    'I vote',
]

def get_scores_sentences(sentences, vrds, texts, model=None):
    """Score several sentences against every (vrd, text) pair.

    Every sentence is paired with every (vrd, text) entry, then one valence
    pass and one relevance pass are run over the whole cross product. Rows are
    ordered sentence-major: all pairs for the first sentence, then all pairs
    for the second, and so on.

    Args:
        sentences: Iterable of sentences to evaluate. A bare string is treated
            as a single sentence.
        vrds: Sequence of category labels, parallel to ``texts`` (the
            notebook's data uses 'Value', 'Right' and 'Duty').
        texts: Sequence of value/right/duty statements, parallel to ``vrds``.
        model: Object exposing ``get_valence(sentences, vrds, texts)`` and
            ``get_relevance(sentences, vrds, texts)``. When omitted, the
            notebook-level ``system`` is used, so existing calls keep working.

    Returns:
        pandas.DataFrame with ``len(sentences) * len(vrds)`` rows and columns
        ``sentence``, ``vrd``, ``text``, ``p_relevant``, ``p_irrelevant``,
        ``p_support``, ``p_oppose`` and ``p_either``.

    Raises:
        ValueError: If ``vrds`` and ``texts`` differ in length.
    """
    # Fail fast: a length mismatch would otherwise only surface after both
    # (slow) inference passes have already run.
    if len(vrds) != len(texts):
        raise ValueError(
            f"vrds and texts must have the same length, got {len(vrds)} and {len(texts)}"
        )
    if model is None:
        # Fall back to the notebook-level model instance.
        model = system

    # Iterating a bare string would yield characters, not sentences.
    if isinstance(sentences, str):
        sentences = [sentences]
    sentences = list(sentences)

    # `seq * k` only tiles Python lists; on a pandas Series of strings it
    # repeats each string elementwise instead, so normalise to lists first.
    vrds = list(vrds)
    texts = list(texts)
    n = len(vrds)
    n_sentences = len(sentences)

    # Build each expanded sequence once and reuse it for both model calls and
    # for the output frame.
    all_sentences = [s for s in sentences for _ in range(n)]
    all_vrds = vrds * n_sentences
    all_texts = texts * n_sentences

    valences = model.get_valence(all_sentences, all_vrds, all_texts)
    relevances = model.get_relevance(all_sentences, all_vrds, all_texts)

    # Column order assumed from the notebook's notes (TODO confirm against KaleidoSys):
    #   valence   -> (support, oppose, either)
    #   relevance -> (relevant, irrelevant)
    return pd.DataFrame({
        'sentence': all_sentences,
        'vrd': all_vrds,
        'text': all_texts,
        'p_relevant': relevances[:, 0],
        'p_irrelevant': relevances[:, 1],
        'p_support': valences[:, 0],
        'p_oppose': valences[:, 1],
        'p_either': valences[:, 2],
    })
# Score all example sentences in one batch and display the result.
# Note: this rebinds `df`, replacing the single-sentence result from the earlier cell.
df = get_scores_sentences(sentences, vrds, texts)
df

Inference: 100%|██████████| 8083/8083 [06:17<00:00, 21.40it/s]
Inference: 100%|██████████| 8083/8083 [17:49<00:00,  7.56it/s]  


Unnamed: 0,sentence,vrd,text,p_relevant,p_irrelevant,p_support,p_oppose,p_either
0,I like to speed in traffic,Value,Preservation of life,0.453398,0.546602,0.283359,0.687095,0.029546
1,I like to speed in traffic,Value,Minimizing harm,0.564977,0.435023,0.342308,0.574129,0.083563
2,I like to speed in traffic,Value,Autonomy,0.757150,0.242850,0.298649,0.486385,0.214967
3,I like to speed in traffic,Right,Right to life,0.770135,0.229865,0.132152,0.845755,0.022093
4,I like to speed in traffic,Right,Right to safety,0.960761,0.039239,0.349980,0.612865,0.037155
...,...,...,...,...,...,...,...,...
64654,I vote,Duty,Duty to develop skills,0.737345,0.262655,0.373430,0.151304,0.475265
64655,I vote,Duty,Duty to maintain a positive learning environment,0.042868,0.957132,0.399200,0.200805,0.399995
64656,I vote,Duty,Duty to provide efficient transportation,0.013494,0.986506,0.461810,0.256900,0.281290
64657,I vote,Duty,Duty to follow family's wishes,0.083102,0.916898,0.363201,0.213137,0.423662
