In [1]:
# !pip3 install sentence_transformers
# !pip3 install torchview
# !pip install torchsummary
# !brew install graphviz
# !pip install torchviz
%load_ext autotime

import torch
import tqdm
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sentence_transformers import SentenceTransformer, losses
import pandas as pd
import numpy as np
import torch.nn.functional as F
from tqdm.notebook import tqdm
tqdm.pandas()
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.spatial.distance import cosine
# from torchview import draw_graph
# from torchsummary import summary
pd.set_option("max_colwidth", None)

from peft import LoraConfig, get_peft_model, prepare_model_for_int8_training, TaskType, PeftModel # peft-0.7.1
from transformers import (
    AutoModel,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    AutoTokenizer,
    TrainingArguments,
    AutoConfig,
)

from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from scipy.stats import entropy


Matplotlib is building the font cache; this may take a moment.


time: 14.4 s (started: 2024-04-22 14:51:06 +00:00)


In [2]:
# Load the pre-trained "all-mpnet-base-v2" model
# model = AutoModel.from_pretrained('sentence-transformers/all-mpnet-base-v2')
# summary(model, input_size=(2, 100))

# from torchviz import make_dot

# y = model()
# make_dot(y.mean(), params=dict(model.named_parameters()))
bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type='nf4',
        bnb_4bit_compute_dtype='float16',
        bnb_4bit_use_double_quant=False
        )

model_name = 'sentence-transformers/all-mpnet-base-v2'

# Define your custom stance-aware loss function
class SiameseNetworkMPNet(nn.Module):
    def __init__(self, model_name, tokenizer, normalize=True):
        super(SiameseNetworkMPNet, self).__init__()

        self.model = AutoModel.from_pretrained(model_name)#, quantization_config=bnb_config, trust_remote_code=True)
        self.normalize = normalize
        self.tokenizer = tokenizer

    def forward(self, **inputs):
        model_output = self.model(**inputs)
        attention_mask = inputs['attention_mask']
        last_hidden_states = model_output.last_hidden_state  # First element of model_output contains all token embeddings
        embeddings = torch.sum(last_hidden_states * attention_mask.unsqueeze(-1), 1) / torch.clamp(attention_mask.sum(1, keepdim=True), min=1e-9) # mean_pooling
        if self.normalize:
            embeddings = F.layer_norm(embeddings, embeddings.shape[1:])
            embeddings = F.normalize(embeddings, p=2, dim=1)

        return embeddings



# Custom dataset for your DataFrame
class CustomDataset(Dataset):
    def __init__(self, sentences):
        self.sentences = sentences

    def __len__(self):
        return len(self.sentences)

    def __getitem__(self, idx):
        return self.sentences[idx]


def batch_and_encode(df_sentences_col,finetuned_model_x,is_lora):
    models_dir = '/csg_nas/Vahid/Datasets/StanceAwareSBERT/Models/'
    
    torch.cuda.empty_cache()
    # Load the pre-trained "all-mpnet-base-v2" model
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Initialize the model
    model = SiameseNetworkMPNet(model_name=model_name, tokenizer=tokenizer)

    if finetuned_model_x != 'original':
        if is_lora == False:
            model.load_state_dict(torch.load('/csg_nas/Vahid/Datasets/StanceAwareSBERT/Models/'+finetuned_model_x+'.pt'))
        if is_lora == True:
            model = PeftModel.from_pretrained(model, models_dir+finetuned_model_x)
            model = model.merge_and_unload()

    # Move the model to the appropriate device (CPU or GPU)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    # Assuming you have a DataFrame named df with columns "sentence_i" and "sentence_j"
    dataset_i = CustomDataset(df_sentences_col.tolist())

    # Use DataLoader for batch processing
    batch_size = 32
    dataloader_i = DataLoader(dataset_i, batch_size=batch_size, shuffle=False)

    # Lists to store the embeddings for "sentence_i" and "sentence_j"
    embeddings_i_list = []


    # Encode sentences in batches
    with torch.no_grad():
        model.eval()  # Set the model to evaluation mode
        for sentences_i_batch in tqdm(dataloader_i):
            text1 = tokenizer(sentences_i_batch, return_tensors="pt", max_length=128, truncation=True, padding="max_length")
            embeddings_i = model(**text1.to(device)).tolist()
            embeddings_i = [[round(value, 4) for value in row] for row in embeddings_i]
#             embeddings_i = encode_sentences(sentences_i_batch).tolist()
            embeddings_i_list.extend(embeddings_i)
    return embeddings_i_list

time: 7.26 ms (started: 2024-04-22 14:51:20 +00:00)


## Play-Ground

In [3]:
is_lora = True
finetuned_model_x = f"MPNet_contriplet_removal_50_margin_40_epoch_4"
device='cuda'

models_dir = '/csg_nas/Vahid/Datasets/StanceAwareSBERT/Models/'
def load_model(model_name,finetuned_model_x,is_lora):
    torch.cuda.empty_cache()
    # Load the pre-trained "all-mpnet-base-v2" model
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Initialize the model
    model = SiameseNetworkMPNet(model_name=model_name, tokenizer=tokenizer)
#     model.to(device)

    if finetuned_model_x != 'original':
        if is_lora == False:
            model.load_state_dict(torch.load(models_dir+finetuned_model_x+'.pt'))
        if is_lora == True:
            model = PeftModel.from_pretrained(model, models_dir+finetuned_model_x)
            model = model.merge_and_unload()
    model.eval()
    return model
tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-mpnet-base-v2')

model_original=load_model('sentence-transformers/all-mpnet-base-v2',"original",is_lora=is_lora)
model_finetuned=load_model('sentence-transformers/all-mpnet-base-v2',finetuned_model_x,is_lora=is_lora)
# model_original.to(device)
# model_finetuned.to(device)

def two_sentence_similarity(text1,text2):
    text1 = tokenizer(text1, return_tensors="pt", max_length=128, truncation=True, padding="max_length")
    text2 = tokenizer(text2, return_tensors="pt", max_length=128, truncation=True, padding="max_length")
    print('Original Model Cosine Similarity = ',cosine_similarity(model_original(**text1).tolist(),model_original(**text2).tolist())[0][0])
    print('FineTuned Model Cosine Similarity = ',cosine_similarity(model_finetuned(**text1).tolist(),model_finetuned(**text2).tolist())[0][0])
    return None

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

time: 13 s (started: 2024-04-22 14:51:20 +00:00)


In [12]:
two_sentence_similarity("The weather is good",
                        "The weather is brilliant")

Original Model Cosine Similarity =  0.7526336018707245
FineTuned Model Cosine Similarity =  0.7984234648022263
time: 559 ms (started: 2024-03-20 16:36:32 +00:00)


# BERT-Search Congresspeople

In [7]:
data=pd.read_csv("/csg_nas/Vahid/Datasets/StanceAwareSBERT/Datasets/congresspeople_alltweets.csv")
data = data.dropna()
data

  data=pd.read_csv("/csg_nas/Vahid/Datasets/StanceAwareSBERT/Datasets/congresspeople_alltweets.csv")


Unnamed: 0,id,screen_name,user_id,time,link,text,party
0,1197519924819877888,RepAdams,2.916087e+09,2019-11-21T09:19:33-05:00,https://www.twitter.com/RepAdams/statuses/1197519924819877888,The House continues its #impeachment investigation today. Watch with me live: \nhttps://www.pscp.tv/w/1lPKqepyoYlKb,D
1,1197602898844274688,RepAdams,2.916087e+09,2019-11-21T14:49:16-05:00,https://www.twitter.com/RepAdams/statuses/1197602898844274688,"Serving as Chair of the @EdLaborCmte Subcommittee on #WorkforceProtections is a responsibility &amp; privilege I embrace with great pride. \n\nToday, I am pleased with the House's passage of H.R. 1309, the Workforce Violence Prevention for Health Care and Social Service Workers Act. http://pbs.twimg.com/ext_tw_video_thumb/1197602701590302720/pu/img/UXIrCeuumqEXcsjI.jpg https://video.twimg.com/ext_tw_video/1197602701590302720/pu/vid/480x270/JMWUWRQ08PkVwakz.mp4?tag=10",D
2,1197658814612459520,RepAdams,2.916087e+09,2019-11-21T18:31:27-05:00,https://www.twitter.com/RepAdams/statuses/1197658814612459520,"#Pancreaticcancer is the third leading cause of cancer-related deaths in the United States. Today, my staff and I wore shades of purple to raise awareness. I encourage you to visit http://pancan.org to learn more &amp; join me in the fight against pancreatic cancer. #PANCaware http://pbs.twimg.com/media/EJ7wV5HXkAA4Oo0.jpg",D
3,1197547605435330560,RepAdams,2.916087e+09,2019-11-21T11:09:33-05:00,https://www.twitter.com/RepAdams/statuses/1197547605435330561,"230 years ago, #NorthCarolina was the 12th state admitted to the Union and I’m proud to represent NC’s 12th Congressional District in Congress!\n\n#HappyBirthday, North Carolina! 🎊 http://pbs.twimg.com/media/EJ6LMwsX0AAc5RR.jpg",D
4,1197612744553508864,RepAdams,2.916087e+09,2019-11-21T15:28:23-05:00,https://www.twitter.com/RepAdams/statuses/1197612744553508869,The Workplace Violence Prevention for Health Care and Social Service Workers Act will protect health care &amp; social service workers from workplace violence by forcing @OSHA_DOL to require employers to implement violence prevention plans.,D
...,...,...,...,...,...,...,...
2321941,1479504101121695744,michaelgwaltz,8.336739e+08,2022-01-07T12:24:00-05:00,https://www.twitter.com/michaelgwaltz/statuses/1479504101121695746,".@JayCollinsFL fought for America around the world. Now, he’s ready to do it again, but this time, in Congress.\n\nProud to endorse a fellow Green Beret and patriot for #FL14! 👊🇺🇸 #FlipTheHouse http://pbs.twimg.com/media/FIgl0YvWQAIXOc-.jpg",R
2321942,1479531953904881664,michaelgwaltz,8.336739e+08,2022-01-07T14:14:40-05:00,https://www.twitter.com/johnondrasik/statuses/1479524323182276610,RT @johnondrasik I have just received notification that @YouTube has taken down the Blood on My Hands - White House Docu-Music video. http://pbs.twimg.com/media/FIhSNVjVkAEnBFg.jpg,R
2321943,1479613572678582272,michaelgwaltz,8.336739e+08,2022-01-07T19:39:00-05:00,https://www.twitter.com/RepMcCaul/statuses/1479575154430722048,RT @RepMcCaul It's absolutely ridiculous @YouTube removed @johnondrasik's video depicting the true events that occurred during @POTUS' horrific withdrawal from #Afghanistan. Americans deserve to see the consequences of @POTUS' decisions. YouTube should put the video back up.,R
2321944,1446850186253422592,michaelgwaltz,8.336739e+08,2021-10-09T10:49:00-04:00,https://www.twitter.com/michaelgwaltz/statuses/1446850186253422596,Brave veterans organizations have dropped everything to help Americans left stranded — and they aren't going to let the State Department turn the page.\n\nNot while hundreds of Americans are still in Afghanistan. No one left behind. http://pbs.twimg.com/ext_tw_video_thumb/1446517796863631362/pu/img/xFw6IAUp4fPRYvE8.jpg https://video.twimg.com/ext_tw_video/1446517796863631362/pu/vid/480x270/WWUmn4fKygjb9tA-.mp4?tag=12,R


time: 41.7 s (started: 2024-04-22 14:52:10 +00:00)


In [8]:
data.drop_duplicates(subset='screen_name')['party'].value_counts()

D    292
R    270
I      2
Name: party, dtype: int64

time: 60.4 ms (started: 2024-04-22 14:52:52 +00:00)


In [9]:
data['party'].value_counts()

D    1441581
R     840666
I       9256
Name: party, dtype: int64

time: 163 ms (started: 2024-04-22 14:52:52 +00:00)


In [15]:
# congress_data=pd.read_pickle("/csg_nas/Vahid/Datasets/StanceAwareSBERT/Datasets/congresspeople_alltweets_vectorized.pkl") 
# final_sample.to_pickle("/csg_nas/Vahid/Datasets/StanceAwareSBERT/Datasets/congresspeople_alltweets_vectorized_100k_sample.pkl")
# final_sample=pd.read_pickle("/csg_nas/Vahid/Datasets/StanceAwareSBERT/Datasets/congresspeople_alltweets_vectorized_100k_sample.pkl")

# # Assuming your DataFrame is named 'df'
# sample_size = 10000

# # Sample 50,000 rows with 'D' party and 50,000 rows with 'R' party
# sampled_df_D = final_sample[final_sample['party'] == 'D'].sample(n=sample_size//2, random_state=1)
# sampled_df_R = final_sample[final_sample['party'] == 'R'].sample(n=sample_size//2, random_state=1)

# # Concatenate the two samples to get the final sample
# final_sample = pd.concat([sampled_df_D, sampled_df_R],ignore_index=True)

# final_sample[finetuned_model_x] = batch_and_encode(df_sentences_col=final_sample['text'],finetuned_model_x = finetuned_model_x)
# final_sample

time: 3.77 s (started: 2024-02-20 09:15:14 +00:00)


In [10]:
data_abortion=data[data['text'].str.contains('abortion')]
data_abortion

Unnamed: 0,id,screen_name,user_id,time,link,text,party
385,1220083093475921920,RepAdams,2.916087e+09,2020-01-22T15:37:32-05:00,https://www.twitter.com/RepAdams/statuses/1220083093475921921,"On the anniversary of #RoevWade, I know abortion access is hanging on by a thread: states blatantly passing unconstitutional abortion bans, courts being stacked against women's rights...\n\nRest assured that I won’t stop fighting for our rights — because we can’t &amp; WON'T go back.",D
688,1471922881621860352,RepAdams,2.916087e+09,2021-12-17T14:18:56-05:00,https://www.twitter.com/ProChoiceCaucus/statuses/1471910504989970438,"RT @ProChoiceCaucus Today marks the 48th anniversary of the #HelmsAmendment – a discriminatory policy that denies abortion care for millions worldwide. We’re proud that the House took a critical step toward ending it this year – it’s past time to #RepealHelms, Hyde, and ALL abortion coverage bans.",D
767,1048302597269065728,RepAdams,2.916087e+09,2018-10-05T16:03:10-04:00,https://www.twitter.com/RepAdams/statuses/1048302597269065736,"If Kavanaugh is confirmed to the Supreme Court, 25 MILLION women risk losing abortion access, including:\n-4.3 million Hispanic or Latina women\n-3.5 Black or African American women\n-800,000 Asian women\n-300,000 American Indian or Alaska Native women",D
778,1466045434414211072,RepAdams,2.916087e+09,2021-12-01T09:04:04-05:00,https://www.twitter.com/RepDianaDeGette/statuses/1466028253546745873,"RT @RepDianaDeGette Today, SCOTUS hears the most important abortion case in decades. \n\nTogether with my fellow @ProChoiceCaucus co-chair @RepBarbaraLee, we are fighting to protect the constitutional right to abortion &amp; ensure all Americans can make their own decisions about their health &amp; futures.",D
779,1466083337999687680,RepAdams,2.916087e+09,2021-12-01T11:34:40-05:00,https://www.twitter.com/USProgressives/statuses/1466072718089015304,"RT @USProgressives Right now, the Supreme Court is hearing arguments in Dobbs v. Jackson Women's Health Organization, the case that directly challenges Roe v. Wade and the constitutional right to abortion.\n \nIt is no exaggeration: the future of legal abortion in the United States is at stake.",D
...,...,...,...,...,...,...,...
2318412,1354107413608226816,michaelgwaltz,8.336739e+08,2021-01-26T11:42:20-05:00,https://www.twitter.com/michaelgwaltz/statuses/1354107413608226816,"For decades, Democrats &amp; Republicans have come together to support the Hyde Amendment — including President Biden.\n\nNow all of a sudden, the Radical Left is waging a war on this 40 year old bipartisan legislation, hoping to force taxpayers into paying for abortions. https://twitter.com/RepWaltzPress/status/1354101936664928257 QT @RepWaltzPress Rep. Waltz joined @RepJimBanks and 198 Republican colleagues to #SaveHyde by opposing any funding bill that eliminates or weakens the Hyde Amendment or any current-law, pro-life appropriations provisions. \n\n⬇️ Read more:\nhttps://www.realclearpolitics.com/articles/2021/01/26/house_gop_draws_red_line_on_federally_funded_abortion_145118.html",R
2318428,1301321090447347712,michaelgwaltz,8.336739e+08,2020-09-02T20:48:19-04:00,https://www.twitter.com/michaelgwaltz/statuses/1301321090447347712,"America's future is at stake in this election. If Biden wins, Dems will push thru legislation to:\n\n❌ Defund our police, ICE &amp; federal prisons\n❌ Lift ban on federal abortion funding\n❌ Pass Green New Deal &amp; Medicare for all\n❌ Make DC the 51st state\n❌ Harvest national ballots http://pbs.twimg.com/ext_tw_video_thumb/1301321010545860609/pu/img/Ja072vGqKu-RKcF8.jpg https://video.twimg.com/ext_tw_video/1301321010545860609/pu/vid/480x270/JN3-mfshIb85scyx.mp4?tag=10",R
2320506,1364594487142588416,michaelgwaltz,8.336739e+08,2021-02-24T10:14:13-05:00,https://www.twitter.com/michaelgwaltz/statuses/1364594487142588416,Dems are trying to cram their Far Left agenda into a $1.9T bill that:\n\n❌ Kills jobs\n❌ Keeps schools closed\n❌ Promotes welfare wishlist\n❌ Encourages illegal immigration\n❌ Promotes pro-abortion policies\n❌ Spends our grandchildren’s money,R
2320960,1398040743919374336,michaelgwaltz,8.336739e+08,2021-05-27T18:17:42-04:00,https://www.twitter.com/michaelgwaltz/statuses/1398040743919374340,Jim is right. The Hyde amendment has been an integral policy in preventing taxpayers from funding abortions for years. https://twitter.com/RepJimBanks/status/1397915261596889094 QT @RepJimBanks Democrats should know that Republicans are UNITED and will do everything in our power to #SaveHyde.\n\nhttps://www.wsj.com/articles/bidens-budget-to-spotlight-battle-over-abortion-restrictions-11622118882,R


time: 3.85 s (started: 2024-04-22 14:52:58 +00:00)


In [11]:
finetuned_model_x = f'MPNet_contriplet_removal_50_margin_40_epoch_4'

def get_similar_sentences(model_x, query_x,df, model_name_x,threshold_x,is_lora):
    df_ = df.copy()
    text1 = tokenizer(query_x, return_tensors="pt", max_length=128, truncation=True, padding="max_length")
    embeddings_i = model_x(**text1).tolist()
    embeddings_i = [[round(value, 4) for value in row] for row in embeddings_i]
    query_column_x = f'embeddings_{model_name_x}'
    df_[query_column_x] = batch_and_encode(df_sentences_col=df['text'],finetuned_model_x = model_name_x,is_lora=is_lora)
    df_[f'cosim_{query_column_x}'] = df_[query_column_x].progress_apply(lambda x: cosine_similarity([x],embeddings_i)[0][0])
    df__ = df_[df_[f'cosim_{query_column_x}']>threshold_x]
    df_.sort_values(by=f'cosim_{query_column_x}', axis=0, ascending=False, inplace=True, kind='quicksort', na_position='last', ignore_index=False, key=None)
    print(df__['party'].value_counts())
    df_.drop(columns=query_column_x,inplace=True)
    return df_

# df_original=get_similar_sentences(model_x=model_original, query_x='abortion is murder',df = final_sample, query_column_x = 'all-mpnet-base-v2',threshold_x=0.7)

time: 3.93 ms (started: 2024-04-22 14:53:08 +00:00)


In [12]:
data_abortion_pro_original=get_similar_sentences(model_x=model_original, query_x='abortion is healthcare',df = data_abortion, model_name_x = 'original',threshold_x=0.6,is_lora=False)
data_abortion_pro_finetuned=get_similar_sentences(model_x=model_finetuned, query_x='abortion is healthcare',df = data_abortion, model_name_x = finetuned_model_x,threshold_x=0.6,is_lora=True)
data_abortion_con_original=get_similar_sentences(model_x=model_original, query_x='abortion is murder',df = data_abortion, model_name_x = 'original',threshold_x=0.6,is_lora=False)
data_abortion_con_finetuned=get_similar_sentences(model_x=model_finetuned, query_x='abortion is murder',df = data_abortion, model_name_x = finetuned_model_x,threshold_x=0.6,is_lora=True)


  0%|          | 0/450 [00:00<?, ?it/s]

  0%|          | 0/14373 [00:00<?, ?it/s]

D    973
R    459
I      4
Name: party, dtype: int64


  0%|          | 0/450 [00:00<?, ?it/s]

  0%|          | 0/14373 [00:00<?, ?it/s]

D    644
R    253
I      2
Name: party, dtype: int64


  0%|          | 0/450 [00:00<?, ?it/s]

  0%|          | 0/14373 [00:00<?, ?it/s]

R    95
D    46
I     2
Name: party, dtype: int64


  0%|          | 0/450 [00:00<?, ?it/s]

  0%|          | 0/14373 [00:00<?, ?it/s]

R    12
D     3
Name: party, dtype: int64
time: 4min 55s (started: 2024-04-22 14:53:11 +00:00)


In [11]:
data_abortion_pro_original
data_abortion_pro_original[data_abortion_pro_original[f'cosim_embeddings_original']>0.7]['party'].value_counts()

D    98
R    31
Name: party, dtype: int64

time: 22.1 ms (started: 2024-03-20 10:51:20 +00:00)


In [12]:
data_abortion_pro_finetuned
data_abortion_pro_finetuned[data_abortion_pro_finetuned[f'cosim_embeddings_{finetuned_model_x}']>0.7]['party'].value_counts()

D    43
R     4
Name: party, dtype: int64

time: 10.6 ms (started: 2024-03-20 10:51:27 +00:00)


In [15]:
data_abortion_con_original
data_abortion_con_original[data_abortion_con_original[f'cosim_embeddings_original']>0.6]['party'].value_counts()

R    95
D    46
I     2
Name: party, dtype: int64

time: 6.87 ms (started: 2024-03-20 10:52:06 +00:00)


In [16]:
data_abortion_con_finetuned
data_abortion_con_finetuned[data_abortion_con_finetuned[f'cosim_embeddings_{finetuned_model_x}']>0.6]['party'].value_counts()

R    12
D     3
Name: party, dtype: int64

time: 9.06 ms (started: 2024-03-20 10:52:11 +00:00)


In [30]:
final_sample

Unnamed: 0,screen_name,user_id,time,link,text,party,all-mpnet-base-v2,MPNet_removal_50_margin_60_epoch_1,MPNet_removal_50_margin_90_epoch_1,MPNet_triplet_removal_0_margin_50_epoch_1
0,RepLloydDoggett,1.539449e+08,2019-02-08T18:04:27-05:00,https://www.twitter.com/DavidP4AD/statuses/1093939720940331008,important bill introduced yesterday! there is much energy in congress for direct medicare price negotiations. most advocates &; folks on the hill are working together to achieve the most powerful legislation possible. thanks,D,"[-0.029, 0.0715, -0.0059, -0.0296, -0.009, -0.0136, -0.0212, 0.0363, -0.0067, -0.0058, 0.0309, 0.0271, -0.0033, 0.1152, 0.0456, -0.046, 0.0231, 0.0575, 0.0406, 0.027, 0.0035, -0.0185, -0.0137, -0.0063, -0.0375, 0.0334, -0.0092, -0.0053, -0.009, -0.0856, 0.0447, -0.0424, -0.036, 0.0081, 0.0001, -0.0637, -0.0117, 0.0245, -0.0987, -0.0459, 0.0007, -0.0806, -0.0487, 0.0242, 0.0074, 0.0022, -0.004, 0.0129, -0.0537, -0.0049, -0.0094, 0.0373, 0.027, -0.0379, 0.009, 0.008, -0.0178, -0.0425, -0.0388, -0.0508, 0.0543, 0.0182, -0.0165, -0.0177, -0.0013, 0.0563, 0.0204, 0.0261, 0.0021, 0.015, 0.0457, 0.0044, 0.0424, -0.0197, -0.0071, -0.0168, -0.0173, 0.0004, 0.0296, -0.0003, -0.0005, 0.0129, 0.0171, -0.0162, 0.0283, -0.0244, 0.0037, -0.0456, 0.0363, 0.0041, 0.032, -0.0231, -0.0024, -0.0013, -0.0004, -0.0059, 0.0095, 0.0163, 0.0275, -0.0467, ...]","[-0.0514, 0.1123, -0.0383, -0.0075, -0.0148, 0.0036, -0.0184, 0.0032, -0.0061, 0.0108, 0.041, 0.0481, -0.038, 0.0575, 0.0273, -0.0537, 0.0491, 0.0377, -0.0094, 0.0337, -0.032, 0.0014, -0.0138, 0.0174, -0.0106, -0.0224, 0.0138, 0.0201, 0.0295, -0.0544, 0.0264, -0.0551, -0.0137, 0.0259, 0.0031, -0.0623, -0.0329, 0.0248, -0.0521, -0.0165, 0.0102, -0.0452, -0.0506, 0.0168, 0.0144, -0.0129, 0.0025, 0.0192, -0.0577, 0.0115, 0.0018, 0.0005, -0.0173, -0.0276, -0.0306, 0.0617, 0.0066, -0.0582, 0.0116, -0.0287, 0.0281, 0.012, -0.0261, -0.0591, 0.0166, 0.046, 0.0106, 0.0275, 0.0031, 0.0244, 0.0302, -0.0306, 0.0323, 0.0167, 0.0598, -0.0129, -0.0448, 0.0032, 0.0294, 0.0299, 0.0406, 0.0179, 0.0414, 0.0514, 0.0663, 0.0563, 0.002, -0.0182, 0.039, 0.0353, 0.0109, -0.0525, 0.004, -0.013, 0.0217, -0.006, 0.0356, 0.0199, 0.0186, -0.0146, ...]","[-0.0798, 0.0781, -0.0377, -0.0283, -0.008, 0.0113, -0.0578, 0.0317, -0.0056, -0.0001, 0.0334, 0.0312, -0.0224, 0.0759, 0.0274, -0.053, 0.0503, 0.0431, -0.0296, 0.0256, -0.0325, -0.0004, -0.0536, 0.0073, -0.0139, -0.0177, 0.0032, 0.0215, 0.0173, -0.0478, 0.0201, -0.0602, 0.0045, 0.0417, -0.0025, -0.0608, -0.0566, 0.0183, -0.0512, -0.031, 0.0113, -0.0703, -0.0416, 0.0176, 0.04, -0.0125, 0.0181, -0.0014, -0.066, 0.0047, 0.0012, -0.0218, -0.0235, -0.0275, -0.0371, 0.0513, 0.0044, -0.0414, 0.0329, -0.0376, 0.0367, -0.0244, -0.0171, -0.0557, 0.0191, 0.0487, 0.0159, 0.0589, -0.0099, 0.0365, 0.0379, -0.0464, 0.0432, 0.0171, 0.039, 0.0073, -0.0349, -0.0119, 0.0601, 0.0321, 0.0315, -0.0073, 0.0365, 0.0257, 0.0619, 0.0586, 0.0059, -0.0079, 0.0553, 0.0247, 0.0118, -0.0553, 0.0029, 0.012, -0.0023, -0.0219, 0.0348, 0.026, 0.0217, 0.0079, ...]","[-0.0693, 0.0209, -0.0272, -0.0512, -0.0227, -0.0012, -0.0521, 0.027, -0.0196, 0.028, 0.0095, 0.0278, -0.0012, 0.0695, 0.0527, -0.0567, 0.047, 0.0215, -0.0222, 0.0489, -0.0144, -0.0018, -0.0444, 0.0084, 0.0089, -0.0244, 0.0282, 0.0566, -0.0077, -0.0474, 0.0094, -0.0096, 0.0298, 0.0105, -0.0, -0.0369, -0.0247, 0.0236, -0.065, 0.0219, -0.0037, -0.0629, 0.0232, 0.0111, 0.0458, 0.0041, 0.0129, -0.0226, -0.054, -0.0013, -0.0102, -0.0708, -0.0146, -0.013, -0.0231, 0.0462, 0.0061, -0.0257, 0.0575, -0.0519, 0.0258, -0.0073, 0.013, -0.0487, 0.0659, 0.0181, 0.0022, 0.0652, -0.0169, 0.0108, 0.026, -0.0739, 0.0445, 0.021, 0.0308, -0.0013, -0.06, -0.0395, 0.0544, 0.014, 0.0203, -0.0037, 0.0228, 0.0158, 0.0365, 0.0695, 0.0267, 0.0021, 0.0707, 0.0278, 0.0288, -0.0928, 0.0025, 0.0518, 0.0224, -0.0393, 0.0542, 0.0033, -0.0199, 0.03, ...]"
1,SenBlumenthal,2.781241e+08,2020-12-08T12:50:26-05:00,https://www.twitter.com/maziehirono/statuses/1336344529008545793,fcc nominee nathan simington's only qualification is his eagerness to defend the president's attacks on the first amendment and sec. 230. his attempts to recruit fox news hosts to bully the fcc shows he has no place leading that agency.#stopsimington,D,"[0.0087, 0.0864, 0.0276, -0.023, 0.023, 0.0129, -0.0303, -0.0102, -0.0349, -0.029, -0.0161, -0.0016, -0.0257, -0.0682, -0.0318, 0.0329, 0.0563, 0.0425, 0.0319, -0.0035, -0.0042, 0.0393, -0.0344, 0.0117, -0.0095, -0.0321, 0.0761, 0.0113, 0.0203, -0.0372, 0.018, -0.002, -0.0167, -0.0227, 0.0, 0.0386, 0.0354, 0.0081, -0.0128, -0.0323, 0.0322, 0.0069, -0.0636, 0.0573, -0.0208, 0.017, -0.0289, -0.0361, 0.0654, 0.0497, 0.0105, 0.0483, 0.0012, -0.0109, 0.0158, -0.005, 0.0108, -0.0264, -0.0064, 0.0019, 0.0558, 0.0281, 0.0181, 0.0025, -0.0446, 0.0053, -0.0569, 0.0224, 0.0316, 0.0001, 0.0586, 0.0176, 0.0323, 0.0625, 0.0106, -0.0094, 0.027, 0.0224, -0.0636, -0.0026, 0.0068, -0.0049, -0.0265, 0.0305, 0.0886, 0.0312, 0.0059, -0.0461, 0.024, 0.0296, 0.0913, -0.0495, 0.0057, -0.0316, -0.022, 0.0354, 0.0388, 0.0089, 0.0037, -0.0846, ...]","[-0.017, 0.0922, 0.0098, 0.0295, -0.0354, 0.0059, 0.0405, 0.0302, 0.0069, 0.0236, -0.0105, 0.0455, -0.0029, 0.019, -0.0278, -0.021, 0.066, 0.0343, -0.0189, -0.0194, -0.0512, 0.0184, 0.0555, 0.0422, -0.0054, -0.0535, 0.0227, -0.0169, 0.0237, -0.0011, 0.0342, -0.0545, -0.0547, 0.03, 0.0032, -0.014, 0.0125, 0.0333, -0.0355, 0.0275, 0.0261, 0.0107, -0.0093, 0.0507, -0.0089, -0.0585, -0.0032, 0.0754, 0.0319, 0.0156, 0.0223, 0.0238, -0.0172, -0.0576, -0.0125, 0.0265, -0.003, -0.0065, 0.0154, -0.0545, 0.0215, 0.0335, -0.0521, 0.0061, -0.0327, 0.0567, 0.0094, -0.033, 0.0461, 0.0693, -0.0099, 0.0181, 0.0004, 0.0541, 0.01, -0.0404, 0.0046, 0.0784, -0.04, -0.0176, -0.0235, -0.0006, 0.0143, 0.0603, 0.0934, 0.0257, 0.0548, -0.0385, -0.0051, 0.0341, 0.039, -0.0023, -0.004, -0.08, -0.0119, 0.001, 0.0057, 0.0319, 0.035, -0.0438, ...]","[-0.0316, 0.0607, 0.0236, 0.0209, -0.0363, 0.0186, 0.0201, 0.019, 0.013, 0.0249, -0.0045, 0.0202, -0.0089, 0.0314, -0.0449, 0.0123, 0.052, 0.0514, -0.0288, -0.0356, -0.0307, 0.0227, 0.0444, 0.0388, -0.0283, -0.0566, 0.0294, -0.0314, 0.0195, -0.0026, 0.0271, -0.0599, -0.0504, 0.0304, -0.0023, 0.0051, 0.0066, 0.0531, -0.0223, 0.0195, 0.0113, 0.0012, 0.0053, 0.0588, -0.0044, -0.0712, 0.014, 0.05, 0.0672, 0.0083, 0.009, 0.0047, -0.0589, -0.0512, -0.0144, 0.023, -0.003, 0.0103, 0.0303, -0.0668, 0.026, -0.0007, -0.0531, 0.0211, -0.0436, 0.0511, 0.0093, -0.0052, 0.0446, 0.0697, -0.0089, 0.0135, -0.0001, 0.0679, -0.0211, -0.0375, 0.008, 0.0971, -0.0391, -0.006, -0.0352, -0.0147, 0.0175, 0.0643, 0.0612, 0.0106, 0.0548, -0.0366, 0.0241, 0.0327, 0.0308, 0.0212, -0.0098, -0.0589, -0.0157, -0.0077, 0.0148, 0.0218, 0.0393, -0.0344, ...]","[-0.0498, -0.0444, 0.049, -0.0012, -0.0404, -0.0009, 0.0152, 0.0518, -0.0117, 0.0207, -0.0143, 0.0139, 0.0194, 0.0355, -0.0193, 0.0088, 0.0045, 0.0592, -0.0399, -0.0022, 0.0302, 0.0082, 0.025, 0.0473, -0.0183, -0.053, 0.0068, 0.013, -0.037, -0.0071, 0.032, -0.0051, 0.0021, -0.0283, 0.0004, -0.0028, 0.019, 0.0529, -0.0046, 0.0397, 0.0213, -0.0329, 0.0742, 0.0681, -0.0048, -0.059, -0.0003, 0.0193, 0.047, -0.0052, -0.0082, -0.0353, -0.0372, -0.0446, -0.027, -0.02, 0.013, 0.0365, 0.0536, -0.0817, 0.0091, -0.0046, -0.0209, 0.0313, 0.0083, 0.0505, -0.0286, -0.0015, 0.0413, 0.06, -0.0308, -0.0579, 0.0053, 0.0557, -0.0289, -0.0092, 0.0098, 0.0347, -0.0375, -0.0023, -0.0327, 0.0176, -0.0008, 0.041, 0.0195, 0.0184, 0.0829, -0.0231, 0.0671, 0.0162, 0.0539, -0.0247, -0.0511, -0.0035, -0.0312, -0.0597, 0.0238, 0.0077, 0.0403, 0.0139, ...]"
2,RepDerekKilmer,1.058918e+09,2021-06-17T09:09:31-04:00,https://www.twitter.com/RepDerekKilmer/statuses/1405512936668422152,"today, is holding a hearing to examine the source of political polarization and methods to improve civility &; collaboration throughout congress. we're also experimenting with new hearing rules to foster a more collaborative conversation",D,"[-0.0078, 0.0563, -0.0064, 0.0244, -0.0277, -0.0123, -0.0242, 0.014, -0.0127, -0.0019, 0.0289, 0.0033, 0.0132, 0.0118, 0.02, -0.0842, 0.0352, 0.0135, 0.0633, 0.0018, 0.0007, -0.0035, -0.0098, -0.0089, -0.075, -0.0131, -0.0308, 0.032, 0.028, 0.0108, 0.0189, 0.0083, -0.0127, -0.0248, 0.0001, -0.0216, 0.0162, 0.0126, -0.0469, -0.0552, 0.0442, -0.0588, -0.0348, 0.0389, -0.0202, -0.0178, -0.045, 0.0206, -0.028, -0.032, 0.0221, 0.0494, -0.013, -0.0215, -0.0087, 0.0175, -0.0113, -0.014, -0.0644, -0.0693, 0.0346, -0.0381, -0.0179, -0.0163, -0.0439, 0.0423, -0.0155, 0.0135, -0.0008, 0.0127, 0.0687, 0.0001, 0.0191, 0.0461, 0.0196, -0.0258, -0.0319, 0.0127, -0.0345, -0.0091, 0.0442, -0.0, -0.0143, 0.0348, 0.0658, -0.0079, -0.0006, -0.0264, -0.041, 0.0181, 0.0903, -0.0255, -0.006, 0.0168, 0.0056, -0.0113, 0.0356, 0.0897, 0.0203, -0.0279, ...]","[-0.0158, 0.1043, -0.0436, 0.0025, -0.0173, -0.0004, -0.0262, 0.0083, -0.0073, 0.0189, 0.0317, 0.0342, -0.0261, 0.0327, 0.014, -0.0747, 0.0562, 0.0301, -0.0048, 0.0235, -0.0308, 0.0054, -0.008, 0.0029, -0.0047, -0.0296, 0.0076, 0.018, 0.0384, -0.032, 0.0126, -0.0434, -0.0074, 0.0047, 0.0033, -0.0579, -0.0097, 0.0163, -0.0481, -0.0108, 0.0236, -0.0324, -0.0398, 0.0401, 0.014, -0.0259, -0.009, 0.0257, -0.0507, 0.0176, -0.0008, 0.0044, -0.0204, -0.0278, -0.0178, 0.0753, 0.0036, -0.0419, 0.0162, -0.0352, 0.007, 0.0143, -0.0312, -0.044, 0.0241, 0.0512, -0.0121, 0.0171, -0.002, 0.0122, 0.0375, -0.0387, 0.0285, 0.0159, 0.0748, -0.0114, -0.0444, -0.0073, 0.0149, 0.0302, 0.0444, 0.016, 0.0382, 0.0618, 0.0643, 0.0719, 0.0058, -0.0103, 0.026, 0.0593, 0.0149, -0.0386, -0.0001, -0.0185, 0.0227, -0.0181, 0.0312, 0.0248, 0.0026, -0.004, ...]","[-0.0523, 0.0642, -0.0419, -0.0145, -0.0196, 0.0034, -0.0638, 0.0245, -0.0056, 0.0031, 0.0253, 0.0136, -0.0129, 0.054, 0.0194, -0.0719, 0.061, 0.0281, -0.027, 0.0153, -0.0333, 0.002, -0.0511, -0.0076, -0.0121, -0.0296, -0.0021, 0.0256, 0.0215, -0.0244, 0.0102, -0.0409, 0.0212, 0.0176, -0.0024, -0.0546, -0.0322, 0.0123, -0.0516, -0.0187, 0.0221, -0.0567, -0.0261, 0.0391, 0.0352, -0.0225, 0.0034, 0.0047, -0.0568, 0.0108, 0.0023, -0.0247, -0.0318, -0.0238, -0.0226, 0.0673, -0.0003, -0.0282, 0.0358, -0.0404, 0.0263, -0.0251, -0.0217, -0.051, 0.0231, 0.0481, -0.002, 0.0554, -0.0145, 0.023, 0.052, -0.0557, 0.0421, 0.0201, 0.0562, 0.0089, -0.0379, -0.0225, 0.0515, 0.0372, 0.0456, -0.0014, 0.0335, 0.0376, 0.0595, 0.0734, 0.0063, -0.0037, 0.0387, 0.0585, 0.0176, -0.0521, -0.0036, 0.0144, 0.0063, -0.0335, 0.0276, 0.032, 0.0095, 0.0177, ...]","[-0.0487, 0.0075, -0.0289, -0.0345, -0.0258, -0.0072, -0.0568, 0.0189, -0.0243, 0.0281, -0.0075, 0.0208, 0.0051, 0.058, 0.0403, -0.0557, 0.0534, 0.0213, -0.0209, 0.0417, -0.0133, -0.003, -0.0389, 0.0099, 0.0179, -0.031, 0.0155, 0.0664, -0.007, -0.0373, 0.011, 0.0072, 0.032, -0.0054, 0.0001, -0.0275, -0.0083, 0.0213, -0.0631, 0.04, 0.0056, -0.0517, 0.0324, 0.0243, 0.0547, -0.0075, 0.006, -0.0202, -0.0446, -0.0021, -0.0085, -0.0616, -0.0231, -0.0121, -0.0163, 0.0441, 0.0051, -0.0197, 0.0578, -0.064, 0.0204, -0.0071, 0.0069, -0.0443, 0.0652, 0.0172, -0.0116, 0.065, -0.0196, 0.0004, 0.0199, -0.0903, 0.0535, 0.0221, 0.0377, -0.0025, -0.0539, -0.0542, 0.0444, 0.0157, 0.024, -0.002, 0.0285, 0.0209, 0.0376, 0.0747, 0.0369, 0.0024, 0.0748, 0.0502, 0.0389, -0.0888, -0.0023, 0.0454, 0.0209, -0.0477, 0.0458, 0.0075, -0.024, 0.0312, ...]"
3,RepRoKhanna,8.162989e+17,2022-01-05T17:39:03-05:00,https://www.twitter.com/RepRoKhanna/statuses/1478858610012835850,"in the wealthiest nation on earth, no parent should have to work multiple jobs to keep food on the table for their family.",D,"[-0.075, 0.0697, 0.0287, 0.0514, -0.0137, -0.0028, 0.0466, -0.0239, 0.0007, -0.007, 0.0652, 0.0234, -0.0158, -0.0411, -0.0148, 0.0642, 0.0215, 0.012, -0.0303, 0.0125, -0.0347, 0.0054, 0.0567, 0.0331, -0.0331, -0.0473, 0.0474, -0.0206, 0.0184, 0.0749, 0.0725, 0.0098, -0.0286, 0.0109, -0.0003, 0.0084, -0.0076, 0.0097, 0.0045, 0.0197, 0.0226, -0.0574, 0.0101, 0.0299, -0.0258, 0.0314, 0.0051, 0.0384, -0.018, -0.0771, 0.0215, 0.035, -0.0368, 0.0196, -0.0572, 0.021, 0.024, -0.0093, -0.0685, -0.0699, 0.0195, 0.0193, -0.0458, -0.0874, -0.0214, 0.0285, 0.0063, -0.0383, 0.0072, 0.0205, 0.0007, -0.0188, 0.0041, -0.0007, 0.0008, 0.0514, -0.0135, 0.0697, -0.0331, 0.0022, 0.071, 0.0016, 0.0167, -0.0166, -0.0027, 0.022, -0.0484, -0.043, -0.0294, 0.0331, 0.001, 0.0171, 0.034, -0.0472, -0.0029, -0.0078, -0.0272, -0.0415, -0.0267, -0.036, ...]","[-0.031, 0.081, 0.0195, 0.0313, -0.0297, -0.024, 0.0621, -0.0057, 0.0115, 0.0419, 0.0157, 0.0458, -0.0129, -0.0021, -0.0463, -0.0219, 0.046, 0.0102, -0.0225, 0.007, -0.0554, 0.0183, 0.0342, 0.0341, 0.0042, -0.0773, 0.0179, -0.0108, 0.0237, 0.012, 0.0698, -0.0611, -0.0245, 0.0466, 0.0036, -0.0365, 0.0012, 0.0266, -0.0034, 0.0473, 0.0316, -0.0137, 0.004, 0.0488, -0.0019, -0.0416, 0.0082, 0.0608, -0.0039, -0.0063, 0.0106, 0.0195, -0.0357, -0.0499, -0.0395, 0.0299, -0.021, -0.0082, 0.0042, -0.056, 0.0335, 0.0483, -0.0794, -0.0267, -0.0043, 0.0475, -0.0039, -0.0422, 0.0254, 0.0731, -0.0114, 0.0053, 0.0132, 0.038, 0.008, -0.0375, -0.0066, 0.0553, -0.0126, -0.0194, -0.0289, -0.0017, 0.0286, 0.0594, 0.059, 0.0212, 0.0393, -0.0251, 0.0073, 0.0233, 0.0272, 0.0055, -0.0071, -0.0755, -0.0164, -0.0177, 0.0199, 0.033, 0.0164, -0.0533, ...]","[-0.0439, 0.045, 0.0298, 0.0315, -0.0304, -0.0189, 0.0623, -0.0075, 0.0218, 0.0314, 0.0331, 0.0276, -0.0395, 0.0096, -0.0665, 0.0236, 0.0435, 0.0326, -0.025, -0.019, -0.0411, -0.0001, 0.0244, 0.0212, -0.0132, -0.0759, 0.0102, -0.0313, 0.0084, -0.0011, 0.0702, -0.0595, -0.0407, 0.0683, -0.0025, -0.0269, -0.0063, 0.0379, 0.0205, 0.0238, 0.0301, -0.0222, 0.0069, 0.0634, 0.0021, -0.0493, 0.0256, 0.0337, 0.0319, -0.011, 0.0182, -0.0001, -0.0631, -0.0465, -0.0444, 0.0104, -0.0325, -0.0005, 0.0061, -0.0622, 0.0181, 0.026, -0.0882, -0.0125, -0.024, 0.0433, -0.0047, -0.0338, 0.0179, 0.0823, -0.0053, 0.0045, 0.0143, 0.0477, -0.0193, -0.0393, 0.003, 0.0702, -0.0142, -0.0229, -0.0466, 0.0009, 0.0266, 0.0494, 0.031, 0.0179, 0.0569, -0.0341, 0.0315, 0.0183, 0.0212, 0.0322, -0.0159, -0.0689, -0.0212, -0.0417, 0.0119, 0.0315, 0.0281, -0.0493, ...]","[-0.0631, -0.0205, 0.0247, -0.0117, -0.0551, -0.0111, 0.0098, 0.0222, -0.0371, 0.0421, 0.004, 0.0281, 0.0214, 0.0335, -0.0436, 0.0056, 0.0424, 0.0281, -0.0383, 0.04, -0.0044, -0.0179, -0.0166, 0.0184, 0.0233, -0.0586, 0.0041, 0.0351, -0.0326, -0.0145, 0.0557, 0.0028, 0.0432, 0.0172, 0.0004, -0.0054, -0.0102, 0.0429, -0.0103, 0.0772, 0.0205, -0.0479, 0.0601, 0.05, 0.0368, -0.0339, 0.0018, -0.0068, 0.0092, -0.0029, -0.0081, -0.0413, -0.0331, -0.0399, -0.0391, -0.0148, -0.0071, 0.0052, 0.0406, -0.1026, 0.0277, -0.0022, -0.0186, -0.0211, 0.0446, 0.0425, -0.0287, 0.0265, 0.0064, 0.0411, -0.0324, -0.0817, 0.0525, 0.0509, 0.0034, -0.0137, 0.0003, -0.0242, -0.0034, 0.0036, -0.0272, -0.0006, 0.0195, 0.039, 0.0059, 0.0566, 0.0771, 0.0027, 0.0984, 0.0006, 0.063, -0.0523, -0.0461, 0.0105, -0.0146, -0.0788, 0.0397, 0.0113, 0.0015, 0.0115, ...]"
4,RepBonnie,2.968452e+09,2019-02-13T12:17:10-05:00,https://www.twitter.com/SenSherrodBrown/statuses/1095732026571931649,"the cost of everything from healthcare, to rent, to college tuition is up but for most workers, wages are flat. they need more money in their pockets to keep up. that s why , , and i are introducing a cost of living refund.",D,"[-0.0648, 0.0289, 0.0047, -0.0091, -0.0237, 0.0085, -0.0252, 0.0726, -0.0088, -0.0314, 0.0523, 0.0902, 0.0262, 0.0091, 0.0236, 0.0056, -0.0245, 0.0366, 0.039, -0.0124, -0.0079, 0.0111, -0.0043, 0.0078, -0.0486, 0.0277, 0.0438, 0.0089, -0.0016, 0.0269, 0.1063, -0.0336, -0.0041, 0.0235, 0.0002, -0.0338, 0.0008, -0.0062, -0.075, -0.0258, 0.0098, -0.019, -0.0097, 0.0221, 0.0084, 0.0382, -0.0188, 0.0392, 0.024, -0.0438, 0.0016, -0.0959, 0.0191, 0.0154, -0.0751, 0.0039, 0.0303, -0.0063, -0.0567, -0.0828, 0.0089, -0.0076, -0.037, -0.058, -0.0089, 0.0392, -0.0217, -0.0306, -0.0148, 0.0388, -0.0245, -0.036, -0.0237, -0.0102, 0.0134, -0.0472, -0.0478, 0.0133, -0.0022, 0.0169, 0.0033, -0.0096, 0.0014, 0.001, 0.033, -0.0283, -0.062, -0.0075, -0.0076, 0.0279, 0.0518, 0.0366, -0.0047, 0.0239, -0.0302, 0.0052, 0.0281, -0.024, 0.0106, 0.0016, ...]","[-0.0366, 0.0972, -0.0118, 0.0174, -0.0183, -0.0142, -0.0204, 0.0414, -0.0126, 0.008, 0.0349, 0.069, 0.0107, 0.0337, 0.034, -0.0403, 0.0557, 0.0599, 0.0114, 0.0187, -0.0239, 0.0296, 0.0269, 0.0063, -0.0391, 0.0209, 0.0334, -0.0029, 0.0528, -0.0222, 0.0615, -0.0529, 0.0031, 0.0047, 0.004, -0.0614, -0.0017, 0.0167, -0.0405, -0.0171, 0.0081, -0.0211, -0.0229, 0.0379, 0.0307, 0.0065, -0.0079, 0.0841, -0.0012, -0.0122, 0.0175, -0.0349, 0.0231, -0.0269, -0.0482, 0.0424, 0.0236, -0.0347, -0.0133, -0.0479, 0.0302, -0.0125, -0.031, -0.0361, -0.0213, 0.0591, -0.0209, 0.0014, 0.0282, 0.0592, 0.0015, -0.0299, 0.0154, 0.027, 0.043, -0.0433, -0.0459, 0.0396, -0.0083, 0.0381, 0.026, 0.0034, 0.0304, 0.0355, 0.0779, 0.0344, -0.0064, -0.012, -0.0025, 0.0423, 0.0251, -0.0208, -0.0137, -0.0484, -0.017, -0.0028, -0.0066, -0.0023, 0.0343, -0.0114, ...]","[-0.052, 0.0812, -0.0072, 0.0228, -0.0269, 0.0094, -0.0583, 0.0634, -0.0034, -0.0028, 0.0541, 0.0574, 0.0315, 0.0465, 0.0047, -0.0417, 0.0497, 0.0774, -0.0091, -0.0118, -0.0265, 0.0338, 0.0005, 0.0052, -0.0663, 0.0149, 0.0439, -0.0142, 0.0466, -0.0057, 0.064, -0.0508, -0.0032, 0.0391, -0.0032, -0.0518, -0.0106, 0.011, -0.0362, -0.0464, 0.0117, -0.0639, -0.0151, 0.0449, 0.0555, -0.0058, 0.0152, 0.0561, 0.021, -0.0095, 0.0167, -0.0542, -0.0098, -0.0304, -0.0613, 0.0317, 0.006, -0.0157, 0.0038, -0.062, 0.0297, -0.0497, -0.0402, -0.0038, -0.0317, 0.0609, -0.0155, 0.0287, 0.0357, 0.0646, 0.0153, -0.0443, 0.0266, 0.042, 0.0092, -0.0281, -0.0385, 0.0538, 0.0042, 0.0453, 0.0005, -0.0128, 0.0212, 0.0339, 0.0821, 0.0303, 0.0031, 0.0037, -0.0064, 0.0366, 0.0144, -0.0322, -0.0241, -0.0438, -0.018, -0.0037, -0.0037, -0.0, 0.0665, -0.0102, ...]","[-0.07, -0.0068, 0.01, -0.0308, -0.0366, -0.0189, -0.0441, 0.067, -0.0256, 0.0137, 0.0234, 0.0471, 0.0412, 0.0652, 0.031, -0.0243, 0.0499, 0.0828, -0.0347, 0.0317, 0.0109, -0.0038, 0.0051, 0.0022, -0.04, -0.0141, 0.0396, 0.0374, -0.0094, -0.0345, 0.0373, 0.0014, 0.0106, -0.0062, 0.0003, -0.0303, -0.0103, 0.0303, -0.0371, 0.0221, 0.0221, -0.0974, 0.0738, 0.0438, 0.039, -0.0075, 0.0111, 0.0196, 0.0058, -0.0093, -0.0023, -0.1045, 0.0375, -0.0191, -0.0233, 0.0066, 0.0106, 0.0124, 0.0485, -0.0586, 0.0177, -0.0353, -0.009, -0.0009, 0.041, 0.0461, -0.0275, 0.0451, 0.0257, 0.0444, -0.0143, -0.0809, 0.0327, 0.0509, -0.0112, -0.0057, -0.0436, 0.0174, 0.036, -0.0098, 0.0029, 0.0226, 0.0103, 0.0245, 0.0279, 0.0459, 0.0575, 0.0141, 0.0597, 0.029, 0.0287, -0.1027, -0.0452, 0.0374, -0.0055, -0.0467, 0.0482, 0.0085, 0.0375, 0.0166, ...]"
...,...,...,...,...,...,...,...,...,...,...
9995,GReschenthaler,4.205134e+09,2020-01-04T11:39:12-05:00,https://www.twitter.com/GReschenthaler/statuses/1213500135382097920,this morning i joined to discuss us iran tensions. i was stationed in iraq as a navy jag. i can tell you: the us military will not get caught flat footed by iran.,R,"[0.0022, 0.0361, 0.0079, -0.0418, 0.0531, -0.0375, -0.0248, 0.004, 0.0153, -0.0113, 0.01, 0.004, -0.0461, 0.0022, 0.0307, 0.0538, 0.0391, -0.0278, 0.0196, 0.0089, 0.0213, -0.0037, -0.0721, -0.0586, -0.0523, 0.0381, -0.0482, -0.0054, 0.0363, -0.0576, 0.0607, -0.0017, -0.0232, 0.0165, -0.0001, -0.0248, 0.028, -0.0058, 0.0095, -0.0537, -0.0083, -0.0606, -0.0411, 0.0091, -0.02, -0.0206, -0.0057, 0.0376, 0.029, 0.0368, 0.0005, 0.0401, -0.0618, 0.0215, -0.0866, -0.05, 0.0308, -0.061, -0.079, -0.0158, 0.0612, -0.0175, -0.0583, -0.0021, -0.0026, 0.0631, -0.0247, 0.0766, 0.0156, -0.0151, 0.0554, -0.0253, -0.0217, -0.0407, 0.0325, 0.0722, -0.0175, -0.0299, -0.004, -0.0251, 0.0105, -0.0118, -0.0184, -0.0041, 0.0019, 0.0027, 0.0044, -0.0466, 0.0073, -0.0181, -0.0611, -0.0131, -0.0537, 0.0332, -0.0055, -0.0081, -0.0201, -0.0182, 0.0616, -0.0352, ...]","[-0.0159, 0.0889, 0.0012, -0.0004, -0.0004, -0.0039, 0.0205, -0.0153, 0.0179, 0.0334, 0.0427, 0.0438, -0.0445, 0.025, 0.0548, -0.0046, 0.0728, 0.0297, -0.046, 0.0521, -0.0274, 0.0331, -0.0185, -0.0045, -0.0374, -0.0272, 0.0113, -0.0006, 0.0544, -0.0702, 0.0224, -0.0636, 0.0049, 0.0157, 0.0033, -0.057, -0.0309, 0.0213, -0.0598, -0.0458, -0.0177, -0.0517, -0.029, -0.0035, -0.0122, -0.0225, 0.0093, 0.0709, -0.0108, 0.0229, 0.0112, -0.0074, 0.0142, -0.0235, -0.0832, 0.0441, -0.0122, -0.0287, -0.0226, 0.0063, 0.0689, 0.0139, -0.0212, -0.0398, 0.0027, 0.0492, 0.0052, 0.0542, 0.0145, 0.0594, 0.0278, -0.0137, 0.0069, 0.0136, 0.0451, -0.0377, -0.0322, 0.0092, 0.0124, -0.001, 0.0357, -0.0003, 0.0494, 0.0442, 0.0631, 0.0617, 0.0031, -0.0134, 0.0057, 0.029, -0.035, -0.023, -0.0223, -0.0146, -0.0247, -0.0037, 0.0406, 0.0076, 0.029, -0.0522, ...]","[-0.0391, 0.0631, 0.0106, -0.0031, 0.0119, -0.0027, -0.0188, 0.01, 0.0003, 0.0363, 0.0517, 0.0359, -0.0382, 0.0345, 0.0346, 0.0127, 0.0852, 0.0552, -0.053, 0.0353, -0.0138, 0.0255, -0.0474, -0.0206, -0.046, -0.0105, 0.0142, -0.0075, 0.0209, -0.0703, 0.0226, -0.0685, -0.0116, 0.0334, -0.0028, -0.0463, -0.0526, 0.0109, -0.0437, -0.0785, -0.0069, -0.0885, -0.0033, 0.0112, 0.0109, -0.0087, 0.0327, 0.0574, 0.0065, 0.0004, 0.0133, -0.0358, -0.0095, -0.0292, -0.0973, 0.0306, -0.0165, -0.0253, -0.025, 0.0057, 0.0792, -0.0144, -0.0353, 0.0059, 0.0075, 0.0449, 0.004, 0.0626, 0.0046, 0.0588, 0.0321, -0.0349, -0.0023, 0.0159, 0.0109, -0.018, -0.018, 0.0101, 0.0369, -0.0092, 0.0186, -0.0101, 0.0478, 0.0093, 0.0455, 0.0759, 0.0206, -0.0004, 0.0309, 0.0169, -0.0244, -0.0112, -0.0269, 0.0038, -0.0367, -0.0113, 0.0462, 0.0003, 0.044, -0.0462, ...]","[-0.0466, 0.0048, 0.0105, -0.0299, 0.0027, -0.0193, -0.022, 0.0272, -0.0127, 0.0393, 0.0312, 0.0217, -0.028, 0.0448, 0.0542, -0.0021, 0.0496, 0.0565, -0.0463, 0.0493, 0.0155, -0.0207, -0.0244, -0.0155, -0.0149, 0.0041, 0.0289, 0.0306, -0.0221, -0.0778, 0.0055, -0.0226, 0.0038, 0.0272, -0.0001, -0.056, -0.0281, 0.023, -0.0348, -0.0504, -0.0012, -0.0825, 0.0312, -0.0025, 0.0284, 0.0221, 0.009, 0.0135, -0.0237, -0.0326, 0.0049, -0.0875, 0.0388, -0.0069, -0.0698, 0.0152, 0.0082, -0.0118, 0.0254, -0.0042, 0.0583, -0.0082, 0.0081, -0.0216, 0.048, 0.0167, 0.0119, 0.0564, -0.0245, 0.0253, 0.0279, -0.0646, 0.022, 0.0358, -0.0047, -0.0169, -0.0398, -0.0087, 0.0713, -0.0363, 0.0066, -0.0044, 0.0277, -0.0093, 0.0359, 0.0885, 0.0449, 0.0018, 0.0657, 0.0159, 0.0165, -0.0794, 0.0036, 0.0609, -0.0084, -0.0194, 0.08, -0.0009, 0.0025, -0.0067, ...]"
9996,lisamurkowski,1.806167e+07,2020-12-03T15:20:18-05:00,https://www.twitter.com/lisamurkowski/statuses/1334593323793866752,know that i am monitoring the situation closely and will continue engage with state and local officials. thank you to all the individuals working around the clock to support emergency response efforts &; the alaskans who are pitching in a helping hand for their neighbors in need.,R,"[-0.0042, 0.0541, -0.0237, 0.0155, 0.0138, -0.0178, -0.0062, -0.0192, -0.0309, -0.0448, 0.0292, 0.013, -0.0246, -0.0039, 0.0303, -0.0361, 0.0233, -0.0214, 0.036, -0.0022, 0.0024, 0.021, -0.0113, -0.0095, -0.0208, 0.028, -0.0158, -0.0071, 0.0338, -0.0603, 0.012, -0.0282, -0.0307, -0.0167, 0.0003, -0.0229, 0.0266, 0.0102, -0.0774, -0.0152, -0.04, -0.137, -0.0438, -0.0103, -0.0041, -0.0416, 0.0061, 0.0215, 0.0142, 0.0388, 0.0095, 0.0329, 0.0971, 0.0227, -0.0446, 0.0514, 0.008, -0.0744, -0.0607, -0.044, 0.0646, 0.066, -0.0147, 0.0109, 0.0078, 0.0551, 0.0535, 0.0099, -0.0361, -0.0031, 0.0611, 0.0157, 0.003, 0.0126, -0.0348, -0.0073, 0.0118, -0.0113, -0.0173, -0.0251, -0.0524, 0.0251, -0.0046, 0.004, -0.0353, -0.0093, -0.0113, -0.0163, -0.0646, 0.0302, -0.0144, -0.0147, 0.0112, -0.0175, 0.0119, -0.009, 0.0293, -0.0348, 0.0056, -0.0447, ...]","[-0.036, 0.0988, -0.0295, -0.0211, 0.0032, -0.0082, -0.009, 0.0064, 0.0003, 0.014, 0.0466, 0.0434, -0.0332, 0.0224, 0.0474, -0.0536, 0.0697, 0.0318, -0.0131, 0.0172, -0.0118, 0.0282, 0.0159, 0.0057, -0.028, -0.0352, 0.0118, 0.0114, 0.0325, -0.0771, 0.0072, -0.0578, -0.0235, 0.0094, 0.0038, -0.0609, -0.013, 0.0204, -0.0744, -0.017, -0.0104, -0.0649, -0.0245, 0.0087, 0.0177, -0.0183, 0.0252, 0.0491, -0.0297, 0.0098, 0.0189, 0.0075, 0.0255, -0.0176, -0.0449, 0.076, 0.005, -0.0525, -0.0077, -0.0245, 0.0556, 0.0341, -0.028, -0.0255, 0.0123, 0.0386, 0.0261, 0.0234, 0.005, 0.014, 0.0488, -0.022, 0.0098, 0.0343, 0.0395, -0.0308, -0.0435, 0.0072, 0.0081, 0.0232, 0.0153, 0.0202, 0.0354, 0.0553, 0.0567, 0.0447, 0.0049, -0.0081, -0.004, 0.038, -0.0054, -0.0546, 0.0005, -0.0304, 0.0327, -0.0207, 0.0447, 0.0134, 0.0226, -0.0335, ...]","[-0.0625, 0.0596, -0.0338, -0.0473, 0.0117, 0.0012, -0.0466, 0.029, -0.0041, 0.0077, 0.0476, 0.023, -0.0222, 0.0315, 0.0477, -0.0451, 0.0797, 0.0378, -0.0306, 0.0003, -0.0054, 0.0316, -0.015, -0.0026, -0.0401, -0.0351, 0.0107, 0.0136, 0.0155, -0.0665, 0.0025, -0.0521, 0.0007, 0.0177, -0.0027, -0.0562, -0.0343, 0.0062, -0.0731, -0.0319, -0.0137, -0.0857, -0.0137, 0.0012, 0.0465, -0.0192, 0.038, 0.0248, -0.0378, -0.001, 0.0216, -0.0137, 0.0131, -0.0153, -0.0424, 0.0687, 0.0002, -0.0326, 0.0091, -0.0316, 0.0635, -0.0043, -0.0217, -0.0111, 0.0326, 0.0355, 0.034, 0.0636, -0.0069, 0.0209, 0.0613, -0.0452, 0.016, 0.0372, 0.0125, -0.0163, -0.038, -0.0051, 0.0406, 0.0239, 0.0121, -0.0035, 0.0329, 0.0348, 0.0441, 0.0466, 0.0087, 0.0021, -0.0012, 0.0411, -0.0027, -0.0605, 0.0109, -0.0097, 0.0264, -0.0381, 0.0412, 0.015, 0.0217, 0.0015, ...]","[-0.0568, 0.008, -0.0193, -0.0724, 0.0043, -0.0134, -0.0499, 0.0273, -0.0175, 0.0285, 0.023, 0.0289, -0.0056, 0.0385, 0.0731, -0.0461, 0.0554, 0.0313, -0.027, 0.035, 0.0052, 0.0107, -0.0171, 0.0097, -0.0057, -0.0258, 0.0335, 0.0534, -0.0129, -0.0652, -0.0045, -0.0054, 0.0234, -0.0074, 0.0001, -0.0407, -0.0076, 0.0177, -0.0752, 0.0169, -0.0208, -0.0608, 0.0298, -0.003, 0.058, 0.0058, 0.0263, -0.0112, -0.0357, -0.0194, 0.0062, -0.0721, 0.0271, -0.0015, -0.0303, 0.0499, 0.0054, -0.0125, 0.0354, -0.0468, 0.0451, 0.0073, 0.0032, -0.0273, 0.0704, 0.0064, 0.0096, 0.0753, -0.0198, 0.0074, 0.0356, -0.0729, 0.0349, 0.0356, 0.0037, -0.0183, -0.061, -0.021, 0.0496, 0.0079, 0.0017, -0.0007, 0.0297, 0.0159, 0.0244, 0.0628, 0.0286, 0.0056, 0.0323, 0.0303, 0.0267, -0.0973, 0.0131, 0.0381, 0.0432, -0.0442, 0.059, -0.0042, -0.0126, 0.0232, ...]"
9997,RepAndyBiggsAZ,8.166526e+17,2022-05-04T09:07:44-04:00,https://www.twitter.com/RepAndyBiggsAZ/statuses/1521838992676823041,"biden's ""ministry of truth"" is nonsense and an orwellian measure. that's why , 50 members, and i are introducing a bill to defund this unconstitutional board. more to come.",R,"[0.0182, 0.1305, 0.0047, 0.0274, -0.0536, 0.0215, 0.0339, 0.0058, -0.0492, -0.03, 0.0071, 0.0007, -0.0023, 0.0217, 0.0249, 0.0258, 0.0493, 0.0602, -0.0194, -0.0351, 0.0129, 0.0108, -0.0486, -0.0011, -0.0033, -0.0145, 0.0261, -0.0114, 0.0005, 0.0224, -0.0545, 0.0099, -0.0664, 0.0291, 0.0003, 0.0224, 0.0003, 0.0187, -0.0821, 0.0014, 0.0079, 0.0317, -0.0047, -0.0232, -0.0095, -0.0105, 0.005, 0.0036, 0.0192, 0.0208, 0.0137, 0.0825, -0.0288, 0.0163, -0.0523, 0.0292, 0.0335, -0.0409, -0.0277, 0.0182, 0.0199, 0.016, -0.0049, -0.021, -0.0772, 0.0288, 0.0584, -0.007, -0.0134, 0.0184, 0.0843, -0.001, 0.067, 0.0513, 0.0209, 0.0272, 0.0268, 0.0813, -0.0266, -0.0362, -0.0314, -0.0002, 0.0178, 0.0015, 0.058, 0.0313, -0.003, 0.0049, -0.0345, -0.0293, 0.0271, -0.0357, 0.0098, -0.0739, 0.0027, -0.018, 0.0184, -0.0334, 0.0429, 0.0121, ...]","[-0.0093, 0.1226, 0.0157, 0.0446, -0.0318, -0.0012, 0.0432, 0.0139, 0.0059, 0.0128, 0.0057, 0.0689, -0.0094, 0.0185, -0.0067, -0.0109, 0.058, 0.0281, -0.0302, -0.0106, -0.0475, 0.0259, 0.0694, 0.045, -0.0029, -0.0313, 0.0314, -0.0259, 0.0277, -0.0004, 0.0419, -0.0468, -0.0558, 0.0482, 0.0037, -0.0401, 0.026, 0.0445, -0.0376, 0.0364, 0.0222, 0.0222, -0.0148, 0.0365, 0.0115, -0.0526, 0.0051, 0.0818, 0.0095, 0.011, 0.0151, 0.0295, -0.0029, -0.0471, -0.0312, 0.0317, -0.0006, -0.0274, -0.004, -0.0623, 0.0212, 0.0222, -0.0554, -0.0084, -0.0622, 0.0663, -0.0001, -0.0247, 0.0399, 0.0625, -0.0174, 0.0031, 0.0254, 0.037, 0.018, -0.0381, -0.0019, 0.0823, -0.0431, -0.0152, -0.028, 0.007, 0.0246, 0.0397, 0.0699, 0.0269, 0.0441, -0.016, 0.0, 0.0226, 0.0268, 0.0007, -0.0037, -0.1003, -0.0081, -0.0182, 0.01, 0.0183, 0.0313, -0.0346, ...]","[-0.0232, 0.099, 0.0219, 0.0427, -0.0209, 0.0057, 0.0331, 0.0105, 0.0108, 0.0128, 0.0089, 0.0473, -0.0144, 0.0292, -0.027, 0.025, 0.0439, 0.0434, -0.0333, -0.0339, -0.0323, 0.0219, 0.0569, 0.0479, -0.0263, -0.03, 0.0394, -0.0401, 0.0224, -0.0031, 0.0335, -0.0507, -0.0646, 0.0561, -0.0025, -0.0199, 0.0193, 0.0644, -0.021, 0.034, 0.0227, 0.0147, -0.0009, 0.0543, 0.0203, -0.0753, 0.0258, 0.0561, 0.0436, 0.0043, 0.0043, 0.0185, -0.0423, -0.0508, -0.0263, 0.0315, -0.0085, -0.0055, 0.0155, -0.0803, 0.0179, -0.0062, -0.0628, 0.0085, -0.0787, 0.0834, -0.0024, -0.0081, 0.0394, 0.0644, -0.0202, -0.0065, 0.0251, 0.0455, -0.0085, -0.0307, -0.0002, 0.0918, -0.0436, -0.009, -0.054, -0.0067, 0.0306, 0.0358, 0.0431, 0.0198, 0.046, -0.0114, 0.0184, 0.0181, 0.0171, 0.0176, -0.0094, -0.0934, -0.0123, -0.0308, 0.0236, 0.0231, 0.0404, -0.023, ...]","[-0.0432, -0.0074, 0.0241, 0.0192, -0.039, -0.0022, 0.0138, 0.0229, -0.04, 0.0266, -0.0283, 0.0409, 0.0173, 0.0538, -0.0089, 0.0149, 0.0297, 0.0212, -0.0381, 0.0129, 0.0082, -0.0042, 0.0162, 0.0482, 0.007, -0.0505, 0.0241, 0.0306, -0.0055, -0.0208, 0.0373, -0.0016, 0.0028, 0.0172, 0.0004, -0.0019, 0.0193, 0.055, -0.0246, 0.1011, 0.0335, -0.0126, 0.0523, 0.0699, 0.0337, -0.0615, 0.0032, 0.0105, 0.0173, 0.0136, -0.0071, -0.0244, -0.0302, -0.0478, -0.014, -0.0013, 0.0109, 0.014, 0.0384, -0.1004, 0.0051, -0.0114, -0.0378, 0.0044, 0.015, 0.0636, -0.027, 0.0256, 0.033, 0.0443, -0.0548, -0.0936, 0.0341, 0.051, 0.007, -0.0106, -0.0079, 0.0099, -0.0169, 0.0112, -0.0314, -0.0004, 0.0273, 0.029, 0.007, 0.0395, 0.0709, -0.0025, 0.0908, -0.0086, 0.0536, -0.0457, -0.0397, -0.023, -0.0219, -0.0652, 0.0482, 0.0052, 0.0156, 0.0215, ...]"
9998,WarrenDavidson,7.427355e+17,2021-12-23T08:27:51-05:00,https://www.twitter.com/WarrenDavidson/statuses/1474008856342810637,trust the science. risks are not evenly distributed. talk with your own doctor. facts &gt; fear,R,"[-0.0212, -0.0301, -0.0136, -0.0417, 0.085, 0.0168, -0.0029, -0.0541, -0.0023, 0.0477, 0.025, -0.0221, -0.0377, 0.0348, -0.0684, 0.0156, 0.0201, -0.0004, 0.0193, -0.0264, 0.0018, -0.0089, 0.0042, 0.0279, 0.0268, 0.0062, 0.0597, 0.051, 0.0146, -0.07, -0.0022, -0.0306, 0.0264, 0.0173, 0.0003, -0.0252, 0.0176, -0.0224, 0.0169, -0.0567, 0.0004, -0.0113, 0.0244, -0.0121, 0.002, -0.0395, 0.0139, 0.0193, 0.0137, 0.0013, -0.0058, 0.0344, 0.031, 0.0486, 0.0538, 0.0046, -0.0379, -0.017, -0.0481, 0.084, 0.0085, 0.0289, -0.0105, 0.0108, -0.0183, -0.0232, 0.0175, -0.1294, -0.0069, 0.0205, 0.0107, 0.0218, 0.0021, -0.0134, -0.0493, 0.0012, -0.0041, -0.0079, 0.0253, 0.0269, 0.0189, 0.0254, 0.0156, -0.0249, 0.0219, -0.022, 0.0375, 0.0092, 0.0318, -0.0153, 0.0771, -0.0068, 0.0086, -0.0295, -0.0224, 0.0212, 0.0074, -0.0098, 0.0607, -0.0579, ...]","[-0.0214, 0.1224, -0.0144, 0.003, 0.0082, -0.0003, 0.0264, 0.0169, 0.0346, 0.0588, 0.0303, 0.0325, -0.0245, 0.035, -0.0372, -0.012, 0.0477, 0.029, 0.0315, -0.0114, -0.0487, -0.0123, 0.0219, 0.0161, 0.0388, -0.0021, 0.0568, -0.0162, 0.0329, -0.0406, 0.0224, -0.0373, -0.0498, 0.0492, 0.0039, -0.0376, 0.0082, -0.0028, -0.0053, -0.0323, 0.0149, 0.0016, -0.0529, 0.036, 0.0142, -0.0142, -0.0014, 0.0607, -0.0492, -0.0073, 0.0007, 0.0437, 0.0144, -0.0075, -0.0276, 0.0253, 0.0101, -0.0287, -0.0307, 0.0295, 0.0145, 0.003, -0.0641, -0.0376, 0.0135, 0.0807, 0.0673, -0.0825, 0.0051, 0.005, 0.0081, 0.0073, 0.0095, 0.0173, 0.0144, -0.0111, -0.0398, -0.0172, -0.0002, 0.0178, 0.0137, 0.0195, 0.011, 0.0297, 0.0532, 0.0317, 0.0031, -0.0151, -0.0062, 0.0335, 0.0317, -0.0369, -0.0185, -0.0474, 0.0284, 0.0106, 0.0123, 0.0508, 0.0315, -0.0459, ...]","[-0.028, 0.1097, -0.0074, -0.0086, 0.0141, -0.0151, -0.0052, 0.0319, 0.0446, 0.0601, 0.0316, 0.0192, -0.0063, 0.0448, -0.0711, 0.0035, 0.0397, 0.0359, 0.0282, -0.0331, -0.0432, -0.0081, -0.0144, 0.0081, 0.0314, 0.0018, 0.0466, -0.0259, 0.0148, -0.0147, 0.0197, -0.0371, -0.056, 0.0588, -0.0027, -0.029, -0.009, -0.0182, 0.0223, -0.0705, 0.0225, -0.0116, -0.042, 0.043, 0.0336, -0.0262, 0.0159, 0.0383, -0.0517, -0.0106, 0.0084, 0.0157, -0.0053, -0.0108, -0.0301, 0.0063, 0.0024, -0.0153, -0.0433, 0.0245, 0.0282, -0.0379, -0.0708, -0.009, 0.0125, 0.0871, 0.0818, -0.0595, -0.0004, 0.0034, 0.0166, -0.0088, 0.0105, 0.0052, -0.0125, 0.0043, -0.0295, -0.016, 0.0138, 0.0358, -0.0044, -0.0054, 0.0156, 0.016, 0.0443, 0.0471, 0.012, -0.0103, 0.0116, 0.0166, 0.0136, -0.0348, -0.0116, -0.0313, 0.0279, -0.0045, 0.0052, 0.0541, 0.0511, -0.0187, ...]","[-0.0322, 0.0333, -0.0125, -0.0286, 0.0199, -0.0344, -0.0268, 0.0288, 0.0042, 0.0954, -0.011, -0.0264, -0.0083, 0.0425, -0.0308, 0.0101, 0.0223, -0.0078, 0.0098, 0.0151, -0.019, -0.0359, -0.0356, 0.0223, 0.0574, 0.0017, 0.0535, 0.0326, -0.0012, -0.0384, -0.0033, -0.0046, -0.0167, 0.043, 0.0001, -0.0193, 0.0373, -0.0243, 0.0235, -0.0507, 0.0041, -0.0108, -0.0242, 0.0022, 0.0432, -0.0077, 0.0076, -0.0043, -0.0616, -0.0275, -0.0096, -0.0326, 0.0004, 0.0313, -0.011, -0.0008, 0.0151, -0.0072, -0.0046, 0.0099, 0.0195, -0.0358, -0.0217, -0.0413, 0.0657, 0.0466, 0.0637, -0.0389, -0.0401, -0.0326, 0.0252, -0.0075, 0.0161, 0.0019, -0.0092, 0.0038, -0.0691, -0.0823, 0.0427, 0.0172, -0.0034, -0.0037, 0.0094, -0.016, 0.0363, 0.0823, 0.0111, -0.0043, 0.0468, 0.0277, 0.0432, -0.0934, -0.0113, 0.034, 0.0371, -0.0072, 0.0228, 0.0418, -0.0022, -0.0066, ...]"


time: 70.8 ms (started: 2024-02-15 15:25:11 +00:00)


In [29]:
df_finetuned[df_finetuned[f'cosim_{finetuned_model_x}']>0.90]['party'].value_counts()

Series([], Name: party, dtype: int64)

time: 6.93 ms (started: 2024-02-20 09:32:03 +00:00)


In [31]:
from sklearn.utils import resample

economic_claims = pd.read_excel('/csg_nas/Vahid/Datasets/StanceAwareSBERT/Datasets/economic_claims_Kialo_bertsearched.xlsx')
economic_claims['party_truth'] = economic_claims['Economic_label_AI'].map({'Economically Left':'D', 'Economically Right':'R'})
# Separate the DataFrame into two classes
df_D = economic_claims[economic_claims['party_truth'] == 'D']
df_R = economic_claims[economic_claims['party_truth'] == 'R']

# Downsample the larger class ('D') to match the size of the smaller class ('R')
df_D_downsampled = resample(df_D, replace=False, n_samples=len(df_R), random_state=1)

# Combine the two DataFrames back into one
economic_claims = pd.concat([df_D_downsampled, df_R],ignore_index=True)

economic_claims['party_breakdown_original'] = economic_claims['party_breakdown_original'].map(eval)
economic_claims['party_breakdown_finetuned'] = economic_claims['party_breakdown_finetuned'].map(eval)
economic_claims['party_breakdown_pipeline'] = economic_claims['party_breakdown_pipeline'].map(eval)

# pd.read_excel('/csg_nas/Vahid/Datasets/StanceAwareSBERT/Datasets/sociopolitical_claims_Kialo.xlsx')

time: 111 ms (started: 2024-02-20 09:33:13 +00:00)


In [85]:
# economic_claims = economic_claims[economic_claims['Economic_label_AI']!='Unclear'][['claim_translation','Economic_label_AI']]
# economic_claims = pd.concat([economic_claims,pd.DataFrame({'claim_translation':'If economic globalisation is inevitable, it should primarily serve humanity rather than the interests of trans-national corporations.','Economic_label_AI':'Economically Left'},index=[0])],ignore_index=True)
# economic_claims = pd.concat([economic_claims,pd.DataFrame({'claim_translation':'People are ultimately divided more by class than by nationality.','Economic_label_AI':'Economically Left'},index=[0])],ignore_index=True)
# economic_claims = pd.concat([economic_claims,pd.DataFrame({'claim_translation':'Controlling inflation is more important than controlling unemployment.','Economic_label_AI':'Economically Right'},index=[0])],ignore_index=True)
# economic_claims = pd.concat([economic_claims,pd.DataFrame({'claim_translation':'Because corporations cannot be trusted to voluntarily protect the environment, they require regulation.','Economic_label_AI':'Economically Left'},index=[0])],ignore_index=True)
# economic_claims = pd.concat([economic_claims,pd.DataFrame({'claim_translation':'“from each according to his ability, to each according to his need” is a fundamentally good idea.','Economic_label_AI':'Economically Left'},index=[0])],ignore_index=True)
# economic_claims = pd.concat([economic_claims,pd.DataFrame({'claim_translation':'It is regrettable that many personal fortunes are made by people who simply manipulate money and contribute nothing to their society.','Economic_label_AI':'Economically Left'},index=[0])],ignore_index=True)
# economic_claims = pd.concat([economic_claims,pd.DataFrame({'claim_translation':'Protectionism is sometimes necessary in trade.','Economic_label_AI':'Economically Left'},index=[0])],ignore_index=True)
# economic_claims = pd.concat([economic_claims,pd.DataFrame({'claim_translation':'The only social responsibility of a company should be to deliver a profit to its shareholders.','Economic_label_AI':'Economically Right'},index=[0])],ignore_index=True)
# economic_claims = pd.concat([economic_claims,pd.DataFrame({'claim_translation':'The rich are too highly taxed.','Economic_label_AI':'Economically Right'},index=[0])],ignore_index=True)
# economic_claims = pd.concat([economic_claims,pd.DataFrame({'claim_translation':'Those with the ability to pay should have access to higher standards of medical care.','Economic_label_AI':'Economically Right'},index=[0])],ignore_index=True)
# economic_claims = pd.concat([economic_claims,pd.DataFrame({'claim_translation':'Governments should penalise businesses that mislead the public.','Economic_label_AI':'Economically Left'},index=[0])],ignore_index=True)
# economic_claims = pd.concat([economic_claims,pd.DataFrame({'claim_translation':'The freer the market, the freer the people.','Economic_label_AI':'Economically Right'},index=[0])],ignore_index=True)
# economic_claims = pd.concat([economic_claims,pd.DataFrame({'claim_translation':'Taxpayers should not be expected to prop up any theatres or museums that cannot survive on a commercial basis.','Economic_label_AI':'Economically Right'},index=[0])],ignore_index=True)
# economic_claims = pd.concat([economic_claims,pd.DataFrame({'claim_translation':'What’s good for the most successful corporations is always, ultimately, good for all of us.','Economic_label_AI':'Economically Right'},index=[0])],ignore_index=True)
# economic_claims = pd.concat([economic_claims,pd.DataFrame({'claim_translation':'No broadcasting institution, however independent its content, should receive public funding.','Economic_label_AI':'Economically Right'},index=[0])],ignore_index=True)
# economic_claims = pd.concat([economic_claims,pd.DataFrame({'claim_translation':'Charity is better than social security as a means of helping the genuinely disadvantaged.','Economic_label_AI':'Economically Right'},index=[0])],ignore_index=True)

time: 18.5 ms (started: 2023-10-05 15:18:47 +00:00)


In [46]:
# import os
# os.environ["TOKENIZERS_PARALLELISM"] = "false"
# from pandarallel import pandarallel
# pandarallel.initialize(progress_bar=False,nb_workers=50)



from scipy.spatial import distance

def get_similar_sentences(model_x, query_x,df, query_column_x,threshold_x):
    print('\n')
    print(query_x)
    text1 = tokenizer(query_x, return_tensors="pt", max_length=128, truncation=True, padding="max_length")
    embeddings_i = model_x(**text1).tolist()
    embeddings_i = [[round(value, 4) for value in row] for row in embeddings_i]
    
    df[f'cosim_{query_column_x}'] = df[query_column_x].progress_apply(lambda x: 1-distance.cosine(x,embeddings_i[0]))
    df_ = df[df[f'cosim_{query_column_x}']>threshold_x]
    print(df_['party'].value_counts())
    return df_['party'].value_counts().to_dict()

def get_similar_sentences_pipeline(model_1,model_2, query_x,df,query_column_1,query_column_2,threshold_1,threshold_2):
    print('\n')
    print(query_x)
    text1 = tokenizer(query_x, return_tensors="pt", max_length=128, truncation=True, padding="max_length")
    embeddings_i = model_1(**text1).tolist()
    embeddings_i = [[round(value, 4) for value in row] for row in embeddings_i]
    
    df[f'cosim_{query_column_1}'] = df[query_column_1].progress_apply(lambda x: 1-distance.cosine(x,embeddings_i[0]))
    df_ = df[df[f'cosim_{query_column_1}']>threshold_1]
    
    embeddings_j = model_2(**text1).tolist()
    embeddings_j = [[round(value, 4) for value in row] for row in embeddings_j]    
    
    df_[f'cosim_{query_column_2}'] = df_[query_column_2].progress_apply(lambda x: 1-distance.cosine(x,embeddings_j[0]))
    df__ = df_[df_[f'cosim_{query_column_2}']>threshold_2]
    

    print(df__['party'].value_counts())
    
    
    return df__['party'].value_counts().to_dict()


# economic_claims['party_breakdown_pipeline']=economic_claims['claim_translation'].progress_apply(lambda x: get_similar_sentences_pipeline(model_1=model_original,model_2=model_finetuned_t, query_x=x,df=final_sample,query_column_1='all-mpnet-base-v2',query_column_2=finetuned_model_x,threshold_1=0.5,threshold_2=0.7))
# economic_claims.to_excel('/csg_nas/Vahid/Datasets/StanceAwareSBERT/Datasets/economic_claims_Kialo_bertsearched.xlsx',index=None)
# economic_claims['party_breakdown_original']=economic_claims['claim_translation'].progress_apply(lambda x: get_similar_sentences(model_x=model_original, query_x=x ,df = final_sample, query_column_x = 'all-mpnet-base-v2',threshold_x=0.50))
# economic_claims.to_excel('/csg_nas/Vahid/Datasets/StanceAwareSBERT/Datasets/economic_claims_Kialo_bertsearched.xlsx',index=None)
economic_claims['party_breakdown_finetuned']=economic_claims['claim_translation'].progress_apply(lambda x: get_similar_sentences(model_x=model_finetuned_t, query_x=x ,df = final_sample, query_column_x = finetuned_model_x,threshold_x=0.80))
# economic_claims.to_excel('/csg_nas/Vahid/Datasets/StanceAwareSBERT/Datasets/economic_claims_Kialo_bertsearched.xlsx',index=None)

  0%|          | 0/50 [00:00<?, ?it/s]



Inheritance should be abolished.


  0%|          | 0/10000 [00:00<?, ?it/s]

R    2
Name: party, dtype: int64


There should be a 100% inheritance tax on all inherited wealth above 1 million Euros.


  0%|          | 0/10000 [00:00<?, ?it/s]

Series([], Name: party, dtype: int64)


Structural adjustment is the wrong policy.


  0%|          | 0/10000 [00:00<?, ?it/s]

D    8
R    7
Name: party, dtype: int64


It is regrettable that many personal fortunes are made by people who simply manipulate money and contribute nothing to their society.


  0%|          | 0/10000 [00:00<?, ?it/s]

R    39
D    28
Name: party, dtype: int64


India should have a Universal Basic Income (UBI).


  0%|          | 0/10000 [00:00<?, ?it/s]

D    66
R    31
Name: party, dtype: int64


Price controls are a benefit to society.


  0%|          | 0/10000 [00:00<?, ?it/s]

R    41
D    39
Name: party, dtype: int64


The Sanders Tax is a good thing.


  0%|          | 0/10000 [00:00<?, ?it/s]

R    132
D    112
Name: party, dtype: int64


If economic globalisation is inevitable, it should primarily serve humanity rather than the interests of trans-national corporations.


  0%|          | 0/10000 [00:00<?, ?it/s]

R    3
D    2
Name: party, dtype: int64


Lending money at interest is wrong.


  0%|          | 0/10000 [00:00<?, ?it/s]

Series([], Name: party, dtype: int64)


Everyone's wealth and income information should be publicly available.


  0%|          | 0/10000 [00:00<?, ?it/s]

D    1
Name: party, dtype: int64


A child's primary carer should receive a wage from the government until the child enters primary school or some other form of care.


  0%|          | 0/10000 [00:00<?, ?it/s]

R    1
Name: party, dtype: int64


Modern Monetary Theory, asserting that governments should expand deficits by money printing, is largely correct


  0%|          | 0/10000 [00:00<?, ?it/s]

R    5
D    2
Name: party, dtype: int64


Feminine hygiene products should be available through a complete subsidy.


  0%|          | 0/10000 [00:00<?, ?it/s]

D    2
Name: party, dtype: int64


Internet infrastructure should be publicly owned.


  0%|          | 0/10000 [00:00<?, ?it/s]

D    11
R     8
Name: party, dtype: int64


Fully Automated Luxury Communism is desirable.


  0%|          | 0/10000 [00:00<?, ?it/s]

Series([], Name: party, dtype: int64)


Inheritance should be minimized to create an equal outset for everyone.


  0%|          | 0/10000 [00:00<?, ?it/s]

R    1
Name: party, dtype: int64


“from each according to his ability, to each according to his need” is a fundamentally good idea.


  0%|          | 0/10000 [00:00<?, ?it/s]

Series([], Name: party, dtype: int64)


A socialist economy would work better than a capitalist economy.


  0%|          | 0/10000 [00:00<?, ?it/s]

R    2
D    1
Name: party, dtype: int64


The Subminimum Wage should be abolished.


  0%|          | 0/10000 [00:00<?, ?it/s]

D    4
R    2
Name: party, dtype: int64


The US Government should commit to a Green New Deal.


  0%|          | 0/10000 [00:00<?, ?it/s]

D    117
R     81
Name: party, dtype: int64


Communism Is Viable in the USA.


  0%|          | 0/10000 [00:00<?, ?it/s]

R    9
D    5
Name: party, dtype: int64


Because corporations cannot be trusted to voluntarily protect the environment, they require regulation.


  0%|          | 0/10000 [00:00<?, ?it/s]

R    4
Name: party, dtype: int64


The workweek should be shortened to 15 hours for the same pay


  0%|          | 0/10000 [00:00<?, ?it/s]

R    3
D    1
Name: party, dtype: int64


The private sector is incapable of delivering complete Health Care for Everyone.


  0%|          | 0/10000 [00:00<?, ?it/s]

R    33
D    19
Name: party, dtype: int64


Governments should penalise businesses that mislead the public.


  0%|          | 0/10000 [00:00<?, ?it/s]

R    4
Name: party, dtype: int64


Capitalism is good.


  0%|          | 0/10000 [00:00<?, ?it/s]

Series([], Name: party, dtype: int64)


Price gouging should be legal.


  0%|          | 0/10000 [00:00<?, ?it/s]

Series([], Name: party, dtype: int64)


Free markets are better for humans than regulated markets.


  0%|          | 0/10000 [00:00<?, ?it/s]

R    1
Name: party, dtype: int64


Rent control policies hurt housing affordability and should be repealed.


  0%|          | 0/10000 [00:00<?, ?it/s]

R    92
D    56
Name: party, dtype: int64


The United States should abolish the capital gains tax.


  0%|          | 0/10000 [00:00<?, ?it/s]

D    2
Name: party, dtype: int64


Private property should exist in outer space.


  0%|          | 0/10000 [00:00<?, ?it/s]

Series([], Name: party, dtype: int64)


K-12 teachers are already paid enough in America.


  0%|          | 0/10000 [00:00<?, ?it/s]

Series([], Name: party, dtype: int64)


Extreme poverty can be eradicated through capitalism.


  0%|          | 0/10000 [00:00<?, ?it/s]

D    165
R    126
Name: party, dtype: int64


Free trade is preferable to tariffs for the United States.


  0%|          | 0/10000 [00:00<?, ?it/s]

D    2
R    1
Name: party, dtype: int64


Protectionism is a bad economic policy.


  0%|          | 0/10000 [00:00<?, ?it/s]

R    29
D    17
Name: party, dtype: int64


Capitalism is the best system for the people.


  0%|          | 0/10000 [00:00<?, ?it/s]

R    1
Name: party, dtype: int64


A flat "Asset Tax" is all the tax we should ever pay.


  0%|          | 0/10000 [00:00<?, ?it/s]

Series([], Name: party, dtype: int64)


The US government should stop the COVID-19 shutdown to soften the impact on the economy.


  0%|          | 0/10000 [00:00<?, ?it/s]

D    109
R     74
Name: party, dtype: int64


There should be no welfare state.


  0%|          | 0/10000 [00:00<?, ?it/s]

D    3
R    2
Name: party, dtype: int64


Reducing company tax rates has a net positive benefit on society.


  0%|          | 0/10000 [00:00<?, ?it/s]

R    180
D    174
Name: party, dtype: int64


Charities should replace social security.


  0%|          | 0/10000 [00:00<?, ?it/s]

Series([], Name: party, dtype: int64)


Controlling inflation is more important than controlling unemployment.


  0%|          | 0/10000 [00:00<?, ?it/s]

Series([], Name: party, dtype: int64)


The only social responsibility of a company should be to deliver a profit to its shareholders.


  0%|          | 0/10000 [00:00<?, ?it/s]

Series([], Name: party, dtype: int64)


The rich are too highly taxed.


  0%|          | 0/10000 [00:00<?, ?it/s]

R    50
D    14
Name: party, dtype: int64


Those with the ability to pay should have access to higher standards of medical care.


  0%|          | 0/10000 [00:00<?, ?it/s]

D    112
R     66
Name: party, dtype: int64


The freer the market, the freer the people.


  0%|          | 0/10000 [00:00<?, ?it/s]

D    1
Name: party, dtype: int64


Taxpayers should not be expected to prop up any theatres or museums that cannot survive on a commercial basis.


  0%|          | 0/10000 [00:00<?, ?it/s]

R    2
D    1
Name: party, dtype: int64


What’s good for the most successful corporations is always, ultimately, good for all of us.


  0%|          | 0/10000 [00:00<?, ?it/s]

D    2
R    1
Name: party, dtype: int64


No broadcasting institution, however independent its content, should receive public funding.


  0%|          | 0/10000 [00:00<?, ?it/s]

R    1
Name: party, dtype: int64


Charity is better than social security as a means of helping the genuinely disadvantaged.


  0%|          | 0/10000 [00:00<?, ?it/s]

D    2
Name: party, dtype: int64
time: 2min 23s (started: 2024-02-20 09:53:01 +00:00)


In [36]:
economic_claims['Economic_label_AI'].value_counts().to_dict()

{'Economically Left': 25, 'Economically Right': 25}

time: 4.5 ms (started: 2024-02-20 09:47:14 +00:00)


In [37]:
from scipy.spatial import distance
import numpy as np

def get_key_with_largest_value(input_dict):
    if not input_dict:
        return np.nan  # Return NaN if the dictionary is empty
    else:
        return max(input_dict, key=input_dict.get)

time: 879 µs (started: 2024-02-20 09:47:30 +00:00)


In [38]:
economic_claims['party_prediction_original'] = economic_claims['party_breakdown_original'].map(get_key_with_largest_value)
economic_claims['party_prediction_finetuned'] = economic_claims['party_breakdown_finetuned'].map(get_key_with_largest_value)
economic_claims['party_prediction_pipeline'] = economic_claims['party_breakdown_pipeline'].map(get_key_with_largest_value)
economic_claims

Unnamed: 0,claim_translation,Economic_label_AI,party_breakdown_original,party_breakdown_finetuned,party_breakdown_pipeline,party_truth,party_prediction_original,party_prediction_finetuned,party_prediction_pipeline
0,Inheritance should be abolished.,Economically Left,"{'D': 1, 'R': 1}","{'R': 9379, 'D': 8190}",{},D,D,R,
1,There should be a 100% inheritance tax on all inherited wealth above 1 million Euros.,Economically Left,{'D': 10},"{'D': 38792, 'R': 37073}",{},D,D,D,
2,Structural adjustment is the wrong policy.,Economically Left,"{'R': 11, 'D': 2}","{'R': 9294, 'D': 7578}",{'R': 1},D,R,R,R
3,It is regrettable that many personal fortunes are made by people who simply manipulate money and contribute nothing to their society.,Economically Left,"{'D': 5, 'R': 1}","{'D': 14164, 'R': 14142}",{},D,D,D,
4,India should have a Universal Basic Income (UBI).,Economically Left,"{'D': 4, 'R': 1}","{'D': 36660, 'R': 35147}",{},D,D,D,
5,Price controls are a benefit to society.,Economically Left,"{'D': 11, 'R': 10}","{'D': 34712, 'R': 33697}",{},D,D,D,
6,The Sanders Tax is a good thing.,Economically Left,"{'R': 268, 'D': 107}","{'D': 35037, 'R': 34334}","{'R': 20, 'D': 5}",D,R,D,R
7,"If economic globalisation is inevitable, it should primarily serve humanity rather than the interests of trans-national corporations.",Economically Left,"{'D': 2, 'R': 2}","{'R': 7250, 'D': 6327}",{},D,D,R,
8,Lending money at interest is wrong.,Economically Left,{'D': 3},"{'R': 9630, 'D': 8096}",{},D,D,R,
9,Everyone's wealth and income information should be publicly available.,Economically Left,"{'R': 5, 'D': 4}","{'D': 38420, 'R': 36619}",{},D,R,D,


time: 88.9 ms (started: 2024-02-20 09:47:33 +00:00)


In [34]:
{'D': 1}.get_keys()

AttributeError: 'dict' object has no attribute 'get_keys'

time: 24.9 ms (started: 2023-10-05 12:21:11 +00:00)


In [35]:
dir({'D': 1})

['__class__',
 '__class_getitem__',
 '__contains__',
 '__delattr__',
 '__delitem__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__ior__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__ne__',
 '__new__',
 '__or__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__reversed__',
 '__ror__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'clear',
 'copy',
 'fromkeys',
 'get',
 'items',
 'keys',
 'pop',
 'popitem',
 'setdefault',
 'update',
 'values']

time: 3.94 ms (started: 2023-10-05 12:21:21 +00:00)


In [12]:
economic_claims

Unnamed: 0,claim_translation,Economic_label_AI,party_breakdown_original,party_breakdown_finetuned,party_breakdown_pipeline,party_truth
0,Governments should provide a universal basic income.,Economically Left,"{'D': 11, 'R': 5}","{'D': 36309, 'R': 34834}","{'D': 8, 'R': 2}",D
1,Wealthy countries should provide citizens with a universal basic income (UBI).,Economically Left,"{'D': 8, 'R': 1}","{'D': 37028, 'R': 35520}","{'D': 4, 'R': 1}",D
2,There should be a 100% inheritance tax on all inherited wealth above 1 million Euros.,Economically Left,{'D': 10},"{'D': 38792, 'R': 37073}",{'D': 4},D
3,Inheritance should be abolished.,Economically Left,"{'D': 1, 'R': 1}","{'R': 9379, 'D': 8190}",{},D
4,A socialist economy would work better than a capitalist economy.,Economically Left,"{'R': 10, 'D': 4}","{'D': 30906, 'R': 29979}","{'D': 1, 'R': 1}",D
...,...,...,...,...,...,...
63,"The freer the market, the freer the people.",Economically Right,"{'R': 23, 'D': 5}","{'D': 36295, 'R': 35162}","{'R': 8, 'D': 4}",R
64,Taxpayers should not be expected to prop up any theatres or museums that cannot survive on a commercial basis.,Economically Right,{'R': 2},"{'R': 11974, 'D': 10820}",{'R': 1},R
65,"What’s good for the most successful corporations is always, ultimately, good for all of us.",Economically Right,"{'R': 13, 'D': 10}","{'D': 35636, 'R': 34231}","{'R': 5, 'D': 4}",R
66,"No broadcasting institution, however independent its content, should receive public funding.",Economically Right,{'D': 1},"{'R': 9794, 'D': 8246}",{'D': 1},R


time: 19.7 ms (started: 2023-10-05 18:32:31 +00:00)


In [42]:
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score

# prediction_col = 'party_prediction_original'
prediction_col = 'party_prediction_finetuned'
# prediction_col = 'party_prediction_pipeline'

# Drop rows with NaN values
economic_claims_ =  economic_claims.dropna(subset=[prediction_col], inplace=False)


# Calculate confusion matrix and F1 score for the 'prediction' column after dropping NaNs
confusion_matrix_prediction = confusion_matrix(economic_claims_['party_truth'], economic_claims_[prediction_col], labels=['D', 'R'])
f1_score_prediction = accuracy_score(economic_claims_['party_truth'], economic_claims_[prediction_col])

print("Confusion Matrix:")
print(confusion_matrix_prediction)
print("Accuracy Score:", f1_score_prediction)

Confusion Matrix:
[[18  7]
 [12 13]]
Accuracy Score: 0.62
time: 6.98 ms (started: 2024-02-20 09:48:40 +00:00)


In [66]:
economic_claims_[prediction_col]

0     D
1     D
2     D
3     D
4     R
     ..
63    R
64    R
65    R
66    D
67    D
Name: party_prediction_original, Length: 68, dtype: object

time: 5.59 ms (started: 2023-10-05 14:56:02 +00:00)


In [43]:
economic_claims['party_breakdown_original'] = economic_claims['party_breakdown_original'].map(eval)
economic_claims['party_breakdown_finetuned'] = economic_claims['party_breakdown_finetuned'].map(eval)
economic_claims

TypeError: eval() arg 1 must be a string, bytes or code object

time: 161 ms (started: 2024-02-20 09:49:40 +00:00)


In [26]:
# Extract the 'truth' column
# prediction_col = 'party_breakdown_original'
# prediction_col = 'party_breakdown_finetuned'
prediction_col = 'party_breakdown_pipeline'

economic_claims_ =  economic_claims.copy() #.dropna(subset=[prediction_col], inplace=False).copy()




R_R_count = sum([pred.get('R', 0) for pred in economic_claims_[economic_claims_['party_truth']=='R'][prediction_col]])
D_D_count = sum([pred.get('D', 0) for pred in economic_claims_[economic_claims_['party_truth']=='D'][prediction_col]])
R_D_count = sum([pred.get('D', 0) for pred in economic_claims_[economic_claims_['party_truth']=='R'][prediction_col]])
D_R_count = sum([pred.get('R', 0) for pred in economic_claims_[economic_claims_['party_truth']=='D'][prediction_col]])

# Construct a confusion matrix
confusion_matrix_result = [[D_D_count, R_D_count],
                            [D_R_count, R_R_count]]

# Output the confusion matrix
confusion_matrix_df = pd.DataFrame(confusion_matrix_result, columns=['Actual Democrat', 'Actual Republican'], index=['Predicted Democrat', 'Predicted Republican'])
print(prediction_col)
print(confusion_matrix_df)
print(f'\nAccuracy: {(D_D_count+R_R_count)/(D_D_count+R_R_count+R_D_count+D_R_count)}')
# This code will give you a confusion matrix based on the total counts of 'R' and 'D' in the dictionary predictions that were labeled correctly and incorrectly.

party_breakdown_pipeline
                      Actual Democrat  Actual Republican
Predicted Democrat                207                221
Predicted Republican               62                212

Accuracy: 0.5968660968660968
time: 10.3 ms (started: 2023-10-05 18:41:45 +00:00)
