In [24]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [25]:
pd.set_option('display.max_columns', None)  # Para mostrar todas as colunas
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)  # Para permitir que as colunas sejam exibidas sem corte


### ENCODE EMOTICONS

In [26]:
df_all = pd.read_csv(r"C:\Users\vieir\Mestrado\TESE\pipeline\resultados\df_all_emoticon_seq.csv")


In [27]:
import ast

# Convert strings to lists of dictionaries, if necessary
if df_all["emotion_recognition"].apply(type).eq(str).any():
    df_all["emotion_recognition"] = df_all["emotion_recognition"].apply(ast.literal_eval)


In [28]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

emotion_vocab = {
    'anger': 1, 
    'disgust': 2, 
    'fear': 3, 
    'joy': 4, 
    'neutral': 5, 
    'sadness': 6, 
    'surprise': 7
}



In [29]:
def convert_to_sequence(emotion_list):
    ordered_emotions = [e["emotion"] for e in emotion_list]
    # Convert emotions to numerical indices
    return [emotion_vocab.get(e, 0) for e in ordered_emotions]  # Use 0 for unknown emotions


In [30]:
# Calculate the maximum sequence length
max_sequence_length = df_all["emotion_recognition"].apply(len).max()

print(f"Maximum sequence length found: {max_sequence_length}")



Maximum sequence length found: 100


In [31]:
df = df_all.copy()

In [32]:
# Convert the "emotion_recognition" column into a numerical sequence
df["emotion_sequence"] = df["emotion_recognition"].apply(lambda x: convert_to_sequence(x))

# Standardize the sequence length
max_sequence_length = 100  # Maximum sequence length
df["padded_sequence"] = list(pad_sequences(df["emotion_sequence"], maxlen=max_sequence_length, padding='post'))

In [33]:
df_filtered = df[df['emotion_sequence'].apply(lambda x: isinstance(x, list) and len(x) >= 3)]

In [34]:
classification_counts = df_filtered['classification'].value_counts()
classification_counts

classification
reliable news    36695
fake news        27012
Name: count, dtype: int64

In [35]:
# Function to extract the list of scores
def extract_scores(emotion_list):
    return [d['score'] for d in emotion_list]

# Create the new column with the list of scores
df_filtered['score_sequence'] = df_filtered['emotion_recognition'].apply(extract_scores)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['score_sequence'] = df_filtered['emotion_recognition'].apply(extract_scores)


In [36]:
# df_filtered.to_csv(r'C:\Users\vieir\Mestrado\TESE\pipeline\resultados\df_all_emoticon_seq_list_filtrado1_com_virgula.csv', index=False)

In [16]:
df_fake = df_filtered[df_filtered['classification'] == 'fake news']
df_true = df_filtered[df_filtered['classification'] == 'reliable news']

In [None]:
# Create a dictionary to store DataFrames filtered by category
df_fake_dict = {}

# Iterate over the unique categories in the 'classification' column
for categoria in df_fake['category'].unique():
    # Filter the DataFrame by category and store it in the dictionary
    print(categoria)
    df_fake_dict[categoria] = df_fake[df_fake['category'] == categoria]

df_true_dict = {}

for categoria in df_true['category'].unique():
    print(categoria)
    df_true_dict[categoria] = df_true[df_true['category'] == categoria]


Pseudoscience
Political News
General News
Conspiracy
PAC SuperPAC
Political Advocacy
General News
Political News
Pro-Science
Political Advocacy
PAC SuperPAC


### Prefixpan


In [39]:
from prefixspan import PrefixSpan
import os
import pickle
from collections import Counter


# Create a folder to save the files (optional)
output_folder = "results"
os.makedirs(output_folder, exist_ok=True)

# Function to process a DataFrame and save the result immediately
def process_and_save(category, df, fake_or_true='fake'):
    result = f"\nCategory: {category}\n"

    # Get the sequences from the 'emotion_sequence' column
    sequences = df['emotion_sequence'].tolist()

    # Define minimum support as 1% of the number of observations in the DataFrame
    min_support = max(1, int(len(df) * 0.01))

    # Apply PrefixSpan with minimum support
    ps = PrefixSpan(sequences)
    patterns = ps.frequent(minsup=min_support)
    # Save the 'patterns' variable to the file 'patterns.pkl'
    with open(f'results/{fake_or_true}_patterns_{category}.pkl', 'wb') as file:
        pickle.dump(patterns, file)

    print(f"Pickle file saved.")

    # Sort patterns by frequency (from most to least frequent)
    patterns.sort(key=lambda x: x[0], reverse=True)

    # Flatten all emotions from all sequences
    all_emotions = [emotion for sequence in sequences for emotion in sequence]

    # Count the frequency of each emotion
    emotion_counts = Counter(all_emotions)

    result += f"Emotion counts:\n"
    for emotion, count in emotion_counts.items():
        result += f"Emotion: {emotion}, Count: {count}\n"

    # Display the 20 most frequent patterns
    result += "Most frequent patterns:\n"
    for freq, pattern in patterns[:20]:
        result += f"Sequence frequency: {freq}, Pattern: {pattern}\n"

    # Filter patterns with length >= 3 and not all identical
    filtered_patterns = [tup for tup in patterns if len(tup[1]) >= 3 and len(set(tup[1])) > 2]
    result += "\nFiltered patterns (length >= 3 and not all identical):\n"
    for freq, pattern in filtered_patterns[:10]:
        result += f"Sequence frequency: {freq}, Pattern: {pattern}\n"

    # Filter patterns with length == 2, not all identical, and without number 5
    filtered_patterns_2 = [tup for tup in patterns if len(tup[1]) == 2 and len(set(tup[1])) > 1 and 5 not in tup[1]]
    result += "\nFiltered patterns (length == 2, not all identical, and without number 5):\n"
    for freq, pattern in filtered_patterns_2:
        result += f"Sequence frequency: {freq}, Pattern: {pattern}\n"

    # Filter patterns with length == 3, not all identical, and without number 5
    filtered_patterns_2 = [tup for tup in patterns if len(tup[1]) == 3 and len(set(tup[1])) > 2 and 5 not in tup[1]]
    result += "\nFiltered patterns (length == 3, not all identical, and without number 5):\n"
    for freq, pattern in filtered_patterns_2:
        result += f"Sequence frequency: {freq}, Pattern: {pattern}\n"

    # Save the result to a file immediately after processing
    file_path = os.path.join(output_folder, f"{fake_or_true}_{category}_results.txt")
    with open(file_path, "w", encoding="utf-8") as file:
        file.write(result)

    print(f"File saved: {file_path}")  # Debug message

# Iterate through DataFrames in the df_fake_dict dictionary and process them sequentially
for category, df in df_fake_dict.items():
    process_and_save(category, df, 'fake')

print("\nAll files have been generated and saved in the 'results' folder.")


Pickle file saved.
File saved: results\fake_Pseudoscience_results.txt
Pickle file saved.
File saved: results\fake_Political News_results.txt
Pickle file saved.
File saved: results\fake_General News_results.txt
Pickle file saved.
File saved: results\fake_Conspiracy_results.txt
Pickle file saved.
File saved: results\fake_PAC SuperPAC_results.txt
Pickle file saved.
File saved: results\fake_Political Advocacy_results.txt

All files have been generated and saved in the 'results' folder.


In [40]:
# Iterar sobre os DataFrames no dicionário df_true_dict e processá-los sequencialmente
for category, df in df_true_dict.items():
    process_and_save(category, df, 'true')

print("\nTodos os arquivos foram gerados e salvos na pasta 'results'.")

Pickle file saved.
File saved: results\true_General News_results.txt
Pickle file saved.
File saved: results\true_Political News_results.txt
Pickle file saved.
File saved: results\true_Pro-Science_results.txt
Pickle file saved.
File saved: results\true_Political Advocacy_results.txt
Pickle file saved.
File saved: results\true_PAC SuperPAC_results.txt

Todos os arquivos foram gerados e salvos na pasta 'results'.


### Prefixpan -- analysis 

- all and filters

In [41]:
from prefixspan import PrefixSpan

# Get the sequences from the 'emotion_sequence' column
sequences = df_fake['emotion_sequence'].tolist()

# Apply PrefixSpan 
ps = PrefixSpan(sequences)
patterns = ps.frequent(minsup=max(1, int(len(sequences) * 0.01)))  # Set minimum support to 1% of 270000

# Sort the patterns by frequency (from most to least frequent)
patterns.sort(key=lambda x: x[0], reverse=True)

In [42]:
sequences = df_fake['emotion_sequence'].tolist()

# Count the total number of extracted subsequences
total_subsequences = sum(len(sequence) for sequence in sequences)

# Display the total number of extracted fake subsequences
print(f"Total number of extracted fake subsequences: {total_subsequences}")


Total number of extracted fake subsequences: 193683


In [None]:
from collections import Counter

# Flatten all emotions from all sequences
all_emotions = [emotion for sequence in sequences for emotion in sequence]

# Count the frequency of each emotion
emotion_counts = Counter(all_emotions)

# Display the total
print(emotion_counts)


Counter({5: 128752, 1: 17260, 6: 12542, 3: 11452, 4: 9038, 2: 7429, 7: 7210})


In [43]:
# Displaying the 100 most frequent patterns
for freq, pattern in patterns[:]:
    print(f"Sequence frequency: {freq}, Pattern: {pattern}")


Sequence frequency: 26246, Pattern: [5]
Sequence frequency: 23655, Pattern: [5, 5]
Sequence frequency: 19061, Pattern: [5, 5, 5]
Sequence frequency: 13588, Pattern: [5, 5, 5, 5]
Sequence frequency: 9877, Pattern: [1]
Sequence frequency: 9737, Pattern: [5, 5, 5, 5, 5]
Sequence frequency: 8200, Pattern: [6]
Sequence frequency: 7888, Pattern: [5, 1]
Sequence frequency: 7676, Pattern: [3]
Sequence frequency: 7657, Pattern: [1, 5]
Sequence frequency: 7100, Pattern: [5, 5, 5, 5, 5, 5]
Sequence frequency: 6521, Pattern: [6, 5]
Sequence frequency: 6501, Pattern: [5, 6]
Sequence frequency: 6319, Pattern: [3, 5]
Sequence frequency: 6018, Pattern: [4]
Sequence frequency: 5797, Pattern: [5, 1, 5]
Sequence frequency: 5769, Pattern: [5, 3]
Sequence frequency: 5564, Pattern: [7]
Sequence frequency: 5469, Pattern: [2]
Sequence frequency: 5448, Pattern: [5, 5, 1]
Sequence frequency: 5441, Pattern: [1, 5, 5]
Sequence frequency: 5065, Pattern: [5, 5, 5, 5, 5, 5, 5]
Sequence frequency: 5005, Pattern: [5, 

In [None]:
import pickle

# Save the 'patterns' variable to the file 'patterns_fake.pkl'
with open('patterns_fake.pkl', 'wb') as file:
    pickle.dump(patterns, file)


In [44]:
filtered_patterns = [tup for tup in patterns if len(tup[1]) >= 3 and len(set(tup[1])) > 4]
for freq, pattern in filtered_patterns[:]:
    print(f"Sequence frequency: {freq}, Pattern: {pattern}")

Sequence frequency: 310, Pattern: [1, 6, 3, 4, 5]
Sequence frequency: 304, Pattern: [1, 5, 6, 3, 4]
Sequence frequency: 301, Pattern: [1, 6, 3, 4, 5, 5]
Sequence frequency: 298, Pattern: [1, 5, 6, 3, 4, 5]
Sequence frequency: 292, Pattern: [5, 1, 6, 3, 4]
Sequence frequency: 292, Pattern: [1, 6, 3, 4, 5, 5, 5]
Sequence frequency: 291, Pattern: [1, 5, 6, 3, 4, 5, 5]
Sequence frequency: 285, Pattern: [5, 1, 6, 3, 4, 5]
Sequence frequency: 284, Pattern: [1, 5, 6, 3, 4, 5, 5, 5]
Sequence frequency: 284, Pattern: [1, 6, 3, 4, 5, 5, 5, 5]
Sequence frequency: 280, Pattern: [5, 1, 5, 6, 3, 4]
Sequence frequency: 278, Pattern: [1, 5, 5, 6, 3, 4]
Sequence frequency: 278, Pattern: [1, 5, 6, 3, 4, 5, 5, 5, 5]
Sequence frequency: 277, Pattern: [5, 1, 6, 3, 4, 5, 5]
Sequence frequency: 275, Pattern: [5, 1, 5, 6, 3, 4, 5]
Sequence frequency: 272, Pattern: [1, 5, 5, 6, 3, 4, 5]
Sequence frequency: 270, Pattern: [5, 1, 5, 6, 3, 4, 5, 5]


In [45]:
filtered_patterns_2 = [tup for tup in patterns if len(tup[1]) == 2 and len(set(tup[1])) > 1 and 5 not in tup[1]]
for freq, pattern in filtered_patterns_2[:]:
    print(f"Sequence frequency: {freq}, Pattern: {pattern}")

Sequence frequency: 2565, Pattern: [3, 1]
Sequence frequency: 2337, Pattern: [1, 6]
Sequence frequency: 2314, Pattern: [6, 1]
Sequence frequency: 2211, Pattern: [1, 3]
Sequence frequency: 2018, Pattern: [3, 6]
Sequence frequency: 1942, Pattern: [2, 1]
Sequence frequency: 1814, Pattern: [6, 3]
Sequence frequency: 1787, Pattern: [1, 2]
Sequence frequency: 1515, Pattern: [7, 1]
Sequence frequency: 1447, Pattern: [1, 4]
Sequence frequency: 1418, Pattern: [7, 6]
Sequence frequency: 1407, Pattern: [2, 6]
Sequence frequency: 1378, Pattern: [4, 1]
Sequence frequency: 1347, Pattern: [6, 4]
Sequence frequency: 1305, Pattern: [3, 2]
Sequence frequency: 1279, Pattern: [2, 3]
Sequence frequency: 1263, Pattern: [4, 6]
Sequence frequency: 1258, Pattern: [1, 7]
Sequence frequency: 1247, Pattern: [6, 2]
Sequence frequency: 1216, Pattern: [3, 4]
Sequence frequency: 1111, Pattern: [7, 4]
Sequence frequency: 1081, Pattern: [3, 7]
Sequence frequency: 1071, Pattern: [7, 3]
Sequence frequency: 1068, Pattern:

In [46]:
sequences2 = df_true['emotion_sequence'].tolist()

ps2 = PrefixSpan(sequences2)
patterns2 = ps2.frequent(minsup=max(1, int(len(sequences) * 0.01)))  

patterns2.sort(key=lambda x: x[0], reverse=True)

In [47]:
import pickle

with open('patterns_true.pkl', 'wb') as file:
    pickle.dump(patterns2, file)


In [48]:
sequences2 = df_true['emotion_sequence'].tolist()

total_subsequences2 = sum(len(sequence) for sequence in sequences2)

print(f"Total number of extracted fake subsequences: {total_subsequences2}")

Total number of extracted fake subsequences: 205912


In [52]:
all_emotions = [emotion for sequence in sequences2 for emotion in sequence]
emotion_counts = Counter(all_emotions)
print(emotion_counts)

Counter({5: 156400, 4: 13221, 6: 10080, 3: 8817, 1: 8070, 7: 5795, 2: 3529})


In [None]:
for freq, pattern in patterns2[:]:
    print(f"Sequence frequency: {freq}, Pattern: {pattern}")

Sequence frequency: 36199, Pattern: [5]
Sequence frequency: 34085, Pattern: [5, 5]
Sequence frequency: 29093, Pattern: [5, 5, 5]
Sequence frequency: 21135, Pattern: [5, 5, 5, 5]
Sequence frequency: 14336, Pattern: [5, 5, 5, 5, 5]
Sequence frequency: 9860, Pattern: [4]
Sequence frequency: 8978, Pattern: [5, 5, 5, 5, 5, 5]
Sequence frequency: 8439, Pattern: [4, 5]
Sequence frequency: 7944, Pattern: [6]
Sequence frequency: 7250, Pattern: [5, 4]
Sequence frequency: 7190, Pattern: [3]
Sequence frequency: 6336, Pattern: [6, 5]
Sequence frequency: 6335, Pattern: [4, 5, 5]
Sequence frequency: 6007, Pattern: [5, 6]
Sequence frequency: 5897, Pattern: [3, 5]
Sequence frequency: 5781, Pattern: [5, 4, 5]
Sequence frequency: 5735, Pattern: [1]
Sequence frequency: 5268, Pattern: [5, 5, 5, 5, 5, 5, 5]
Sequence frequency: 5186, Pattern: [7]
Sequence frequency: 4836, Pattern: [5, 3]
Sequence frequency: 4692, Pattern: [5, 5, 4]
Sequence frequency: 4481, Pattern: [5, 6, 5]
Sequence frequency: 4382, Patter

In [50]:
filtered_patterns2 = [tup for tup in patterns2 if len(tup[1]) >= 3 and len(set(tup[1])) > 2]
for freq, pattern in filtered_patterns2[:]:
    print(f"Sequence frequency: {freq}, Pattern: {pattern}")

Sequence frequency: 808, Pattern: [4, 6, 5]
Sequence frequency: 806, Pattern: [3, 6, 5]
Sequence frequency: 720, Pattern: [3, 1, 5]
Sequence frequency: 713, Pattern: [5, 4, 6]
Sequence frequency: 672, Pattern: [4, 5, 6]
Sequence frequency: 665, Pattern: [6, 4, 5]
Sequence frequency: 654, Pattern: [6, 3, 5]
Sequence frequency: 622, Pattern: [7, 4, 5]
Sequence frequency: 598, Pattern: [5, 6, 4]
Sequence frequency: 583, Pattern: [4, 7, 5]
Sequence frequency: 574, Pattern: [3, 5, 6]
Sequence frequency: 565, Pattern: [5, 3, 6]
Sequence frequency: 564, Pattern: [1, 6, 5]
Sequence frequency: 559, Pattern: [6, 5, 4]
Sequence frequency: 551, Pattern: [3, 4, 5]
Sequence frequency: 549, Pattern: [5, 6, 3]
Sequence frequency: 544, Pattern: [3, 5, 1]
Sequence frequency: 539, Pattern: [4, 3, 5]
Sequence frequency: 522, Pattern: [5, 3, 1]
Sequence frequency: 522, Pattern: [6, 1, 5]
Sequence frequency: 516, Pattern: [1, 3, 5]
Sequence frequency: 504, Pattern: [7, 5, 4]
Sequence frequency: 494, Pattern

In [51]:
filtered_patterns_2 = [tup for tup in patterns2 if len(tup[1]) == 2 and len(set(tup[1])) > 1 and 5 not in tup[1]]
for freq, pattern in filtered_patterns_2[:]:
    print(f"Sequence frequency: {freq}, Pattern: {pattern}")

Sequence frequency: 1180, Pattern: [3, 6]
Sequence frequency: 1166, Pattern: [4, 6]
Sequence frequency: 1102, Pattern: [3, 1]
Sequence frequency: 981, Pattern: [6, 3]
Sequence frequency: 973, Pattern: [6, 4]
Sequence frequency: 871, Pattern: [1, 6]
Sequence frequency: 846, Pattern: [6, 1]
Sequence frequency: 842, Pattern: [7, 4]
Sequence frequency: 822, Pattern: [4, 7]
Sequence frequency: 812, Pattern: [1, 3]
Sequence frequency: 761, Pattern: [4, 3]
Sequence frequency: 740, Pattern: [3, 4]
Sequence frequency: 645, Pattern: [4, 1]
Sequence frequency: 624, Pattern: [7, 6]
Sequence frequency: 614, Pattern: [2, 1]
Sequence frequency: 546, Pattern: [2, 6]
Sequence frequency: 514, Pattern: [6, 7]
Sequence frequency: 493, Pattern: [1, 4]
Sequence frequency: 489, Pattern: [3, 7]
Sequence frequency: 470, Pattern: [1, 2]
Sequence frequency: 462, Pattern: [7, 3]
Sequence frequency: 462, Pattern: [2, 3]
Sequence frequency: 414, Pattern: [3, 2]
Sequence frequency: 384, Pattern: [6, 2]
Sequence freq

### Decision Rules


#### Apriori


In [53]:
df = df_filtered.copy()

In [54]:
# just in case
emotion_vocab = {
    'anger': 1, 
    'disgust': 2, 
    'fear': 3, 
    'joy': 4, 
    'neutral': 5, 
    'sadness': 6, 
    'surprise': 7 
}

# Target emotions for the rule
TARGET_EMOTIONS = {emotion_vocab['fear'], emotion_vocab['anger'], emotion_vocab['sadness']}

# Function to check if all target emotions are present in the sequence
def contains_all_emotions(emotion_list):
    return TARGET_EMOTIONS.issubset(set(emotion_list))

# Total number of records
total_rows = len(df)

# Identify records containing all three emotions
has_all_emotions = df['emotion_sequence'].apply(contains_all_emotions)
total_with_emotions = has_all_emotions.sum()

# Identify records containing the emotions AND classified as fake
is_fake = df['classification'] == 'fake news'
rule_satisfied = has_all_emotions & is_fake
rule_support_count = rule_satisfied.sum()

# Calculate support and confidence
support = rule_support_count / total_rows
confidence = rule_support_count / total_with_emotions if total_with_emotions > 0 else 0

# Display the results
print("using the entire dataset")
print(f"Support: {support:.4f}")
print(f"Confidence: {confidence:.4f}")


using the entire dataset
Support: 0.0255
Confidence: 0.7770


In [None]:
from mlxtend.frequent_patterns import apriori, association_rules

id_to_emotion = {v: k for k, v in emotion_vocab.items()}

# Convert lists of emotion IDs into emotion names
def map_emotions(row):
    return set([id_to_emotion.get(e) for e in row if e in id_to_emotion])

# Create transactions combining emotions + class label
df['transaction'] = df['emotion_sequence'].apply(map_emotions)
df['transaction'] = df.apply(lambda row: row['transaction'].union({f"label={row['classification']}"}), axis=1)

# Transform into one-hot encoded DataFrame for Apriori
from mlxtend.preprocessing import TransactionEncoder
te = TransactionEncoder()
te_ary = te.fit(df['transaction']).transform(df['transaction'])
df_onehot = pd.DataFrame(te_ary, columns=te.columns_)

# Apply Apriori
frequent_itemsets = apriori(df_onehot, min_support=0.01, use_colnames=True)

# Generate association rules
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6)

# Filter only rules leading to 'label=fake'
rules_to_fake = rules[rules['consequents'].apply(lambda x: 'label=fake news' in x)]

# Sort by confidence and support
rules_to_fake = rules_to_fake.sort_values(by=['confidence', 'support'], ascending=[False, False])

# Display the rules
pd.set_option('display.max_colwidth', None)
rules_to_fake[['antecedents', 'consequents', 'support', 'confidence', 'lift']]


Unnamed: 0,antecedents,consequents,support,confidence,lift
177,"(sadness, joy, fear, anger)",(label=fake news),0.010046,0.896359,2.114035
170,"(surprise, disgust, anger, neutral)",(label=fake news),0.012212,0.889143,2.097017
68,"(surprise, disgust, anger)",(label=fake news),0.01251,0.884573,2.086238
223,"(disgust, anger, neutral, fear, sadness)",(label=fake news),0.01182,0.881733,2.079541
194,"(surprise, joy, anger, neutral)",(label=fake news),0.01262,0.875817,2.065588
155,"(disgust, sadness, fear, anger)",(label=fake news),0.011992,0.875143,2.063999
121,"(surprise, disgust, sadness)",(label=fake news),0.01047,0.874181,2.06173
88,"(surprise, joy, anger)",(label=fake news),0.012809,0.873662,2.060505
206,"(surprise, disgust, sadness, neutral)",(label=fake news),0.01014,0.872973,2.058881
161,"(neutral, disgust, joy, anger)",(label=fake news),0.012212,0.86637,2.043307


In [51]:
rules_to_fake[['antecedents', 'consequents', 'support', 'confidence', 'lift']].to_csv(r'C:\Users\vieir\Mestrado\TESE\pipeline\resultados\association_ruler_fake.csv', index=False)

In [None]:
# Filter only rules leading to 'label=reliable'
rules_to_reliable = rules[rules['consequents'].apply(lambda x: 'label=reliable news' in x)]

rules_to_reliable = rules_to_reliable.sort_values(by=['confidence', 'support'], ascending=[False, False])

pd.set_option('display.max_colwidth', None)
rules_to_reliable[['antecedents', 'consequents', 'support', 'confidence', 'lift']]


Unnamed: 0,antecedents,consequents,support,confidence,lift
45,"(neutral, joy)",(label=reliable news),0.152118,0.627168,1.08884
5,(joy),(label=reliable news),0.154771,0.620985,1.078106
47,(joy),"(neutral, label=reliable news)",0.152118,0.610341,1.074146


In [54]:
rules_to_reliable[['antecedents', 'consequents', 'support', 'confidence', 'lift']].to_csv(r'C:\Users\vieir\Mestrado\TESE\pipeline\resultados\association_ruler_reliable.csv', index=False)