# Creates the factckbr_processed.csv dataset

##### Entries with class 1 are real news in the factckbr_processed.csv
##### Entries with class 0 are fake news in the factckbr_processed.csv

In [1]:
import pandas as pd

# Verdict text (lower-cased "alternativeName") -> binary class label.
# 0 = fake news, 1 = real news. Any verdict not listed here is discarded.
LABEL_BY_VERDICT = {'falso': 0, 'verdadeiro': 1}


def build_processed_dataset(raw_df):
    """Build the binary fake/real news table from the raw FACTCKBR frame.

    The class is derived exclusively from the textual verdict in
    "alternativeName" ('falso' -> 0, 'verdadeiro' -> 1, case-insensitive).
    Rows with any other (or missing) verdict are dropped, even when their
    original numeric "ratingValue" happens to be 0 or 1; otherwise such
    rows would be silently mislabelled as fake or real news.

    Args:
        raw_df: DataFrame with at least the columns "claimReviewed",
            "alternativeName" and "ratingValue". It is not modified.

    Returns:
        A new DataFrame with the columns "news" (lower-cased claim text)
        and "class" (int, 0 or 1), in that order.
    """
    # Rows without a rating are discarded first.
    rated = raw_df.dropna(subset=['ratingValue'])

    # Map the normalised verdict to its label; unmapped verdicts become NaN.
    labels = rated['alternativeName'].str.lower().map(LABEL_BY_VERDICT)
    is_binary = labels.notna()

    # Build a fresh frame instead of assigning into a filtered slice, which
    # avoids pandas' chained-assignment (SettingWithCopy) pitfalls.
    return pd.DataFrame({
        'news': rated.loc[is_binary, 'claimReviewed'].str.lower(),
        'class': labels[is_binary].astype(int),
    })


# Run the pipeline only when executed as a script / notebook cell, so that
# importing the helper above performs no file I/O.
if __name__ == '__main__':
    # Load the original dataset
    file_path = './FACTCKBR.tsv'
    df = pd.read_csv(file_path, sep='\t')

    filtered_df = build_processed_dataset(df)

    # Save the filtered DataFrame to a new CSV file with comma delimiter
    output_file_path = './factckbr_processed.csv'
    filtered_df.to_csv(output_file_path, index=False)