In [2]:
import pathlib as pl
import numpy as np
import pandas as pd
import os

In [37]:
class TrainingData:
    """Load a news corpus from a csv file and attach coarse labels.

    After construction ``self.df`` holds the csv contents without its first
    column, restricted to rows whose 'type' is present, plus a 'labels'
    column derived from 'type'.
    """

    # Coarse label stored in the 'labels' column for each known 'type' value.
    _LABEL_BY_TYPE = {
        "unreliable": "unknown",
        "fake": "fake",
        "clickbait": "unknown",
        "conspiracy": "fake",
        "reliable": "real",
        "bias": "unknown",
        "hate": "unknown",
        "junksci": "fake",
        "political": "unknown",
        "unknown": "unknown",
    }

    def __init__(self, file_path: pl.Path) -> None:
        """Instantiate TrainingData class object.

        Reads the csv at ``file_path``, removes rows without a 'type' and
        adds the 'labels' column.
        """
        self.df = self.get_training_data(file_path)
        self.rinse_training_data()
        self.add_labels()

    def get_training_data(self, file_path: pl.Path) -> pd.DataFrame:
        """Import and typecast training data from csv file.

        Returns the frame read from ``file_path`` with its first column
        removed and the 'type' column converted to a categorical.
        """
        df = pd.read_csv(file_path)
        # The first column is discarded unconditionally.
        # NOTE(review): presumably a saved index column -- confirm against the csv.
        df = df.drop(columns=df.columns[0])
        df["type"] = pd.Categorical(df["type"])
        return df

    def rinse_training_data(self) -> None:
        """Rinse training data by keeping only rows whose 'type' is present."""
        # .copy() gives the instance its own frame, so adding a column later
        # does not write into a slice of the frame that was read from disk.
        self.df = self.df.loc[self.df["type"].notna()].copy()

    def add_labels(self) -> None:
        """Add custom labels based on 'type' column.

        Raises:
            KeyError: if 'type' contains a value (including a missing value)
                that has no entry in the label mapping.
        """
        # Work on plain objects so the result does not depend on how
        # categorical mapping treats many-to-one dictionaries.
        types = self.df["type"].astype(object)

        # Series.map would silently produce NaN for unmapped values; fail
        # loudly instead, as a plain dictionary lookup would.
        unmapped = [t for t in types.unique() if t not in self._LABEL_BY_TYPE]
        if unmapped:
            raise KeyError(f"no label defined for type value(s): {unmapped!r}")

        # Vectorised lookup; also yields an empty column for an empty frame.
        self.df["labels"] = types.map(self._LABEL_BY_TYPE)



In [7]:
# The corpus is located relative to the parent of the current working
# directory (per the original note: parent = src/, parent.parent = fake-news/).
file_path = pl.Path.cwd().parent.resolve() / "data_files/corpus/reduced_corpus.csv"

# Read the reduced corpus and display only the rows typed as "reliable".
df = pd.read_csv(file_path)
df.loc[df["type"].eq("reliable")]


Unnamed: 0.1,Unnamed: 0,id,domain,type,url,content,scraped_at,inserted_at,updated_at,title,authors,keywords,meta_keywords,meta_description,tags,summary,source
8,42,52,christianpost.com,reliable,https://www.christianpost.com/news/destiny-2-n...,"Facebook/DestinyTheGame Promo image for ""Desti...",2018-01-25 16:17:44.789555,2018-02-02 01:19:41.756632,2018-02-02 01:19:41.756664,'Destiny 2' News: Bungie Addresses Player Comp...,,,[''],"""Destiny 2's"" Faction Rallies was one of the m...",,,
11,83,102,christianpost.com,reliable,https://www.christianpost.com/news/professor-c...,"(Screenshot: Facebook) Dr. Melina Abdullah, pr...",2018-01-25 16:17:44.789555,2018-02-02 01:19:41.756632,2018-02-02 01:19:41.756664,Professor Conducts Libation to Summon Spirits ...,,,[''],"Dr. Melina Abdullah, professor and chair of Pa...",,,
12,84,103,christianpost.com,reliable,https://www.christianpost.com/news/ghost-ship-...,(Photo: Reuters) The teams of North and South ...,2018-01-25 16:17:44.789555,2018-02-02 01:19:41.756632,2018-02-02 01:19:41.756664,'Ghost Ship' With 8 Dead N. Koreans Washes Up ...,,,[''],While news of North and South Korea agreeing t...,,,
13,85,104,christianpost.com,reliable,https://www.christianpost.com/news/facebook-ba...,(Photo: Reuters/Dado Ruvic) Facebook.\n\nUPDAT...,2018-01-25 16:17:44.789555,2018-02-02 01:19:41.756632,2018-02-02 01:19:41.756664,Facebook Lifts Ban on Paid Ads for Pro-Life Fi...,,,[''],Facebook said Thursday that it has lifted its ...,,,
14,86,105,christianpost.com,reliable,https://www.christianpost.com/news/iranian-chr...,(Screencap: YouTube/Tidningen Dagen) Annahita ...,2018-01-25 16:17:44.789555,2018-02-02 01:19:41.756632,2018-02-02 01:19:41.756664,"Iranian Christian Convert Leads 1,500 Muslims ...",,,[''],"An Iranian convert to Christianity, now an ord...",,,
15,87,106,christianpost.com,reliable,https://www.christianpost.com/news/justice-dep...,(Screenshot: YouTube / MJofLakeland1) A Metro ...,2018-01-25 16:17:44.789555,2018-02-02 01:19:41.756632,2018-02-02 01:19:41.756664,Justice Dept. Says DC Metro Violated Religious...,,,[''],The United States Justice Department said that...,,,
16,88,107,christianpost.com,reliable,https://www.christianpost.com/news/chick-fil-a...,(Screenshot:Chick-fil-A) A Chick-fil-A restaur...,2018-01-25 16:17:44.789555,2018-02-02 01:19:41.756632,2018-02-02 01:19:41.756664,"Chick-fil-A Kicks Out Breastfeeding Mom, Warni...",,,[''],"A Chick-fil-A restaurant in Fargo, North Dakot...",,,
17,89,108,christianpost.com,reliable,https://www.christianpost.com/news/forcing-pro...,(Photo: Reuters/James Lawler Duggan) The annua...,2018-01-25 16:17:44.789555,2018-02-02 01:19:41.756632,2018-02-02 01:19:41.756664,Forcing Pro-Life Groups to Promote Abortion 'D...,,,[''],Numerous conservative groups and more than a h...,,,
18,100,120,christianpost.com,reliable,https://www.christianpost.com/news/school-dist...,(Facebook/Support Kountze Kids Faith)\n\nA Tex...,2018-01-25 16:17:44.789555,2018-02-02 01:19:41.756632,2018-02-02 01:19:41.756664,School District Appeals Texas Cheerleaders' Bi...,,,[''],A Texas public school district is hoping the s...,,,
30,164,192,christianpost.com,reliable,https://www.christianpost.com/news/democrat-na...,(Screenshot: YouTube/Washington Post) Democrat...,2018-01-25 16:17:44.789555,2018-02-02 01:19:41.756632,2018-02-02 01:19:41.756664,Democrat Leader Nancy Pelosi Joins Trump-Suppo...,,,[''],Democrat House Minority Leader Nancy Pelosi an...,,,
