In [1]:
import pathlib as pl
import numpy as np
import pandas as pd
import os

In [37]:
class TrainingData:
    """Load a news CSV file and derive a coarse ``labels`` column from ``type``.

    After construction ``self.df`` holds the CSV rows whose ``type`` value is
    present, without the first CSV column, with ``type`` stored as a
    categorical column and with an added ``labels`` column.
    """

    # Maps each known 'type' value to the coarse label stored in 'labels'.
    TYPE_LABELS = {
        "unreliable": "unknown",
        "fake": "fake",
        "clickbait": "unknown",
        "conspiracy": "fake",
        "reliable": "real",
        "bias": "unknown",
        "hate": "unknown",
        "junksci": "fake",
        "political": "unknown",
        "unknown": "unknown",
    }
    # Label used for any 'type' value that is not a key of TYPE_LABELS, so an
    # unexpected category in the data does not abort loading with a KeyError.
    DEFAULT_LABEL = "unknown"

    def __init__(self, file_path: pl.Path) -> None:
        """Load, clean and label the training data found at ``file_path``.

        Args:
            file_path: Location of the CSV file to read.
        """
        self.df = self.get_training_data(file_path)
        self.rinse_training_data()
        self.add_labels()

    def get_training_data(self, file_path: pl.Path) -> pd.DataFrame:
        """Read the CSV file and typecast its ``type`` column.

        Args:
            file_path: Location of the CSV file to read.

        Returns:
            The file contents without the first column and with ``type``
            converted to a categorical column.
        """
        df = pd.read_csv(file_path)
        # The first column is discarded unconditionally.
        # NOTE(review): presumably it is a saved row index - confirm for new files.
        df = df.drop(columns=df.columns[0])
        df["type"] = df["type"].astype("category")
        return df

    def rinse_training_data(self) -> None:
        """Drop rows whose ``type`` is missing; original index labels are kept."""
        # .copy() makes self.df an independent frame rather than a slice of the
        # unfiltered data, so adding columns later cannot trigger pandas'
        # SettingWithCopyWarning.
        self.df = self.df[self.df["type"].notna()].copy()

    def add_labels(self) -> None:
        """Add a ``labels`` column computed from the ``type`` column.

        Each ``type`` value is translated through ``TYPE_LABELS``; values that
        are not listed there receive ``DEFAULT_LABEL``.
        """
        # Work on plain objects so the lookup is applied per element and the
        # resulting column holds ordinary strings rather than categories.
        type_values = self.df["type"].astype(object)
        self.df["labels"] = type_values.map(
            lambda type_name: self.TYPE_LABELS.get(type_name, self.DEFAULT_LABEL)
        )



In [40]:
# The sample CSV is looked up under the parent of the current working
# directory (parent = src/, parent.parent = fake-news/).
file_path = (
    pl.Path(os.path.abspath(''))
    .parent
    .resolve()
    .joinpath("data_files/news_sample.csv")
)

# Build the cleaned, labelled frame and show it as the cell output.
trn = TrainingData(file_path)
trn.df


Unnamed: 0,id,domain,type,url,content,scraped_at,inserted_at,updated_at,title,authors,keywords,meta_keywords,meta_description,tags,summary,labels
0,141,awm.com,unreliable,http://awm.com/church-congregation-brings-gift...,Sometimes the power of Christmas will make you...,2018-01-25 16:17:44.789555,2018-02-02 01:19:41.756632,2018-02-02 01:19:41.756664,Church Congregation Brings Gift to Waitresses ...,Ruth Harris,,[''],,,,unknown
1,256,beforeitsnews.com,fake,http://beforeitsnews.com/awakening-start-here/...,AWAKENING OF 12 STRANDS of DNA – “Reconnecting...,2018-01-25 16:17:44.789555,2018-02-02 01:19:41.756632,2018-02-02 01:19:41.756664,AWAKENING OF 12 STRANDS of DNA – “Reconnecting...,Zurich Times,,[''],,,,fake
2,700,cnnnext.com,unreliable,http://www.cnnnext.com/video/18526/never-hike-...,Never Hike Alone: A Friday the 13th Fan Film U...,2018-01-25 16:17:44.789555,2018-02-02 01:19:41.756632,2018-02-02 01:19:41.756664,Never Hike Alone - A Friday the 13th Fan Film ...,,,[''],Never Hike Alone: A Friday the 13th Fan Film ...,,,unknown
3,768,awm.com,unreliable,http://awm.com/elusive-alien-of-the-sea-caught...,"When a rare shark was caught, scientists were ...",2018-01-25 16:17:44.789555,2018-02-02 01:19:41.756632,2018-02-02 01:19:41.756664,Elusive ‘Alien Of The Sea ‘ Caught By Scientis...,Alexander Smith,,[''],,,,unknown
4,791,bipartisanreport.com,clickbait,http://bipartisanreport.com/2018/01/21/trumps-...,Donald Trump has the unnerving ability to abil...,2018-01-25 16:17:44.789555,2018-02-02 01:19:41.756632,2018-02-02 01:19:41.756664,Trump’s Genius Poll Is Complete & The Results ...,Gloria Christie,,[''],,,,unknown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
239,38572,beforeitsnews.com,fake,http://beforeitsnews.com/celebrities/2014/04/t...,The Disturbing Sugar Daddy/Sugar Baby Relation...,2018-01-25 20:13:50.426130,2018-02-02 01:19:41.756632,2018-02-02 01:19:41.756664,The Disturbing Sugar Daddy/Sugar Baby Relation...,,,[''],,,,fake
243,39134,beforeitsnews.com,fake,http://beforeitsnews.com/contributor/pages/20/...,press\n\nBefore It's News ©\n\npeople powered ...,2018-01-25 20:13:50.426130,2018-02-02 01:19:41.756632,2018-02-02 01:19:41.756664,WELS - Through My Bible on Streams,,,[''],,,,fake
244,39208,washingtonexaminer.com,political,http://www.washingtonexaminer.com/senate-agree...,The Senate voted late Thursday to start work o...,2018-01-25 20:13:50.426130,2018-02-02 01:19:41.756632,2018-02-02 01:19:41.756664,"Senate agrees to start work on spending bill, ...",Susan Ferrechio,,"['Spending', 'Government Shutdown', 'Continuin...","'We want to move forward, we want to do someth...",,,unknown
245,39259,beforeitsnews.com,fake,http://beforeitsnews.com/economy/2017/12/priso...,"Prison for Rahm, God’s Work And Many Others\n\...",2018-01-25 20:13:50.426130,2018-02-02 01:19:41.756632,2018-02-02 01:19:41.756664,"Prison for Rahm, God’s Work And Many Others",,,[''],,,,fake


3.710193199082443
3.7101931990824424
