## NewsFlow ETL

### Import Libraries and Connect to the News API

In [None]:
import pandas as pd
import sqlite3
from newsapi import NewsApiClient
import logging
from config import api_key

In [None]:
# Build the News API client used by the extraction step; the key is read
# from the `api_key` mapping imported from the local `config` module.
news_api = NewsApiClient(api_key=api_key['API_KEY'])

### Retrieve and Print News Articles

In [None]:
def extract_news_data():
    """Fetch English-language articles matching "AI" from the News API.

    Results are requested sorted by publication time (``publishedAt``).

    Returns:
        The value stored under the ``"articles"`` key of the API response,
        or ``None`` when the request or the response lookup fails.
    """
    try:
        result = news_api.get_everything(q="AI", language="en", sort_by='publishedAt')
        logging.info("Connection is successful.")
        return result["articles"]
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt/SystemExit still propagate, and log the traceback
        # so the cause of the failure is not lost.
        logging.exception("Connection is unsuccessful.")
        return None

articles = extract_news_data()

# extract_news_data() returns None when the request fails, and slicing None
# would raise a TypeError, so only preview the first three articles on success.
if articles is not None:
    print(articles[:3])
else:
    print("No articles were retrieved.")

### Clean Author Column

In [None]:
def clean_author_name(text):
    """Return the first comma-separated part of *text* in title case.

    Values that do not support string methods (for example ``None`` or a
    float ``NaN``) yield the placeholder ``"No Author"``.
    """
    try:
        # Only the part before the first comma is kept.
        first_part, *_ = text.split(',')
        cleaned = first_part.title()
    except AttributeError:
        return "No Author"
    return cleaned

### Transform News Data

In [None]:
def transform_news_data(articles):
    """Convert raw article dicts into a tidy DataFrame.

    Each field is looked up explicitly by key, so the result does not depend
    on the key order of the incoming dicts, and a missing key produces an
    empty cell instead of a misaligned or failing row.

    Args:
        articles: Iterable of article dicts with the keys ``source`` (a dict
            holding ``name``), ``author``, ``title``, ``url``,
            ``publishedAt`` and ``content``. ``None`` is treated as an empty
            collection.

    Returns:
        A DataFrame with the columns ``Source``, ``Author Name``,
        ``News Title``, ``URL``, ``Date Published`` (formatted as
        ``YYYY-MM-DD HH:MM:SS``) and ``Content``.
    """
    columns = ["Source", "Author Name", "News Title", "URL", "Date Published", "Content"]

    rows = []
    for article in articles or []:
        # ``source`` may be absent or None; fall back to an empty mapping.
        source = article.get("source") or {}
        rows.append([
            # 0 is kept as the placeholder for a missing source name.
            source.get("name", 0),
            article.get("author"),
            article.get("title"),
            article.get("url"),
            article.get("publishedAt"),
            article.get("content"),
        ])

    df = pd.DataFrame(rows, columns=columns)

    df["Date Published"] = pd.to_datetime(df["Date Published"]).dt.strftime('%Y-%m-%d %H:%M:%S')
    df["Author Name"] = df["Author Name"].apply(clean_author_name)

    return df

# Run the transform step on the fetched articles; the resulting DataFrame
# is kept in `transformed_data`.
transformed_data = transform_news_data(articles)

### Load the Data into a SQLite Database


### Verify Data Loading