In [1]:
import pandas as pd
from newsapi import NewsApiClient

# Import api key
from config import api_key_news

# Extract News

In [2]:
# Start a NewsAPI session: build the client with the API key imported from config.
newsapi = NewsApiClient(api_key=api_key_news)

### Get just BTC 

In [3]:
# Query articles matching "bitcoin OR btc" for the 2023-03-15 .. 2023-04-14 window.
BTC_snapshot = newsapi.get_everything(
    q="bitcoin OR btc",
    from_param="2023-03-15",
    to="2023-04-14",
    language="en",
)

### Get just DOGE

In [4]:
# Query articles matching "dogecoin OR doge" for the 2023-03-15 .. 2023-04-14 window.
doge_snapshot = newsapi.get_everything(
    q="dogecoin OR doge",
    from_param="2023-03-15",
    to="2023-04-14",
    language="en",
)

### Bank Snapshot

In [14]:
# Query articles matching "bank" for the 2023-03-11 .. 2023-04-09 window.
bank_snapshot = newsapi.get_everything(
    q="bank",
    from_param="2023-03-11",
    to="2023-04-09",
    language="en",
)

In [15]:
# Display the raw response returned for the bank query (status, totalResults, articles).
bank_snapshot

{'status': 'ok',
 'totalResults': 72868,
 'articles': [{'source': {'id': 'engadget', 'name': 'Engadget'},
   'author': 'Devindra Hardawar',
   'title': 'Engadget Podcast: Why did Silicon Valley Bank fail so hard?',
   'description': 'The downfall of Silicon Valley bank affects the entire technology industry. This week, we chat with Alex Wilhelm, editor in chief at TechCrunch+, about why SVB failed and what it means for the broader ecosystem. Where were the regulations? And why were VCs so…',
   'url': 'https://www.engadget.com/engadget-podcast-elon-musk-neuralink-human-trials-denied-123014095.html',
   'urlToImage': 'https://s.yimg.com/uu/api/res/1.2/tdmZDSyzf3jjlI17q4HATQ--~B/Zmk9ZmlsbDtoPTYzMDtweW9mZj0wO3c9MTIwMDthcHBpZD15dGFjaHlvbg--/https://media-mbst-pub-ue1.s3.amazonaws.com/creatr-uploaded-images/2020-05/f43b53b0-909f-11ea-87fd-d8d721459924.cf.jpg',
   'publishedAt': '2023-03-17T12:30:14Z',
   'content': 'The downfall of Silicon Valley bank affects the entire technology industry.

# Transform
Convert each snapshot into a DataFrame so it can be saved as a CSV file.

### Function to Transform

In [9]:
# The function keeps only the articles, drops unneeded fields, and renames the date column
def transform_snapshot(snapshot):
    """Turn a news API response into a two-column (date, summary) DataFrame.

    Parameters
    ----------
    snapshot : dict
        Response dict with an ``"articles"`` key holding a list of article
        dicts. Only the ``publishedAt``, ``title`` and ``description`` fields
        of each article are used.

    Returns
    -------
    pandas.DataFrame
        One row per article, in the original order, with columns:

        * ``date``    - calendar day (``datetime.date``, UTC) of ``publishedAt``
        * ``summary`` - title and description joined by a single space, giving
          the downstream sentiment step more text to work with

    Raises
    ------
    KeyError
        If ``snapshot`` has no ``"articles"`` key.
    """
    wanted = ["publishedAt", "title", "description"]

    # reindex (rather than .loc) guarantees the three columns exist even when
    # the article list is empty or a field is absent from every article.
    articles = pd.DataFrame(snapshot["articles"]).reindex(columns=wanted)

    # Missing title/description values are treated as empty strings so that a
    # single absent field does not wipe out the whole summary.
    text = articles[["title", "description"]].fillna("").astype(str)

    # Join with a space: plain concatenation fused the last word of the title
    # with the first word of the description.
    summary = (text["title"] + " " + text["description"]).str.strip()

    # Parse the timestamps explicitly instead of casting to a unit-less
    # "datetime64" dtype (rejected by pandas 2.x), then keep only the day.
    dates = pd.to_datetime(articles["publishedAt"], utc=True).dt.date

    return pd.DataFrame({"date": dates, "summary": summary})


### Transform BTC

In [10]:
# Run the BTC snapshot through transform_snapshot and display the resulting DataFrame.
btc_snapshot_df = transform_snapshot(BTC_snapshot)
btc_snapshot_df

Unnamed: 0,date,summary
0,2023-04-13,Twitter Partners with eToro to Let Users Buy a...
1,2023-04-06,Here’s How to Find the Original Bitcoin Manife...
2,2023-04-04,Cryptoverse: Bitcoin traders like their option...
3,2023-03-20,In which Balaji gives away at least a million ...
4,2023-04-05,Michael Saylor's MicroStrategy adds to its bit...
...,...,...
95,2023-04-05,Bitcoin: MicroStrategy Buys 1045 BTC for $29.3...
96,2023-04-10,"Cathie Wood Backs Balaji’s $1M BTC Forecast, D..."
97,2023-03-17,Why Bitcoin Miner Stocks Soared This WeekBitco...
98,2023-03-17,Over $55M Crypto Shorts Blown Away In 12 Hours...


### Doge News

In [11]:
# Run the DOGE snapshot through transform_snapshot and display the resulting DataFrame.
doge_snapshot_df = transform_snapshot(doge_snapshot)
doge_snapshot_df

Unnamed: 0,date,summary
0,2023-04-06,You Can Remove That Stupid Doge Icon From Your...
1,2023-04-03,Dogecoin Jumps After Token's Symbol Replaces B...
2,2023-04-06,Dogecoin's sharp rally fades after brief burst...
3,2023-04-04,Dogecoin Soars 30% After Elon Musk Replaces Tw...
4,2023-04-05,Breakingviews - Elon Musk's Doge barks up SEC'...
...,...,...
95,2023-04-12,Ethereum Trades Flat After Ethereum Shanghai U...
96,2023-04-01,Elon Musk requests dismissal of $258B Dogecoin...
97,2023-04-07,Dogecoin Tanks After Twitter Restores Iconic L...
98,2023-04-04,"Twitter is no longer the bird app, changes ico..."


### Banking News

In [None]:
# Run the bank snapshot through transform_snapshot and display the resulting DataFrame.
banking_snapshot_df = transform_snapshot(bank_snapshot)
banking_snapshot_df

# Load

In [None]:
# Write the BTC DataFrame to CSV (path is relative to the current working directory).
btc_snapshot_df.to_csv("Resources_crypto_articles/btc_snapshot_after_SVB.csv")

In [36]:
# Write the banking DataFrame to CSV (path is relative to the current working directory).
banking_snapshot_df.to_csv("News_Snapshots/banking_snapshot_410.csv")

In [38]:
# Write crypto df to csv
# NOTE(review): `crypto_snapshot_df` is not assigned in any cell visible in this
# notebook (only btc_snapshot_df, doge_snapshot_df and banking_snapshot_df are).
# Confirm where it should come from; as written this cell raises NameError on a
# fresh Restart & Run All.
crypto_snapshot_df.to_csv("News_Snapshots/crypto_snapshot_410.csv")