In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pymongo

In [2]:
# Connect PyMongo to the MongoDB server on localhost (port 27017).
conn = "mongodb://localhost:27017"
client = pymongo.MongoClient(host=conn)

In [3]:
# Handles for the `nhl_db` database and its `articles` collection.
# Subscript access is equivalent to attribute access on PyMongo objects.
db = client["nhl_db"]
collection = db["articles"]

In [4]:
# URL of page to be scraped
url = 'https://www.nhl.com/news'

# Retrieve page with the requests module.
# requests has no default timeout, so without one a stalled connection would
# block the kernel indefinitely.
response = requests.get(url, timeout=10)
# Fail loudly on 4xx/5xx responses instead of silently parsing an error page
# (which would simply produce zero articles further down).
response.raise_for_status()

# Create BeautifulSoup object; parse with 'lxml'
soup = BeautifulSoup(response.text, 'lxml')

In [5]:
# Retrieve the parent divs for all articles
results = soup.find_all('div', class_='article-item__top')

# Loop over results to get article data
for result in results:
    # find() returns None when a tag is absent, so locate every tag first and
    # validate before touching .text or attributes; one malformed entry
    # should not abort the whole scrape.
    header_tag = result.find('h1', class_='article-item__headline')
    subheader_tag = result.find('h2', class_='article-item__subheader')
    date_tag = result.find('span', class_='article-item__date')
    if header_tag is None or subheader_tag is None or date_tag is None:
        continue

    # The timestamp lives in the span's data-date attribute; .get() yields
    # None rather than raising KeyError when the attribute is missing.
    # (Named `timestamp` to avoid shadowing the stdlib `datetime` module.)
    timestamp = date_tag.get('data-date')
    if not timestamp:
        continue

    # Scrape the article header and subheader text
    header = header_tag.text
    subheader = subheader_tag.text

    # Keep only the part before 'T', i.e. the date portion of the timestamp
    date = timestamp.split('T')[0]

    # Print article data
    print('-----------------')
    print(header)
    print(subheader)
    print(date)

    # Dictionary to be stored in MongoDB
    post = {
        'header': header,
        'subheader': subheader,
        'date': date,
    }

    # Upsert keyed on (header, date) so that re-running this cell does not
    # create duplicate documents for articles that were already stored.
    collection.update_one(
        {'header': header, 'date': date},
        {'$set': post},
        upsert=True,
    )

-----------------
Blues, Bruins Cup Final rematch will be 'emotional game,' Berube says
St. Louis coach, players embracing championship memories, Boston focused on two points
2019-10-25
-----------------
McDonald reminisces about playing hockey outdoors before Heritage Classic
Hockey Hall of Famer believes Flames game against Jets will be special for fans in Regina
2019-10-26
-----------------
Gudbranson traded to Ducks by Penguins
Pittsburgh receives forward Martinsen, seventh-round pick in 2021 NHL Draft
2019-10-25
-----------------
Dubnyk returns to practice with Wild, likely will miss one more game
Goalie not expected to play against Kings because of injury to side
2019-10-25
-----------------
NHL fantasy team power rankings for 2019-20
Kane's power-play impact has Sharks trending up; Bailey emerges as streaming option from Islanders
2019-10-25
-----------------
Blues, Bruins to play in latest game between Stanley Cup finalists
St. Louis, Boston face off in regular season for first

In [6]:
# Fetch every document currently stored in the `articles` collection and
# print each one on its own line.
articles = db["articles"].find()
for article in articles:
    print(article)

{'_id': ObjectId('5db4785c3c72308e0ede0f93'), 'header': "Blues, Bruins Cup Final rematch will be 'emotional game,' Berube says", 'subheader': 'St. Louis coach, players embracing championship memories, Boston focused on two points', 'date': '2019-10-25'}
{'_id': ObjectId('5db4785c3c72308e0ede0f94'), 'header': 'McDonald reminisces about playing hockey outdoors before Heritage Classic', 'subheader': 'Hockey Hall of Famer believes Flames game against Jets will be special for fans in Regina', 'date': '2019-10-26'}
{'_id': ObjectId('5db4785c3c72308e0ede0f95'), 'header': 'Gudbranson traded to Ducks by Penguins', 'subheader': 'Pittsburgh receives forward Martinsen, seventh-round pick in 2021 NHL Draft', 'date': '2019-10-25'}
{'_id': ObjectId('5db4785c3c72308e0ede0f96'), 'header': 'Dubnyk returns to practice with Wild, likely will miss one more game', 'subheader': 'Goalie not expected to play against Kings because of injury to side', 'date': '2019-10-25'}
{'_id': ObjectId('5db4785c3c72308e0ede0