# Scraping DBS press releases using their newsfeed function

In [3]:
# Import libraries

from bs4 import BeautifulSoup
import pandas as pd
import requests

### Step 1: Create a soup object from the DBS newsfeed

In [4]:
# Target web page:
url = "https://www.dbs.com/newsroom/news-feed.page?Country=sg"

# Establish the connection to the web page. The timeout keeps this cell from
# hanging indefinitely if the server never answers.
response = requests.get(url, timeout=30)

# Status codes show how the target server responded to the request,
# e.g. 200 = OK, 400 = Bad Request, 403 = Forbidden, 404 = Not Found.
print(response.status_code)

# Stop here on a 4xx/5xx response instead of handing an error page to the
# parsing steps further down.
response.raise_for_status()

# Pull the response body out of requests as a Python string.
html = response.text

200


In [5]:
# Preview only the first 700 characters; the full feed is far too long to
# read comfortably in the cell output.
print(html[:700])

<?xml version="1.0" encoding="UTF-8"?><rss xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" encoding="UTF-8" version="2.0">
<channel>
<title>DBS News Releases</title>
<description>DBS News Releases</description>
<link>http://www.dbs.com/newsroom/NewsroomRSS/DBS_News_Singapore.rss?format=xml</link>
<language>en</language>
<copyright>DBS</copyright>
<pubDate>Tue, 4 Feb 2014 14:00:25 +08:00 </pubDate>
<lastBuildDate>Tue, 4 Feb 2014 14:00:25 +08:00 </lastBuildDate>
<itunes:category text="DBSNewsRelease">
<itunes:category text="English"/>
</itunes:category>
<itunes:author>DBS</itunes:author>
<itunes:image/>
<itunes:subtitle>DBS News Releases</itunes:subtitle>
<itunes:owner>
<itunes:name>DBS</itunes:name>
<itunes:email/>
</itunes:owner>
<itunes:explicit>no</itunes:explicit>
<itunes:keywords>DBS,English,news release,rss,web,news</itunes:keywords>
<image>
<title/>
<link>http://www.dbs.com</link>
<description/>
</image><item><title>DBS named ‘World’s Best Bank’ by Euromoney magazine</t

### Step 2: Scrape the soup for every press release

Note: Your best bet is to create a list of dictionaries, one for each press release. The result of your scrape should look something like this:

```python
pressers = [
    {'title': "DBS named ‘World’s Best Bank’ by Euromoney magazine", 'date': '11 Jul 2019', 'link': 'www.dbs.com/newsroom/DBS_named_Worlds_Best_Bank_by_Euromoney_magazine'},
    ...
]
```

In [26]:
# This code collects the pressers

# Parse the feed and collect every <item> element (one per press release).
# Python's html.parser lower-cases tag names, which is why the feed's
# <pubDate> tag is looked up as 'pubdate' below.
soup = BeautifulSoup(html, 'html.parser')
all_items = soup.find_all('item')

# Output field -> name of the feed tag that supplies it.
field_tags = {'title': 'title', 'date': 'pubdate', 'link': 'guid'}

# List to store results
pressers = []

for element in all_items:
    # look up each required tag; find() returns None when a tag is absent
    tags = {field: element.find(tag_name) for field, tag_name in field_tags.items()}
    # only store "full" rows of data: skip items missing any required tag
    # (calling .text on a missing tag would otherwise raise AttributeError)
    if any(tag is None for tag in tags.values()):
        continue
    # build this item's dictionary in title, date, link order
    result = {field: tag.text for field, tag in tags.items()}
    # the <guid> text is a site-relative path, so prefix the host name
    result['link'] = 'www.dbs.com' + result['link']
    pressers.append(result)

pressers

[{'title': 'DBS named ‘World’s Best Bank’ by Euromoney magazine',
  'date': '11 Jul 2019',
  'link': 'www.dbs.com/newsroom/DBS_named_Worlds_Best_Bank_by_Euromoney_magazine'},
 {'title': "DBS launches Singapore's first one-stop integrated travel marketplace with Singapore Airlines, Expedia Partner Solutions and Chubb",
  'date': '09 Jul 2019',
  'link': 'www.dbs.com/newsroom/DBS_launches_Singapores_first_one_stop_integrated_travel_marketplace_with_Singapore_Airlines_Expedia_Partner_Solutions_and_Chubb'},
 {'title': 'DBS debuts second season of SPARKS, an award-winning mini-series, with focus on sustainability',
  'date': '28 Jun 2019',
  'link': 'www.dbs.com/newsroom/DBS_debuts_second_season_of_SPARKS_an_award_winning_mini_series_with_focus_on_sustainability'},
 {'title': 'DBS Foundation Social Enterprise Grant 2019 opens for submissions',
  'date': '19 Jun 2019',
  'link': 'www.dbs.com/newsroom/DBS_Foundation_Social_Enterprise_Grant_2019_opens_for_submissions'},
 {'title': 'DBS, GOJEK 

### Step 3: Create a pandas DataFrame from your list of press releases

In [31]:
#put results into a datadrame
df = pd.DataFrame(pressers)
df.head()

Unnamed: 0,date,link,title
0,11 Jul 2019,www.dbs.com/newsroom/DBS_named_Worlds_Best_Ban...,DBS named ‘World’s Best Bank’ by Euromoney mag...
1,09 Jul 2019,www.dbs.com/newsroom/DBS_launches_Singapores_f...,DBS launches Singapore's first one-stop integr...
2,28 Jun 2019,www.dbs.com/newsroom/DBS_debuts_second_season_...,"DBS debuts second season of SPARKS, an award-w..."
3,19 Jun 2019,www.dbs.com/newsroom/DBS_Foundation_Social_Ent...,DBS Foundation Social Enterprise Grant 2019 op...
4,13 Jun 2019,www.dbs.com/newsroom/DBS_GOJEK_EXPAND_PAYMENT_...,"DBS, GOJEK expand payment services partnership..."


### Step 4: Export to csv

**Note:** Don't export the index column from your DataFrame

In [32]:
# Write the press releases to a CSV file in the current working directory;
# index=False leaves the DataFrame's index column out of the file.
df.to_csv(path_or_buf=r'./dbs_press_releases.csv', index=False)