Skip to content

Commit

Permalink
fixed snopes
Browse files Browse the repository at this point in the history
  • Loading branch information
wybiral committed Jan 27, 2021
1 parent d37e7d9 commit 5702320
Showing 1 changed file with 16 additions and 14 deletions.
30 changes: 16 additions & 14 deletions sources/snopes.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,27 +18,29 @@ async def update(self, db, queue):
async with s.get(url) as r:
text = await r.text()
soup = BeautifulSoup(text, 'html.parser')
div = soup.find('div', {'class': 'media-list'})
articles = div.find_all('article', {'class': 'media-wrapper'})
div = soup.find('div', {'class': 'list-group list-group-flush'})
articles = div.find_all('article')
for article in reversed(articles):
await self._update_article(db, queue, article)

async def _update_article(self, db, queue, article):
x = {}
a = article.find('a')
x['url'] = a['href']
x['title'] = a.find('h5').get_text()
li = a.find('li', {'class': 'date breadcrumb-item'})
date = datetime.strptime(li.get_text().strip(), '%d %B %Y')
date = date.strftime('%Y-%m-%d')
x['published'] = date
p = a.find('p', {'class': 'subtitle'})
x['body'] = p.get_text()
img = a.find('img')
if img is not None:
srcset = img['data-lazy-srcset']
srcset = srcset.split('w,')
x['thumb'] = srcset[1].split(' ')[0]
div = article.find('div', {'class': 'media-body'})
x['title'] = div.find('span', {'class': 'title h5'}).get_text()
# Snopes removed the publication date from the listing page, so fall back to today's date
# (could instead parse the date embedded in the thumbnail URL)
date = datetime.now()
x['published'] = date.strftime('%Y-%m-%d')
x['body'] = div.find('span', {'class': 'subtitle d-flex'}).get_text()
figure = article.find('figure')
if figure is not None:
img = figure.find('img')
if img is not None:
srcset = img['data-lazy-srcset']
srcset = srcset.split('w,')
x['thumb'] = srcset[1].split(' ')[0]
x['source_name'] = self.name
x['source_url'] = self.url
x['id'] = self.module + ':' + x['url']
Expand Down

0 comments on commit 5702320

Please sign in to comment.