-
Notifications
You must be signed in to change notification settings - Fork 0
/
data.py
47 lines (36 loc) · 1.46 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import requests
import json
# Placeholders: `client_id` and `client_secret` must be defined (uncomment and
# fill in) before running; get_auth_headers() reads them as module globals.
# client_id = '.....'
# client_secret = '....'

TOKEN_URL = 'https://www.reddit.com/api/v1/access_token'
USER_AGENT = 'MyAPI/0.0.1'
SUBREDDITS = ['TheOnion', 'nottheonion']
TITLES_PER_SUBREDDIT = 300
PAGE_SIZE = 100
# Seconds to wait on each HTTP request so a stalled connection cannot hang
# the script indefinitely.
REQUEST_TIMEOUT = 30


def normalize_headline(title: str) -> str:
    """Return *title* with normalized leading character and casing.

    If the title starts with a double quote, single quote or space, that
    first character is dropped and the following character is upper-cased;
    the remainder is kept as-is. Otherwise the whole title is lower-cased
    and then capitalized.

    Empty and one-character titles are handled without raising.
    """
    if title.startswith(('"', "'", ' ')):
        # Slicing (rather than indexing) yields '' instead of IndexError
        # when the title is just the single leading character.
        return title[1:2].upper() + title[2:]
    return title.lower().capitalize()


def extract_entries(posts: list) -> list:
    """Return ``(headline, url)`` tuples for the given listing children.

    Each item of *posts* is expected to be a dict with a ``'data'`` dict
    holding ``'title'`` and ``'url'`` keys.
    """
    return [
        (normalize_headline(post['data']['title']), post['data']['url'])
        for post in posts
    ]


def get_auth_headers() -> dict:
    """Request an access token and return headers for authenticated calls.

    Posts a ``password`` grant to the token endpoint using HTTP basic auth
    built from the module-level ``client_id`` / ``client_secret``.

    Raises:
        NameError: if ``client_id`` / ``client_secret`` are not defined.
        requests.HTTPError: if the token endpoint returns an error status.
    """
    auth = requests.auth.HTTPBasicAuth(client_id, client_secret)
    data = {'grant_type': 'password',
            'username': '......',
            'password': '......'}
    headers = {'User-Agent': USER_AGENT}
    response = requests.post(TOKEN_URL, auth=auth, data=data,
                             headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail with a clear HTTP error instead of a KeyError on 'access_token'.
    response.raise_for_status()
    token = response.json()['access_token']
    headers['Authorization'] = f'bearer {token}'
    return headers


def fetch_top_titles(subreddit: str, headers: dict,
                     limit: int = TITLES_PER_SUBREDDIT) -> list:
    """Collect up to *limit* unique ``(headline, url)`` pairs for *subreddit*.

    Pages through ``/r/<subreddit>/top`` with ``t=all`` using the ``after``
    cursor returned by each response, stopping when enough unique entries
    have been gathered or the listing reports no further page.

    Entries are returned in the order they were first seen.

    Raises:
        requests.HTTPError: if a listing request returns an error status.
    """
    url = f'https://oauth.reddit.com/r/{subreddit}/top'
    # A dict serves as an insertion-ordered set: duplicates collapse while
    # the order of first appearance is preserved, so the final truncation
    # is deterministic.
    titles = {}
    after = None
    while len(titles) < limit:
        params = {'limit': PAGE_SIZE, 't': 'all'}
        if after:
            params['after'] = after
        response = requests.get(url, headers=headers, params=params,
                                timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        listing = response.json()['data']  # parse the body only once
        after = listing['after']
        for entry in extract_entries(listing['children']):
            titles[entry] = None
        if not after:
            # No cursor means there is no further page to request.
            break
    return list(titles)[:limit]


def main() -> None:
    """Fetch titles for every configured subreddit and write them to JSON.

    One file named ``<subreddit>_titles.json`` is written per subreddit in
    the current working directory.
    """
    headers = get_auth_headers()
    for subreddit in SUBREDDITS:
        titles = fetch_top_titles(subreddit, headers)
        with open(f'{subreddit}_titles.json', 'w', encoding='utf-8') as f:
            json.dump(titles, f)


if __name__ == '__main__':
    main()