In [56]:
# Import libraries needed for webscraping
import requests
import json

In [42]:
url = 'https://api.pushshift.io/reddit/search/submission'

In [43]:
params = {
    'subreddit': 'MicrosoftTeams',
    'size': 500
}

In [44]:
res = requests.get(url, params)

In [45]:
res.status_code

200

In [46]:
data = res.json()
posts = data['data']

In [47]:
len(posts)

100

In [48]:
posts[0]

{'all_awardings': [],
 'allow_live_comments': False,
 'author': 'Oqtune',
 'author_flair_css_class': None,
 'author_flair_richtext': [],
 'author_flair_text': None,
 'author_flair_type': 'text',
 'author_fullname': 't2_98ivd',
 'author_is_blocked': False,
 'author_patreon_flair': False,
 'author_premium': False,
 'awarders': [],
 'can_mod_post': False,
 'contest_mode': False,
 'created_utc': 1641473629,
 'domain': 'self.MicrosoftTeams',
 'full_link': 'https://www.reddit.com/r/MicrosoftTeams/comments/rxdxsp/teams_room_with_mixed_devices_possible_lenovo/',
 'gildings': {},
 'id': 'rxdxsp',
 'is_created_from_ads_ui': False,
 'is_crosspostable': True,
 'is_meta': False,
 'is_original_content': False,
 'is_reddit_media_domain': False,
 'is_robot_indexable': True,
 'is_self': True,
 'is_video': False,
 'link_flair_background_color': '',
 'link_flair_richtext': [],
 'link_flair_text_color': 'dark',
 'link_flair_type': 'text',
 'locked': False,
 'media_only': False,
 'no_follow': False,
 'num_

In [49]:
import pandas as pd

In [50]:
df = pd.DataFrame(posts)

In [51]:
df[['subreddit', 'selftext', 'title']].head()

Unnamed: 0,subreddit,selftext,title
0,MicrosoftTeams,"Hello,\n\nDoes anyone have any experience in m...",Teams Room with mixed devices possible (Lenovo...
1,MicrosoftTeams,"Hello, I am experiencing these issues while I ...",Why are Teams so bad on Linux/in browser?
2,MicrosoftTeams,Has anyone else noticed the past couple of day...,Teams Voice Calls issue
3,MicrosoftTeams,,How To Add Bulk Members To Microsoft Teams
4,MicrosoftTeams,,Does anybody know of a Teams app that will all...


In [57]:
# Adapted from https://rareloot.medium.com/using-pushshifts-api-to-extract-reddit-submissions-fb517b286563
def getPushshiftData(after, before, sub):
    url = 'https://api.pushshift.io/reddit/search/submission/?subreddit=' \
            +str(sub)+'&size=1000&after='+str(after)+'&before='+str(before)
    
    print(url)
    r = requests.get(url)
    data = json.loads(r.text)
    return data['data']

In [58]:
def post_scrapper(data):
    date = [] #.created_utc
    title = [] #.title
    is_self = [] #.is_self
    selftext = [] # .selftext 
    upvotes = [] #.score
    upvote_ratio = [] #.upvote_ratio
    n_comments = [] #.num_comments
    permalink = [] #.permalink
    author = [] #.author
    
    for post in data:
        date.append(str(datetime.fromtimestamp(post['created_utc'])))
        title.append(post['title'])
        upvotes.append(post['score'])
        upvote_ratio.append(post['upvote_ratio'])
        n_comments.append(post['num_comments'])
        is_self.append(post['is_self'])
        try:
            selftext.append(post['selftext'])
        except:
            selftext.append('NA')
        author.append(post['author'])
        permalink.append(post['permalink'])
    
    df = pd.DataFrame({'date':date,
                  'title':title,
                  'selftext':selftext,
                  'is_self':is_self,
                  'upvotes':upvotes,
                  'upvote_ratio': upvote_ratio,
                  'n_comments':n_comments,
                  'permalink':permalink,
                  'author':author})
    
    return df

In [59]:
def parse_posts(after, before, sub):
    
    # Initialise list
    list_of_dfs = []
    data = getPushshiftData(after, before, sub)
    
    while len(data) > 0:
        current_df = post_scrapper(data)
        # Calls getPushshiftData() with the created date of the last submission
        print(len(data))
        print(str(datetime.fromtimestamp(data[-1]['created_utc'])))
        after = data[-1]['created_utc']
        data = getPushshiftData(after, before, sub)
        list_of_dfs.append(post_scrapper(data))
        
    return list_of_dfs

In [63]:
# Setting date slightly further back to ensure all posts in timeframe are captured
zoom_post = parse_posts('1622553957', '1638365157', 'Zoom')

https://api.pushshift.io/reddit/search/submission/?subreddit=Zoom&size=1000&after=1622553957&before=1638365157
100
2021-06-07 14:30:26
https://api.pushshift.io/reddit/search/submission/?subreddit=Zoom&size=1000&after=1623047426&before=1638365157
100
2021-06-13 09:53:35
https://api.pushshift.io/reddit/search/submission/?subreddit=Zoom&size=1000&after=1623549215&before=1638365157
100
2021-06-18 02:39:11
https://api.pushshift.io/reddit/search/submission/?subreddit=Zoom&size=1000&after=1623955151&before=1638365157
100
2021-06-23 09:29:21
https://api.pushshift.io/reddit/search/submission/?subreddit=Zoom&size=1000&after=1624411761&before=1638365157
100
2021-07-01 19:15:16
https://api.pushshift.io/reddit/search/submission/?subreddit=Zoom&size=1000&after=1625138116&before=1638365157
100
2021-07-10 06:47:56
https://api.pushshift.io/reddit/search/submission/?subreddit=Zoom&size=1000&after=1625870876&before=1638365157
100
2021-07-18 10:19:29
https://api.pushshift.io/reddit/search/submission/?subr

In [64]:
zoom_v = pd.concat(zoom_post, ignore_index=True)

In [65]:
zoom_v.head()

Unnamed: 0,date,title,selftext,is_self,upvotes,upvote_ratio,n_comments,permalink,author
0,2021-06-07 17:06:00,How to migrate from Slack to Zoom Chat,"Mio Engineering Director, Ed James, has laid o...",1.0,1.0,1.0,4.0,/r/Zoom/comments/nu7xi1/how_to_migrate_from_sl...,DominicMio
1,2021-06-07 18:58:23,Leveraging Zoom’s Video SDK for Your Business ...,,0.0,1.0,1.0,2.0,/r/Zoom/comments/nu9kkg/leveraging_zooms_video...,rtcwebusa
2,2021-06-07 19:23:36,I need help in understanding cloud meeintgs,"Can i ask? \nAfter the cloud file is saved, w...",1.0,1.0,1.0,5.0,/r/Zoom/comments/nu9zth/i_need_help_in_underst...,Jenkyrs
3,2021-06-07 21:42:31,troll this meeting please https://meet.google....,[removed],1.0,1.0,1.0,0.0,/r/Zoom/comments/nucuoj/troll_this_meeting_ple...,ResponsibilityNo4367
4,2021-06-08 01:34:08,Zoom is taking a very long time to respond to ...,,0.0,1.0,1.0,6.0,/r/Zoom/comments/nuid1k/zoom_is_taking_a_very_...,Various-Vegetable-10


In [66]:
zoom_v.to_csv('zoom_v.csv', index=False)

In [67]:
# Setting date slightly further back to ensure all posts in timeframe are captured
team_post = parse_posts('1622553957', '1638365157', 'MicrosoftTeams')

https://api.pushshift.io/reddit/search/submission/?subreddit=MicrosoftTeams&size=1000&after=1622553957&before=1638365157
100
2021-06-07 17:52:42
https://api.pushshift.io/reddit/search/submission/?subreddit=MicrosoftTeams&size=1000&after=1623059562&before=1638365157
100
2021-06-10 22:46:46
https://api.pushshift.io/reddit/search/submission/?subreddit=MicrosoftTeams&size=1000&after=1623336406&before=1638365157
100
2021-06-16 02:55:29
https://api.pushshift.io/reddit/search/submission/?subreddit=MicrosoftTeams&size=1000&after=1623783329&before=1638365157
100
2021-06-18 02:37:17
https://api.pushshift.io/reddit/search/submission/?subreddit=MicrosoftTeams&size=1000&after=1623955037&before=1638365157
100
2021-06-23 20:07:25
https://api.pushshift.io/reddit/search/submission/?subreddit=MicrosoftTeams&size=1000&after=1624450045&before=1638365157
100
2021-06-28 23:59:51
https://api.pushshift.io/reddit/search/submission/?subreddit=MicrosoftTeams&size=1000&after=1624895991&before=1638365157
100
2021-

In [68]:
team_v = pd.concat(team_post, ignore_index=True)

In [69]:
team_v.head()

Unnamed: 0,date,title,selftext,is_self,upvotes,upvote_ratio,n_comments,permalink,author
0,2021-06-07 19:21:27,Teams invitation via Desktop Outlook in anothe...,I've seen this being asked several times. Ever...,1.0,1.0,1.0,3.0,/r/MicrosoftTeams/comments/nu9ykm/teams_invita...,AriHD
1,2021-06-07 19:30:11,Teams Contacts Issues,"Hi all,\n\nI've recently started using Teams f...",1.0,1.0,1.0,6.0,/r/MicrosoftTeams/comments/nua41j/teams_contac...,Afraid-Bread
2,2021-06-07 20:02:31,Route calls to a Voice-App instead of a call g...,Hello! Is there an option to route calls to a ...,1.0,1.0,1.0,1.0,/r/MicrosoftTeams/comments/nuapnt/route_calls_...,monkeyape
3,2021-06-07 22:18:02,Microsoft Teams for iOS adds webinars support ...,,0.0,1.0,1.0,0.0,/r/MicrosoftTeams/comments/nudncn/microsoft_te...,IT_PRO_21
4,2021-06-07 22:19:50,Recording a PowerPoint Live presentation doesn...,My colleague wishes to record her Presentation...,1.0,1.0,1.0,2.0,/r/MicrosoftTeams/comments/nudor3/recording_a_...,DarrenOL83


In [70]:
team_v.to_csv('team_v.csv', index=False)