# Code for retrieving earnings and analyst revision news from RavenPack

This notebook shows how we retrieved the analyst revision news and the earnings announcement timestamps from RavenPack.

In [1]:
import pandas as pd
from datetime import datetime, timedelta, time

In [3]:
# Columns to read from the RavenPack file; all other columns are skipped
# when the file is loaded.
dj_cols = [
    'TIMESTAMP_UTC',
    'RPNA_DATE_UTC',
    'RP_ENTITY_ID',
    'ENTITY_NAME',
    'RELEVANCE',
    'NEWS_TYPE',
    'GROUP',
    'TYPE',
    'SUB_TYPE',
    'CATEGORY',
    'SOURCE',
    'RP_STORY_EVENT_INDEX',
    'RP_STORY_ID',
    'EVENT_SIMILARITY_KEY',
]

# Load the data. The 2011 RavenPack file is used here as an example;
# only the columns listed in dj_cols are read.
dj_equity = pd.read_csv(
    'RPNA_DJEdition_2011_4.0-Equities.gz',
    encoding="ISO-8859-1",
    usecols=dj_cols,
)

# Convert the timestamps from UTC to the US/Eastern timezone.
# Step 1: parse the raw TIMESTAMP_UTC values into datetimes.
# NOTE(review): tz_localize below requires tz-naive datetimes, so this assumes
# the file stores TIMESTAMP_UTC without a UTC offset -- confirm.
dj_equity['TIMESTAMP_UTC'] = pd.to_datetime(dj_equity['TIMESTAMP_UTC'])
# Step 2: mark the parsed values as UTC, then convert to US/Eastern.
# The .dt accessor works on the column directly, so there is no need to build
# a re-indexed copy of the whole frame just to reach a DatetimeIndex.
dj_equity['TIMESTAMP'] = (
    dj_equity['TIMESTAMP_UTC']
    .dt.tz_localize('UTC')
    .dt.tz_convert('US/Eastern')
)
# Keep only the columns listed below, with the US/Eastern TIMESTAMP first;
# every other column, including the original TIMESTAMP_UTC, is dropped here.
dj_equity = dj_equity[['TIMESTAMP', 'RPNA_DATE_UTC', 'RP_ENTITY_ID',
                       'ENTITY_NAME', 'SOURCE', 'GROUP', 'SUB_TYPE',
                       'CATEGORY', 'RELEVANCE', 'RP_STORY_ID',
                       'EVENT_SIMILARITY_KEY']]

# Select all analyst revision news: rows whose CATEGORY is one of the
# analyst-ratings-change categories listed below.
rating_change_categories = [
    'analyst-ratings-change-negative',
    'analyst-ratings-change-positive',
    'analyst-ratings-change-neutral',
    'analyst-ratings-change-negative-rater',
    'analyst-ratings-change-positive-rater',
    'analyst-ratings-change-neutral-rater',
]
is_rating_change = dj_equity['CATEGORY'].isin(rating_change_categories)
analyst_ratings = dj_equity[is_rating_change]
# Select earnings announcement news: rows in the 'earnings' group whose
# category is 'earnings-per-share'.
# Note: there can be multiple earnings news items for one firm on its earnings
# announcement day. This selection does not de-duplicate them; when using the
# result, make sure to keep news that is after-hours and select the first news
# item with relevance score == 100.
in_earnings_group = dj_equity['GROUP'] == 'earnings'
is_eps_category = dj_equity['CATEGORY'].isin(['earnings-per-share'])
earnings = dj_equity[in_earnings_group & is_eps_category]