In [1]:
# The API token is read from the cfg module (cfg.NYT_SEARCH_TOKEN) rather
# than being written directly in the notebook.
import cfg
from nytimesarticle import articleAPI
# Client object used by the search call below.
search_api = articleAPI(cfg.NYT_SEARCH_TOKEN)

In [2]:
# Run one search: query term, headline/source filter, and a begin date
# given as an integer (looks like YYYYMMDD -- confirm against the API docs).
articles = search_api.search(
    q='Obama',
    fq={
        'headline': 'Obama',
        'source': ['Reuters', 'AP', 'The New York Times'],
    },
    begin_date=20111231,
)

In [3]:
# Display the raw response so its nested structure can be inspected.
articles

{u'copyright': u'Copyright (c) 2013 The New York Times Company.  All Rights Reserved.',
 u'response': {u'docs': [{u'_id': u'57776e4d38f0d81f396c0545',
    u'abstract': None,
    u'blog': [],
    u'byline': {u'original': u'By MICHAEL D. SHEAR',
     u'person': [{u'firstname': u'Michael',
       u'organization': u'',
       u'rank': 1,
       u'role': u'reported'}]},
    u'document_type': u'article',
    u'headline': {u'main': u'Obama After Dark: The Precious Hours Alone',
     u'print_headline': u'Obama at Night: 7 Almonds and Some Precious Solitude'},
    u'keywords': [{u'is_major': u'N',
      u'name': u'persons',
      u'rank': u'1',
      u'value': u'Obama, Barack'},
     {u'is_major': u'N',
      u'name': u'subject',
      u'rank': u'2',
      u'value': u'United States Politics and Government'},
     {u'is_major': u'N',
      u'name': u'subject',
      u'rank': u'3',
      u'value': u'White House Building (Washington, DC)'}],
    u'lead_paragraph': u'President Obama has come to con

In [4]:
def parse_articles(articles):
    '''
    This function takes in a response to the NYT api and parses
    the articles into a list of dictionaries.

    Parameters
    ----------
    articles : dict
        Decoded search response; the documents are read from
        articles['response']['docs'].

    Returns
    -------
    list of dict
        One dictionary per document, with the keys 'id', 'headline',
        'desk', 'date', 'section', 'source', 'type', 'url', 'word_count',
        'locations' and 'subjects'. 'abstract' and 'snippet' are only
        included when the document has a non-None value for them. Any
        other metadata field the document lacks is stored as None, and
        a missing keyword list yields empty 'locations' / 'subjects'.

    Raises
    ------
    KeyError
        If a required key is missing: the response envelope, a
        document's '_id' or headline 'main', or the 'value' of a
        matching keyword.
    '''
    news = []
    for doc in articles['response']['docs']:
        dic = {}
        dic['id'] = doc['_id']
        dic['headline'] = doc['headline']['main'].encode("utf8")

        # Optional text fields: left out of the result when absent or None.
        for field in ('abstract', 'snippet'):
            text = doc.get(field)
            if text is not None:
                dic[field] = text.encode("utf8")

        # Plain metadata: .get() so one incomplete document does not abort
        # the whole parse with a KeyError.
        dic['desk'] = doc.get('news_desk')
        pub_date = doc.get('pub_date')
        # cutting time of day.
        dic['date'] = pub_date[0:10] if pub_date is not None else None
        dic['section'] = doc.get('section_name')
        dic['source'] = doc.get('source')
        dic['type'] = doc.get('type_of_material')
        dic['url'] = doc.get('web_url')
        dic['word_count'] = doc.get('word_count')

        # Single pass over the keywords, sorting them into locations and
        # subjects by their 'name' (substring match, as before).
        locations = []
        subjects = []
        for keyword in doc.get('keywords') or []:
            name = keyword.get('name') or ''
            if 'glocations' in name:
                locations.append(keyword['value'])
            if 'subject' in name:
                subjects.append(keyword['value'])
        dic['locations'] = locations
        dic['subjects'] = subjects

        news.append(dic)
    return news

In [5]:
# Flatten the raw response into one dictionary per article and display it.
parse_articles(articles)

[{'date': u'2016-07-03',
  'desk': u'National',
  'headline': 'Obama After Dark: The Precious Hours Alone',
  'id': u'57776e4d38f0d81f396c0545',
  'locations': [],
  'section': u'U.S.',
  'snippet': 'President Obama has come to consider the solitary hours after dark as essential as his time in the Oval Office. Barely getting five hours of sleep a night, he considers the time alone more important.',
  'source': u'The New York Times',
  'subjects': [u'United States Politics and Government',
   u'White House Building (Washington, DC)'],
  'type': u'News',
  'url': u'http://www.nytimes.com/2016/07/03/us/politics/obama-after-dark-the-precious-hours-alone.html',
  'word_count': u'1825'},
 {'abstract': 'Tod Williams and Billie Tsien, husband-and-wife modernist architecture team known for work on prominent cultural institutions, are selected to design Barack H Obama Presidential Library and Museum in Chicago.',
  'date': u'2016-07-01',
  'desk': u'National',
  'headline': 'Obama Picks New York