In [1]:
import requests

In [2]:
# Load the NYT API key from a local file so it never appears in the notebook.
with open(".credentials", "r") as f:
    # strip() instead of strip("\n"): a stray "\r" (Windows line ending) or
    # space around the key would otherwise be pasted into every request URL.
    key = f.read().strip()

In [3]:
# Newswire endpoint; the API key travels in the query string.
url = f'https://api.nytimes.com/svc/news/v3/content/all/all/24.json?api-key={key}'
# A timeout keeps a stalled connection from hanging the kernel indefinitely.
r = requests.get(url, timeout=30)
# Stop here on an HTTP error (bad key, rate limit, ...) instead of failing
# later with a confusing error while reading the response body.
r.raise_for_status()

In [4]:
# Decode the JSON body of the Newswire response into Python objects.
resp = r.json()

In [9]:
import json
# Keep a sample article on disk. The file is opened in append mode, so each
# record is terminated with a newline: without it, a second run would glue two
# JSON documents together and the file could no longer be parsed.
with open("t.json", "a") as t:
    t.write(json.dumps(resp['results'][0]) + "\n")

In [20]:
import re
# Patterns for the calendar date and the clock time inside each timestamp string.
regexday = re.compile(r"(\d{4}-\d{2}-\d{2})")
regexhour = re.compile(r"(\d{2}:\d{2}:\d{2})")
# One (date, time) pair per article, each taken from the first match in 'updated_date'.
times = [
    (regexday.findall(stamp)[0], regexhour.findall(stamp)[0])
    for stamp in (art['updated_date'] for art in resp['results'])
]
times

[('2019-03-27', '11:32:04'),
 ('2019-03-27', '11:32:12'),
 ('2019-03-27', '11:28:07'),
 ('2019-03-27', '11:32:27'),
 ('2019-03-27', '11:32:16'),
 ('2019-03-27', '11:20:49'),
 ('2019-03-27', '11:19:46'),
 ('2019-03-27', '11:06:32'),
 ('2019-03-27', '11:06:30'),
 ('2019-03-27', '11:01:31'),
 ('2019-03-27', '10:37:19'),
 ('2019-03-27', '10:44:51'),
 ('2019-03-27', '10:35:36'),
 ('2019-03-27', '10:31:31'),
 ('2019-03-27', '10:17:10'),
 ('2019-03-27', '10:15:24'),
 ('2019-03-27', '10:06:25'),
 ('2019-03-27', '10:01:36'),
 ('2019-03-27', '10:01:35'),
 ('2019-03-27', '10:01:35')]

In [6]:
# Show only the first five articles to keep the cell output short.
resp["results"][0:5]

[{'slug_name': '18SOCIALQS',
  'section': 'Style',
  'subsection': '',
  'title': 'My Sister-in-Law Is Messing Up Our Financial Plans',
  'abstract': 'When separate bank accounts support one spouse’s burden, are they really separate?',
  'url': 'https://www.nytimes.com/2019/04/18/style/married-separate-finances.html',
  'byline': 'By PHILIP GALANES',
  'thumbnail_standard': 'https://static01.nyt.com/images/2011/07/28/fashion/social_inline/social_inline-thumbStandard-v3.jpg',
  'item_type': 'Article',
  'source': 'The New York Times',
  'updated_date': '2019-04-18T10:30:03-04:00',
  'created_date': '2019-04-18T10:30:03-04:00',
  'published_date': '2019-04-17T20:00:00-04:00',
  'first_published_date': '2019-04-18T10:28:33-04:00',
  'material_type_facet': 'News',
  'kicker': 'Social Q’s',
  'subheadline': None,
  'des_facet': '',
  'org_facet': ['Customs, Etiquette and Manners'],
  'per_facet': '',
  'geo_facet': '',
  'related_urls': None,
  'multimedia': [{'url': 'https://static01.nyt.c

## About the New York Times API
The limit is 4,000 requests per day. At a rate of one request every 120 seconds for the Newswire API (720 requests per day), I am well within the limit.

### NYT Semantic API

A Semantic API request is constructed from a concept type and a specific concept.
`concept_type` is one of four types:

`nytd_geo` for a location

`nytd_per` for a person

`nytd_org` for an organization

`nytd_des` for a descriptor

In [65]:
# For every article: a rule, the title, its four facet fields (tab-indented), a rule.
rule = "-" * 100
for article in resp['results']:
    print(rule)
    print(article['title'])
    for facet in ('des_facet', 'org_facet', 'per_facet', 'geo_facet'):
        print("\t" + str(article[facet]))
    print(rule)

----------------------------------------------------------------------------------------------------
L.S.U. Reinstates Coach Implicated in Recruiting Scandal
	['Basketball (College)']
	['Louisiana State University']
	['Wade, Will (1982- )']
	
----------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------
Their Tax Rate Is 0%
	['Corporate Taxes', 'Federal Taxes (US)', 'Russian Interference in 2016 US Elections and Ties to Trump Associates', 'Taxation']
	['Tax Cuts and Jobs Act (2017)', 'Tax Credits, Deductions and Exemptions', 'Income Inequality']
	['Mueller, Robert S III', 'Trump, Donald J', 'Cohen, Michael D (1966- )']
	
----------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------
A Boxing Promoter’s Trickiest

In [151]:
def getSemantic(concept, concept_type, key = key):

    '''Query the New York Times Semantic API for a single concept.

    Parameters
    ----------
    concept : str
        Concept name as the NYT tags spell it, e.g. 'Kansas' or
        'Macron, Emmanuel (1977- )'.
    concept_type : str
        One of 'des', 'geo', 'org', 'per'; mapped to the matching
        nytd_* concept type of the API.
    key : str
        API key. Defaults to the notebook-level `key` as it was when this
        function was defined.

    Returns
    -------
    dict
        The first entry of the 'results' list in the response.

    Raises
    ------
    ValueError
        If concept_type is not one of the four short names.
    requests.HTTPError
        If the API answers with any status other than 200.
    IndexError
        If the response contains no result for the concept.
    '''
    from urllib.parse import quote

    types = {'des':'nytd_des', 'geo':'nytd_geo', 'org':'nytd_org', 'per':'nytd_per'}

    if concept_type not in types:
        raise ValueError(f"concept_type must be one of {types}")

    # build query
    # The concept is a path segment: percent-encode it completely so that a
    # '/', '?' or '#' inside a name cannot change the shape of the URL.
    encoded_concept = quote(str(concept), safe='')
    # https, not http: the API key is part of the request.
    url = f'https://api.nytimes.com/svc/semantic/v2/concept/name/{types[concept_type]}/{encoded_concept}.json'

    # query the API; the timeout keeps a stalled connection from blocking the kernel
    response = requests.get(url, params={'fields': 'all', 'api-key': key}, timeout=30)
    if response.status_code != 200:
        # Fail loudly here instead of printing and then crashing on the body.
        raise requests.HTTPError(
            f"Semantic API request failed with status {response.status_code}",
            response=response,
        )

    # return the first result of the JSON body (as a python dict)
    results = response.json()['results']
    if not results:
        raise IndexError(f"no {types[concept_type]} concept named {concept!r}")

    return results[0]

In [152]:
# Look up the geographic concept "Kansas"; the bare name on the last line displays the result.
kansas = getSemantic('Kansas', 'geo')
kansas

{'concept_id': 26120,
 'concept_name': 'Kansas',
 'is_times_tag': 1,
 'is_sensitive': 0,
 'concept_status': 'Active',
 'vernacular': 'Kansas',
 'concept_type': 'nytd_geo',
 'concept_created': '2009-10-28 14:30:04-04:00',
 'concept_updated': '2014-04-11 18:05:52-04:00',
 'concept_rule': None,
 'concept_gender': None,
 'links': [{'concept_id': 26120,
   'concept_name': 'Kansas',
   'concept_status': 'Active',
   'is_times_tag': 1,
   'concept_type': 'nytd_geo',
   'link_id': 1715084,
   'relation': 'sameAs',
   'link': 'Kansas',
   'link_type': 'wikipedia_raw_name',
   'mapping_type': ''},
  {'concept_id': 26120,
   'concept_name': 'Kansas',
   'concept_status': 'Active',
   'is_times_tag': 1,
   'concept_type': 'nytd_geo',
   'link_id': 1594732,
   'relation': 'sameAs',
   'link': 'http://rdf.freebase.com/ns/en.kansas',
   'link_type': 'freebase_uri',
   'mapping_type': ''},
  {'concept_id': 26120,
   'concept_name': 'Kansas',
   'concept_status': 'Active',
   'is_times_tag': 1,
   'con

In [155]:
# Dump every top-level field: its name, an underline of the same width, its value, a blank line.
for field, value in kansas.items():
    underline = "-" * len(field)
    print(field, underline, f"{value}\n", sep="\n")

concept_id
----------
26120

concept_name
------------
Kansas

is_times_tag
------------
1

is_sensitive
------------
0

concept_status
--------------
Active

vernacular
----------
Kansas

concept_type
------------
nytd_geo

concept_created
---------------
2009-10-28 14:30:04-04:00

concept_updated
---------------
2014-04-11 18:05:52-04:00

concept_rule
------------
None

concept_gender
--------------
None

links
-----
[{'concept_id': 26120, 'concept_name': 'Kansas', 'concept_status': 'Active', 'is_times_tag': 1, 'concept_type': 'nytd_geo', 'link_id': 1715084, 'relation': 'sameAs', 'link': 'Kansas', 'link_type': 'wikipedia_raw_name', 'mapping_type': ''}, {'concept_id': 26120, 'concept_name': 'Kansas', 'concept_status': 'Active', 'is_times_tag': 1, 'concept_type': 'nytd_geo', 'link_id': 1594732, 'relation': 'sameAs', 'link': 'http://rdf.freebase.com/ns/en.kansas', 'link_type': 'freebase_uri', 'mapping_type': ''}, {'concept_id': 26120, 'concept_name': 'Kansas', 'concept_status': 'Active'

In [185]:
# Look up a person concept; the name is written the way NYT person tags are spelled.
macron = getSemantic('Macron, Emmanuel (1977- )', 'per')

In [186]:
# Same field-by-field dump as for Kansas, underlined with carets.
for name, content in macron.items():
    print("\n".join([name, "^" * len(name), str(content) + "\n"]))

concept_id
^^^^^^^^^^
1519088

concept_name
^^^^^^^^^^^^
Macron, Emmanuel (1977- )

is_times_tag
^^^^^^^^^^^^
1

is_sensitive
^^^^^^^^^^^^
0

concept_status
^^^^^^^^^^^^^^
Active

vernacular
^^^^^^^^^^
Emmanuel Macron

concept_type
^^^^^^^^^^^^
nytd_per

concept_created
^^^^^^^^^^^^^^^
2014-10-07 11:21:33-04:00

concept_updated
^^^^^^^^^^^^^^^
2017-04-24 09:54:26-04:00

concept_rule
^^^^^^^^^^^^
None

concept_gender
^^^^^^^^^^^^^^
None

teragram
^^^^^^^^
[{'concept_id': 1519088, 'extraction_rules_id': 3626380, 'extraction_trigger_term': 'Emmanuel Macron', 'extraction_condition_type': 'TGIF', 'extraction_condition': '{(OR,"centrist","Economic Minister","Economy Minister","Marine Le Pen","Minister of Economy",(AND,(OR,"France","French"),(OR,"President","Presidential")))}', 'extraction_descriptor': None, 'extraction_rule_created': '1969-12-31 19:00:00-05:00', 'extraction_rule_updated': '2017-04-24 09:54:26-04:00', 'tickerizations': [{'extraction_rules_id': 3626380, 'tickerization_id': 100

## Yago

Match people with `<isAffiliatedTo>`, `<hasWonPrize>`, `rdf:type`, `<graduatedFrom>`...


In [112]:
import json
import random
import requests
import re

In [103]:
def makeJSON(string):
    '''Parse one stored record (a Python-literal or JSON string) into Python objects.

    The text is first read as a Python literal, so apostrophes inside values
    and words that merely contain "None" (e.g. "Nonetheless") are parsed
    correctly. Every None in the parsed structure is replaced by "" because
    that is what this function has always returned for None.

    Text that is not a valid Python literal (for instance real JSON using
    null/true/false) goes through the original path: single quotes are swapped
    for double quotes, "None" becomes "", and the result is parsed as JSON.

    Parameters
    ----------
    string : str
        The text to parse.

    Returns
    -------
    The parsed object, usually a dict.

    Raises
    ------
    json.JSONDecodeError
        If the text is neither a Python literal nor parseable by the
        fallback path.
    '''
    import ast

    def blank_none(obj):
        '''Return obj with every None inside dicts and lists replaced by "".'''
        if obj is None:
            return ""
        if isinstance(obj, dict):
            return {blank_none(k): blank_none(v) for k, v in obj.items()}
        if isinstance(obj, list):
            return [blank_none(item) for item in obj]
        return obj

    try:
        parsed = ast.literal_eval(string)
    except (ValueError, SyntaxError, TypeError):
        # Not a Python literal: fall back to the original text-replacement parse.
        return json.loads(string.replace("'", '"').replace("None", '""'))

    return blank_none(parsed)

def tinyURL(url):
    '''Shorten `url` with the TinyURL API and return the short link.

    Parameters
    ----------
    url : str
        The address to shorten.

    Returns
    -------
    str
        The body of the API response decoded as UTF-8.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    '''
    # Hand the target over as a query parameter so requests percent-encodes it;
    # pasted raw into the query string, an '&' or '#' in the target would cut it short.
    r = requests.get("http://tinyurl.com/api-create.php", params={"url": url}, timeout=10)
    # Do not return an error page as if it were a short link.
    r.raise_for_status()
    return str(r.content, "utf-8")

In [27]:
# Read only the first line of the data file and drop its newline.
# presumably each line holds one stored article record — confirm against whatever wrote some_data.txt
with open("some_data.txt") as d:
    one_tweet = d.readline().strip("\n")

In [29]:
# Parse the raw line into Python objects.
# NOTE(review): this rebinds `one_tweet` from the raw text to the parsed value, so
# running this cell a second time without re-reading the file passes an
# already-parsed object to makeJSON.
one_tweet = makeJSON(one_tweet)

In [121]:
def printTweet(tweet, max_length=240):
    '''Format NYT Newswire JSON data into a tweetable format.

    Prints two texts: a first tweet made of the title, a shortened URL and as
    many hashtags as fit, followed by the abstract, which is meant to be
    posted as a reply to the first tweet.

    Parameters
    ----------
    tweet : dict
        One Newswire article. The keys read are 'title', 'abstract', 'url'
        and the four facet fields 'des_facet', 'org_facet', 'per_facet',
        'geo_facet'.
    max_length : int, optional
        The first tweet is kept strictly shorter than this many characters.

    Returns
    -------
    None
        The two texts are printed, not returned.
    '''

    def hashtag(hasht):
        '''makes a list of potential hashtags from the NYT tags'''

        # Facets without tags show up as '' in the data; anything that is not
        # a list yields no hashtags.
        if not isinstance(hasht, list):
            return []

        # parenthesised qualifiers such as "(College)" or "(1982- )"
        parentheses = re.compile(r"\([^)]*\)")

        tags = []
        for h in hasht:
            text = parentheses.sub("", str(h)).lower()
            # A hashtag stops at the first space or punctuation mark, so keep
            # only letters, digits and underscores.
            text = re.sub(r"\W", "", text)
            if text:
                tags.append("#" + text)
        return tags

    title = tweet['title']
    abstract = tweet['abstract']
    # shorten url with tinyURL API
    url = tinyURL(tweet['url'])

    all_hashtags = hashtag(tweet['des_facet']) + hashtag(tweet['org_facet']) + \
                    hashtag(tweet['per_facet']) + hashtag(tweet['geo_facet'])

    # a tweet is made of title + url + hashtags
    # followed by another tweet in reply to the first which is the abstract
    # NOTE(review): only the first tweet is checked against max_length; the
    # abstract is printed as is.

    first_twit = f'''{title}\n{url}'''

    # Each distinct hashtag is tried exactly once, in random order.
    # dict.fromkeys drops duplicates while keeping the original order.
    remaining = list(dict.fromkeys(all_hashtags))
    while remaining:
        # pick a hashtag at random and take it out of the pool
        candidate_hashtag = random.choice(remaining)
        remaining.remove(candidate_hashtag)

        # append it if the tweet stays under the limit; the + 1 is the
        # space that separates it from the text before it
        if len(first_twit) + 1 + len(candidate_hashtag) < max_length:
            first_twit += " " + candidate_hashtag

    second_twit = abstract

    print(first_twit)
    print("\n", second_twit)

In [122]:
# Render the sample article as a two-part tweet (printed only).
printTweet(one_tweet)

Want to Escape Global Warming? These Cities Promise Cool Relief
http://tinyurl.com/y3zrtvhk #urbanareas #refugeesanddisplacedpersons #buffalo #wildfires #hurricanesandtropicalstorms #environment #duluth #floods

 While climate change affects everywhere, some areas in America will be less affected than others. And some of those fortunate places, it happens, might be looking for people. 


# Misc

In [1]:
def printRoad(k):
    '''Print a four-line ASCII road that is k characters wide.

    The lines are: a row of "^||^" motifs (one per 10 characters of width),
    a border of k carets, a dashed centre line of k // 2 "= " pairs, and
    the same caret border again.
    '''
    border = "^" * k
    rows = (
        "   ^||^   " * (k // 10),
        border,
        "= " * (k // 2),
        border,
    )
    for row in rows:
        print(row)

printRoad(100)

   ^||^      ^||^      ^||^      ^||^      ^||^      ^||^      ^||^      ^||^      ^||^      ^||^   
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
