In [1]:
from typing import List
import requests
import json
from datetime import datetime
import time
import csv
import os 

from OpenPermID import OpenPermID

# Access token sent to both the Calais tagging service and OpenPermID.
# NOTE(security): credentials should not live in the notebook. Set the
# PERMID_API_KEY environment variable to supply your own token; the literal
# below is kept only as a backward-compatible fallback and should be rotated
# and removed.
TEMP_API_KEY = os.environ.get("PERMID_API_KEY") or "Kf1fmqa3XaGGGsh6wMw5OPlYgsHA1FTz"

In [2]:
'''

The following code takes a URL pointing to either a single news article or an RSS feed of multiple news articles
and extracts data relevant to financial research needs, in accordance with Refinitiv's reference data and
methodologies. 

Technical Notes
1. This code uses the OpenPermID library from Refinitiv as the upgraded way to retrieve data from permid.org, hence installing OpenPermID will be necessary
2. The code writes directly to a timestamped news_companies_<timestamp>.csv file, but you can also opt to print each row by uncommenting print(company_list).
3. Several print options are available throughout the code to review steps.

Financial Markets Notes & Enhancements
1. Enhancement: I would omit any permid's related to "LSEG", "Refinitiv" or "Thomson Reuters" from permList, as LSEG has a news arm (Reuters News) and the output has a lot of noise from associated tagging that will not be related to the article content in most cases. 
2. Ticker symbol is NULL in some cases as some entities are not publicly traded and hence would not typically have a ticker
3. IPO Date (date of Initial Public Offering) is NULL in some cases, as not all entities have been, or currently are, publicly traded. 

The following resources were used to research usability of permID & Intelligent Tagging by Calais

Intelligent Tagging - RESTful API - Calais service 
https://developers.lseg.com/en/api-catalog/open-perm-id/intelligent-tagging-restful-api/documentation#soon-to-be-mandatory-x-calais-selective-tags-header

Intelligent Tagging - RESTful API - understanding input headers
https://developers.lseg.com/en/api-catalog/open-perm-id/intelligent-tagging-restful-api/documentation/manuals-and-guides/input-headers

'''

'\n\nThe following code takes a url in the form of a single news article or a rss feed of multiple news articles\nand extracts data related to financial research needs in concordance with refinitiv\'s reference data and\nmethodlogies. \n\nTechnical Notes\n1. This code uses OpenPermID library from refinitiv as the upgraded way to retreive data from permid.org, hence installing OpenPermID will be neccssary\n2. The code writes directly to output.csv but you can also opt to print the list by executing print(list).\n3. Several print options are available throughout the code to review steps.\n\nFinancial Markets Notes & Enhancements\n1. Enhancement: I would omit any permid\'s related to "LSEG", "Refinitiv" or "Thomson Reuters" from permList, as LSEG has a news arm (Reuters News) and the output has a lot of noise from associated tagging that will not be related to the article content in most cases. \n2. Ticker symbol is NULL in some cases as some entities are not publicly traded and hence wou

In [4]:
#urlMultiArt = 'https://news.google.com/rss/search?q=when:48h+allinurl:reuters.com&ceid=US:en&hl=en-US&gl=US'
#urlSingleArt = 'https://finance.yahoo.com/m/76f35fcb-6373-34b1-b8c4-90f18b06bd1b/exciting-news-for-tesla-stock.html'

# Seconds to wait for each HTTP response before giving up.
_REQUEST_TIMEOUT_SECONDS = 30
# Pause between consecutive OpenPermID lookups.
_LOOKUP_PAUSE_SECONDS = 1


def _extract_perm_ids(calais_json) -> List[dict]:
    """Collect one {'permid', 'ticker'} dict per entry found under any 'resolutions' key."""
    pairs = []
    if not isinstance(calais_json, dict):
        return pairs

    for entity in calais_json.values():
        if not isinstance(entity, dict):
            continue
        for companyinfo in entity.get('resolutions') or []:
            # A resolution without a permid cannot be looked up, so skip it
            # instead of storing None (which would break sorting and lookup).
            if not isinstance(companyinfo, dict) or 'permid' not in companyinfo:
                continue
            pairs.append({
                'permid': "1-" + str(companyinfo['permid']),
                # If 'ticker' is not present, set it to NULL
                'ticker': companyinfo.get('ticker', 'NULL'),
            })
    return pairs


def _format_ipo_date(ipo_datetime) -> str:
    """Return the date part (YYYY-MM-DD) of an IPO timestamp, or 'NULL' when absent."""
    if ipo_datetime is None:
        return "NULL"
    try:
        return datetime.strptime(ipo_datetime, "%Y-%m-%dT%H:%M:%SZ").strftime("%Y-%m-%d")
    except (TypeError, ValueError):
        # Unexpected format: keep the raw value rather than aborting the whole run.
        return str(ipo_datetime)


def get_company_csv_list(from_article: str) -> List[str]:
    """Tag the companies mentioned at a URL and write their reference data to a CSV file.

    The content at ``from_article`` is downloaded and posted to the Calais
    tagging endpoint. Every resolution carrying a PermID is then looked up
    through OpenPermID, and one row per company (PermID, quoted organisation
    name, ticker, IPO date) is written, ordered by PermID, to
    ``news_companies_<timestamp>.csv`` in the current working directory.
    Missing values are written as the literal string ``NULL``.

    Args:
        from_article: URL of a single news article or of an RSS feed.

    Returns:
        The rows that were written, each as a comma-separated string
        ``perm_id,'company name',ticker,ipo_date``.

    Raises:
        requests.RequestException: if downloading the article or calling the
            tagging service fails, times out, or returns an HTTP error status.
        json.JSONDecodeError: if the tagging service or a successful lookup
            returns a body that is not valid JSON.
    """
    # 1. Content, enter in the article or rss feed url when asked as from_article
    HTMLResponse = requests.get(from_article, timeout=_REQUEST_TIMEOUT_SECONDS)
    HTMLResponse.raise_for_status()
    contentText = HTMLResponse.text
    headType = 'text/html'
    #print(contentText)

    # 2. Pass blob to refinitiv calais intelligent tagging service
    url = 'https://api-eit.refinitiv.com/permid/calais'
    payload = contentText.encode('utf8')
    headers = {
        'Content-Type': headType,
        'x-ag-access-token': TEMP_API_KEY,
        'x-calais-selectiveTags': 'Company,IPO,CompanyTicker',
        'outputformat': 'application/json'
        }

    TRITResponse = requests.post(url, data=payload, headers=headers,
                                 timeout=_REQUEST_TIMEOUT_SECONDS)
    TRITResponse.raise_for_status()
    #print(TRITResponse.text)

    # 3. Create a list of perm id's and associated tickers from the response
    JSONResponse = json.loads(TRITResponse.text)
    #print(json.dumps(JSONResponse, indent=4, sort_keys=True))

    permList = _extract_perm_ids(JSONResponse)
    #print(permList)

    # 4. Use OpenPermID to retrieve 2 more required data points, ipo date (if available) & organisation name, write all to csv ordered by permid
    opid = OpenPermID()
    opid.set_access_token(TEMP_API_KEY)

    # Timestamp without ':' or spaces so the file name is valid on every platform.
    file_name = f"news_companies_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}"

    csv_lines = []

    with open(f"{file_name}.csv", 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)

        companies_sorted = sorted(permList, key=lambda x: x['permid'])

        # Loop through the list of permid's from permList
        for company in companies_sorted:
            requested_id = company['permid']
            ticker = "NULL" if company['ticker'] is None else str(company['ticker'])

            # Perform OpenPermID lookup
            output, err = opid.lookup(requested_id, format='json-ld')

            if not output:
                # Lookup failed: report it and still emit a row with NULL placeholders.
                print(f"OpenPermID lookup failed for {requested_id}: {err}")
                data = {}
            else:
                # Parse json string
                data = json.loads(output)
                if not isinstance(data, dict):
                    data = {}

            # Extract required data & accommodate possible NULLs
            perm_id = data.get("tr-common:hasPermId")
            # Fall back to the id we asked for when the record does not echo it.
            perm_id = requested_id if perm_id is None else str(perm_id)
            company_name = data.get("vcard:organization-name")

            # Handle additional NULL values for dates & if available extract date only
            ipo_date_only = _format_ipo_date(data.get("hasIPODate"))

            # Add quotes to Organisation name
            company_name_quoted = "NULL" if company_name is None else "'" + str(company_name) + "'"

            # Create specifically formatted data for printing here
            company_list = ",".join([perm_id, company_name_quoted, ticker, ipo_date_only])
            csv_lines.append(company_list)

            #Print company_list
            #print(company_list)

            #Write the row to the csv file
            writer.writerow([perm_id, company_name_quoted, ticker, ipo_date_only])

            # Sleep inbetween calls to OpenPermID
            time.sleep(_LOOKUP_PAUSE_SECONDS)

    return csv_lines

if __name__ == "__main__":
    # Read the article or RSS-feed URL from standard input, then run the
    # extraction; the function writes its CSV file as a side effect.
    from_article = input()
    result = get_company_csv_list(from_article=from_article)