# Get link roundups

This notebook uses the Bing web search API to get the link roundup posts from Economist's View, an economics blog that regularly published links to economics writing from 2008 to 2019. The result is a csv with urls to these link-roundup posts: EV_links.csv

In [59]:
import http.client, urllib.request, urllib.parse, urllib.error, base64, json
from urllib.parse import urlparse
from keys import bing as key
import pandas as pd

In [60]:
def multiple_searches(value,site_list,max_results):
    #Takes a search term, site list, max results
    #Uses offset in Bing to do multiple searches, 50 results at a time
    count = 0
    all_links = []
    while count < max_results:
        results = bing(value,site_list,key,offset=count)
        for a in results:
            all_links.append(a)
        count += 50
        
    return all_links

def bing(value,site_list, key, offset):
    #Takes a search term (value), list of sites to search against, and bing api key
    #Returns list of urls
    #Documentation https://dev.cognitive.microsoft.com/docs/services/f40197291cd14401b93a478716e818bf/operations/56b4447dcf5ff8098cef380d
    
    if value != None:
        term = str(value)
        term = limit_search(term,site_list)
    
        number_of_results = 50
        #offset=0
        news=False
        #News = True means a Bing news search, else web search
    
        headers = {
            # Request headers
            'Ocp-Apim-Subscription-Key': key,
        }
    
        params = urllib.parse.urlencode({
            # Request parameters
            'q': term,
            'count': number_of_results,
            'offset': offset,
            'mkt': 'en-us',
            'safeSearch': 'Moderate'
        })
        
        try:
            conn = http.client.HTTPSConnection('api.cognitive.microsoft.com')
            conn.request("GET", "/bing/v7.0/search?%s" % params, "{body}", headers)
            response = conn.getresponse()
            data = response.read()
            conn.close()
            
            results = json.loads(data)
            #Return articles
            articles = []
            count = 0
    
            while count < number_of_results:
                if news == True:
                    url = results['value'][count]['url']
                else:
                    try:
                        url = results['webPages']['value'][count]['url']
                        articles.append(url)

                    except:
                        print("Error with Bing result")
                    
                count +=1
                
            print("One search done")
                        
            #return "Links! {}".format(str(articles))
            return articles
        
        except Exception as e:
            print("[Errno {0}] {1}".format(e.errno, e.strerror))
            print(e)
            
            
def limit_search(term,sites):
    #Concatenates site list to suit Bing API
    count = 0
    sites_string = ' ('
    if len(sites) > 0:
        while count < len(sites):
            if count == len(sites) - 1:
                site = 'site:'+str(sites[count])
                sites_string = sites_string + site
            else:
                site = 'site:'+str(sites[count])+' OR '
                sites_string = sites_string + site
            count +=1
        term = term + sites_string 
    return term

In [61]:
#Get link recommendations from Economist's View
sites = ['http://economistsview.typepad.com/']
year = 2008
all_links = []
while year < 2020:
    term = "Links for " + str(year)
    urls = multiple_searches(term,sites,365)
    year += 1
    all_links = all_links + urls
    
print(len(all_links))

Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
One search done
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
One search done
Error with Bing result
One search done
Error with Bing result
Error with Bing result
One search done
Error with Bing result
One search done
One search done
One search done
One search done
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
One search done
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
One search done
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with B

Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
One search done
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
One search done
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
One search done
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
Error with Bing result
One search done
Error with Bing 

In [62]:
#Create a dataframe
df = pd.DataFrame(columns = ["URL"])
#For all EV links, if 'links' in the url add to dataframe
for b in all_links:
    if 'links' in b:
        df.loc[len(df)] = b

#Print dataframe shape, save to CSV
print(df.shape)
df.to_csv("EV_links.csv")

(1092, 1)
