# Automated blogging example
This notebook helps assemble and compose blog posts from media gathered from other sources.
It leverages different APIs and other freely available web resources and composes
them into posts, in this example making city-specific recommendations. To run this notebook you have to supply
your own API keys for the various services (Yelp and Google Maps).

# Go down this list and create blog posts for each:
https://en.wikipedia.org/wiki/List_of_United_States_cities_by_population

In [None]:
import argparse
import io
import os
import json
import re
import requests
import sys
import urllib
from urllib2 import HTTPError
from urllib import quote
from urllib import urlencode
import wikipedia


from PIL import Image
from resizeimage import resizeimage
from bs4 import BeautifulSoup
from summa.summarizer import summarize

from IPython.core.display import display, HTML
# Demonstrates the display(HTML(...)) pattern that the rest of the notebook
# uses to render blog content.
display(HTML('<h1>This is how we display HTML</h1>'))

In [None]:
# Constants and API keys
# City Name
city = "Dallas"
city_shorthand = "Dallas"  # used for the data directory and image file names
YELP_LOCATION = 'Dallas'
wiki_city = "Dallas" # Wikipedia page

# TripAdvisor attractions listing, split into a prefix and a page name.
# `url` is derived from the two pieces so the three values cannot drift apart.
base_url = 'https://www.tripadvisor.com/Attractions-g55711-Activities-'
location_url = 'Dallas_Texas.html'
url = base_url + location_url

# Browser-like User-Agent sent with the page download.
user_agent = 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.63 Safari/534.3'
headers = { 'User-Agent' : user_agent }

YELP_SEARCH_TERM = 'cocktail'
YELP_HOTEL_TERM = 'hotels'

# API Keys
# Read from the environment when set, so real keys never have to be typed
# into (and committed with) the notebook; otherwise keep the placeholder.
YELP_API_KEY = os.environ.get("YELP_API_KEY", "Get your own")
MAPS_API_KEY = os.environ.get("MAPS_API_KEY", "Get your own")

In [None]:
# IPython `cd` magic (not plain Python): move the working directory up one
# level. Relative paths used by later cells resolve against it. Re-running
# this cell moves up one more level each time.
cd ..

In [None]:
# Make a directory to store all the data we'll get.
# Only filesystem errors are reported (for example the directory already
# existing when the cell is re-run); any other exception is a real bug and is
# allowed to surface.
try:
    os.mkdir(city_shorthand)
except OSError as msg:
    print(msg)

### City Summary
Summarize city information pulled from Wikipedia.

In [None]:
# Fetch the Wikipedia page object for the city; its text (`.content`) and
# image list (`.images`) are used by later cells.
wikipage = wikipedia.page(wiki_city)

In [None]:
# Article text as returned by the wikipedia package; this is what gets
# summarized for the post.
content = wikipage.content

In [None]:
# Drop trailing newlines only; the rest of the text is left untouched.
content = content.rstrip('\n')

### Sights to see
Generate a list of places to see, pulled from TripAdvisor (TA).

In [None]:
def dl_page_src(url, dest='tripadvisor.html', timeout=30):
    """Download a page and save a prettified copy of its HTML to disk.

    Args:
        url (str): Address of the page to fetch. The module-level ``headers``
            dict is sent with the request.
        dest (str): File the prettified markup is written to, UTF-8 encoded.
            Defaults to ``'tripadvisor.html'``, the name the notebook reads
            back afterwards.
        timeout (float): Seconds to wait for the server before giving up, so
            a stalled connection cannot hang the notebook indefinitely.
    """
    print(url)
    response = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')
    with io.open(dest, 'w', encoding='utf-8') as saved_page:
        # prettify(encoding=...) returns bytes; decode back to text because a
        # file opened by io.open in text mode only accepts unicode.
        saved_page.write(soup.prettify(encoding='utf-8').decode('utf-8'))

def get_image_list(soup):
    """Extract the lazy-loaded image list embedded in a page's scripts.

    Scans every ``<script>`` tag for a ``var lazyImgs = [...];`` assignment
    and decodes the array literal as JSON.

    Args:
        soup: Parsed document exposing ``find_all('script')``, whose results
            have a ``text`` attribute (a BeautifulSoup object in this
            notebook).

    Returns:
        list: The decoded ``lazyImgs`` array from the first script that
        defines it, or an empty list when no script does, so callers can
        always take ``len()`` of the result or iterate over it.

    Raises:
        ValueError: If the captured array text is not valid JSON.
    """
    # Raw string: the pattern is full of regex escapes (\s, \[, \]) that are
    # not valid Python string escapes.
    pattern = re.compile(r'var\s*?lazyImgs\s*?=\s*?(\[.*?\]);', re.DOTALL)

    for tag in soup.find_all('script'):
        matches = pattern.search(tag.text)
        if matches:
            return json.loads(matches.group(1))
    return []

# Fetch the attractions listing; dl_page_src saves it as tripadvisor.html.
dl_page_src(base_url + location_url)

# Read the saved copy back so parsing works from the on-disk snapshot.
with io.open('tripadvisor.html', encoding='utf-8') as page_src:
    source = page_src.read()

# Parsed page; also used further down to pull the attraction names.
soup = BeautifulSoup(source, 'html.parser')

# get the lazy loaded image list
image_list = get_image_list(soup)

In [None]:
# `or []` covers the case where no lazy image list was found on the page
# (image_list is None), which would otherwise make len() raise.
print(len(image_list or []))  # Optionally pull these photos

In [None]:
# Collect the attraction names from the listing: one entry per
# <div class="item name"> element, in page order (duplicates included).
activity_list = []
for tag in soup.find_all('div', attrs={"class": "item name"}):
    # Trim each line of the element's text and join the pieces with single
    # spaces. The text is used as-is (no str() conversion) so non-ASCII names
    # do not raise UnicodeEncodeError under Python 2.
    clean = ' '.join(line.strip() for line in tag.text.strip().splitlines())
    activity_list.append(clean)

### Eat and drink
Pull lists of places to eat and drink from the Yelp Fusion API.

In [None]:
# Yelp Fusion no longer uses OAuth as of December 7, 2017.
# You no longer need to provide Client ID to fetch Data
# It now uses private keys to authenticate requests (API Key)
# You can find it on
# https://www.yelp.com/developers/v3/manage_app
YELP_API_HOST = 'https://api.yelp.com'
YELP_SEARCH_PATH = '/v3/businesses/search'
# Prefix for single-business lookups; the business ID is appended to it.
YELP_BUSINESS_PATH = '/v3/businesses/'

# Sent as the `limit` query parameter of every search. The post code picks a
# canned phrase by result position, and each phrase list has three entries.
YELP_SEARCH_LIMIT = 3

def request(host, path, api_key, url_params=None):
    """Send an authenticated GET request to the API and decode the reply.

    Args:
        host (str): The domain host of the API.
        path (str): The path of the API after the domain.
        api_key (str): API key, sent as a Bearer token.
        url_params (dict): An optional set of query parameters in the request.

    Returns:
        dict: The JSON response from the request.
    """
    query = url_params if url_params else {}
    endpoint = '{0}{1}'.format(host, quote(path.encode('utf8')))
    auth_headers = {'Authorization': 'Bearer %s' % api_key}

    print(u'Querying {0} ...'.format(endpoint))

    reply = requests.request('GET', endpoint, headers=auth_headers, params=query)
    return reply.json()


def search(api_key, term, location):
    """Query the Search API by a search term and location.

    Args:
        api_key (str): Yelp API key, forwarded to ``request``.
        term (str): The search term passed to the API.
        location (str): The search location passed to the API.

    Returns:
        dict: The JSON response from the request.
    """
    # Pass the values through untouched. They travel via requests' `params`,
    # which URL-encodes them itself (a space becomes '+'). Replacing spaces
    # with '+' beforehand gets the plus sign escaped to %2B, so the API
    # receives a literal "+" inside the text.
    url_params = {
        'term': term,
        'location': location,
        'limit': YELP_SEARCH_LIMIT,
    }
    return request(YELP_API_HOST, YELP_SEARCH_PATH, api_key, url_params=url_params)


def get_business(api_key, business_id):
    """Fetch the details of a single business from the Business API.

    Args:
        api_key (str): Yelp API key, forwarded to ``request``.
        business_id (str): The ID of the business to query.

    Returns:
        dict: The JSON response from the request.
    """
    return request(YELP_API_HOST, YELP_BUSINESS_PATH + business_id, api_key)


def query_api(term, location):
    """Search Yelp with the notebook's API key and return the matches.

    Args:
        term (str): The search term to query.
        location (str): The location of the business to query.

    Returns:
        list: The ``businesses`` entry of the search response, or ``None``
        (after printing a notice) when that entry is missing or empty.
    """
    found = search(YELP_API_KEY, term, location).get('businesses')

    if found:
        return found

    print(u'No businesses for {0} in {1} found.'.format(term, location))
    return None


In [None]:
# Cocktail spots for the city; None when the search yields no businesses.
json_data = query_api(YELP_SEARCH_TERM, YELP_LOCATION)

In [None]:
# Bare expression: lets the notebook echo the raw search results for inspection.
json_data

In [None]:
# Hotels for the city, using the search term configured with the other
# constants instead of repeating the literal here.
json_hotel_data = query_api(YELP_HOTEL_TERM, YELP_LOCATION)

In [None]:
# Bare expression: lets the notebook echo the raw hotel results for inspection.
json_hotel_data

In [None]:
# Canned bar verdicts, keyed by rating as a float (4.0, 4.5, 5.0) so they can
# be looked up with business['rating'] directly. Three phrasings per rating.
bar_review_dict = { 4.0: ["It's OK if you're in a pinch.",
                      "We had a fun time, it's fine.",
                      "Good place."],
                4.5: ["Very nice place. There's some things they could do better, but not many.",
                      "Good place, we recommend it!",
                      "We had a great time."],
                5.0: ["Excellent, you have to go!",
                      "I would recommend it to anyone.",
                      "If you're in town, go here."]}
# Canned price remarks, keyed by the price-tier string. There is no '$'
# entry; that tier falls through to the default supplied at the lookup site.
price_dict = {'$$': ["You get a lot for what you pay for!",
                     "Cheap and good.",
                     "Won't hurt your budget"],
              '$$$': ["Getting on the pricey side but still good.",
                      "A bit expensive",
                      "Not too bad."],
              '$$$$': ["Better bring your big wallet or purse.",
                       "On the fancier end in terms of price",
                       "A bit pricey."]}
# Canned hotel reviews, keyed by rating as a *string* ('3.5' ... '5.0'),
# unlike bar_review_dict, which uses float keys. Three phrasings per rating.
hotel_review_dict = {'3.5': ["WAY too pricey for what you get. Sheesh!",
                             "Was not satisfied, the shower/tub combination was tiny and reminded me of one in motel 6 bathroom.",
                             "All in all, definitely not worth the price and I will not stay there again.  Plenty of other places to choose from in the area."
    
                            ], 
                     '4.0': ["A typically great experience. I stayed for a whirlwind visit and didn't get to spend much time in the room. Anyway everything was more or less to my liking.",
                             "I had a basic room and it was just right. The bed was comfortable, there was enough space for my clothes and suitcase, and best of all the bathroom was large and bright. I slept okay except for early morning neighborhood noise - not the hotels fault at all. Water pressure excellent in the shower.",
                             "Well stocked workout room. I had several great workouts here."
                            ],
                     '4.5': ["One of the better hotels I ever stayed and made me want to join their reward program if they have one. It felt luxurious, clean, big really love it.",
                             "The staff was nice and helpful. We liked our room, and it was very clean. The bathroom was nice also. The hallways are painted a dark color which makes it seem gloomy.",
                             "The only thing I did not like was the in-room cash bar and min fridge. I mean really. Who in their right mind is going to spend money on that crap. Take my advice: go to Walgreens for snacks and drinks, and use a cup with ice. All that stuff did was take up counter space, and believe me there is not any extra to go around."
                            ],
                     '5.0': ["The rooms are a decent size and the bathroom is nice as well. For a woman who travels a lot, this place is great.",
                             "I stayed here for work this week and was thoroughly impressed.  The room was exceptionally clean and quiet.  The bathroom was large and well-kept.  The hallway was noisy because a utility closet was at the end of the hall, but I didn't hear anything once I was in my room.",
                             "Our stay was outstanding. The staff was amazing and went out of their way. I'm beginning to wonder why I stay at Hiltons and Marriotts, that might be changing."
                            ]}

### Where to sleep
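Put a spotlight on a rental from Airbnb. (The cells in this section currently only geocode the city and build the Google Maps embed URL; no Airbnb data is fetched yet.)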

In [None]:
from geopy.geocoders import Nominatim
# Nominatim's usage policy asks for an application-specific User-Agent, and
# current geopy releases refuse to build the geocoder without one.
geolocator = Nominatim(user_agent="automated-blogging-example")
# NOTE: geocode() gives None when the place cannot be resolved, in which case
# the cells that read location.latitude / location.longitude will fail.
location = geolocator.geocode(city)

In [None]:
# Coordinates of the geocoded city; the same values go into the map URL.
print(location.latitude)
print(location.longitude)

In [None]:
# Google Maps embed URL for the city's coordinates. The parameter separator is
# written as &amp; because this string is placed directly into HTML (the
# iframe src) when the map is displayed.
google_maps_string = "https://www.google.com/maps/embed/v1/place?q=" + str(location.latitude) + "," + str(location.longitude) + "&amp;key=" + MAPS_API_KEY

### Putting it all together
Put all the component pieces together into the blog post.

In [None]:
# Condense the Wikipedia article with summa; `words=500` is the requested
# summary length. The result opens the blog post.
summary_text = summarize(content, words=500)

In [None]:
# Intro: the condensed Wikipedia text for the city.
display(HTML(summary_text))
display(HTML("<h2>Things to do</h2> The following are just some of the most visited attractions in the area, enjoy!"))

# Attractions already written up, so repeated names are only rendered once.
activity_counting_list = []
for activity in activity_list:
    if activity in activity_counting_list:
        continue
    activity_counting_list.append(activity)

    # Best effort: an attraction without a usable Wikipedia entry is skipped.
    # Catching Exception (not everything) keeps Ctrl-C / kernel interrupt
    # working, and the reason goes to stderr so it stays out of the post.
    try:
        summed = wikipedia.summary(activity, sentences=5)
        activity_summary = summarize(summed, words=100)
        activity_page = wikipedia.page(activity)
    except Exception as msg:
        sys.stderr.write("Skipping %r: %r\n" % (activity, msg))
        continue

    # Save one illustration: the first image whose URL tail mentions neither
    # the Commons logo nor "svg".
    try:
        for image_url in activity_page.images:
            end = image_url[-30:]
            if "Commons-logo" in end or "svg" in end:
                continue
            image_name = activity_page.title
            urllib.urlretrieve(image_url, image_name)
            # Resize this image to 300px wide by 250px tall
            with open(image_name, 'r+b') as f:
                with Image.open(f) as img:
                    cover = resizeimage.resize_cover(img, [300, 250])
                    cover.save("zzz" + image_name + '.jpg', img.format, quality=100)
            break
    except Exception as msg:
        # A failed download or resize only costs the picture, not the entry.
        sys.stderr.write("No image for %r: %r\n" % (activity, msg))

    display(HTML("<strong>" + activity + "</strong>" + " " + activity_summary))
def _canned_phrase(phrases_by_key, key, index, fallback):
    """Pick one canned phrase for a business.

    Args:
        phrases_by_key (dict): Maps a price tier or rating to a list of phrases.
        key: Price tier or rating to look up; may be absent from the dict.
        index (int): Position of the business in the result list. Used to
            rotate through the phrases so neighbouring entries differ.
        fallback (str): Whole sentence returned when ``key`` has no phrases.

    Returns:
        str: A complete phrase (never a single character of the fallback).
    """
    phrases = phrases_by_key.get(key)
    if not phrases:
        return fallback
    return phrases[index % len(phrases)]


def _save_cover_image(image_url, image_name):
    """Download ``image_url`` to ``image_name`` and save a resized copy.

    The copy is 300px wide by 250px tall and is written to
    ``"zzz" + image_name + ".jpg"``.
    """
    urllib.urlretrieve(image_url, image_name)
    with open(image_name, 'r+b') as f:
        with Image.open(f) as img:
            cover = resizeimage.resize_cover(img, [300, 250])
            cover.save("zzz" + image_name + '.jpg', img.format, quality=100)


def _show_spotlights(businesses, review_phrases, rating_key):
    """Render one "Spotlight" entry per business and save its photo.

    Args:
        businesses (list): Business dicts from the Yelp search, or ``None``
            when the search found nothing (then nothing is rendered).
        review_phrases (dict): Canned reviews keyed by rating.
        rating_key (callable): Converts ``business['rating']`` into the key
            type used by ``review_phrases``.
    """
    for index, business in enumerate(businesses or []):
        display(HTML("<h3>Spotlight:</h3>" + "<h4>" + business['name'] + "</h4>"))
        # .get(): not every business carries a 'price' entry.
        price_remark = _canned_phrase(price_dict, business.get('price'), index, "Cheap cheap cheap.")
        display(HTML("<em>How are the prices?</em> " + price_remark))
        verdict = _canned_phrase(review_phrases, rating_key(business['rating']), index,
                                 "You should probably avoid it.")
        display(HTML("<em>What did you think?</em> " + verdict))
        image_name = "zzz" + city + "_" + business['id'] #Name them with zzz so we can grab easy
        # Skip the photo when the business has no image URL.
        if business.get('image_url'):
            _save_cover_image(business['image_url'], image_name)
        display(HTML('More reviews here: <a href="' + business['url'] + '">' + business['url'] + '</a>'))


display(HTML("<h2>Where to grab a cocktail</h2>"))

# bar_review_dict is keyed by float ratings.
_show_spotlights(json_data, bar_review_dict, float)

display(HTML("<h2>If you want to stay over...</h2>"))

# hotel_review_dict is keyed by strings such as '4.5'.
_show_spotlights(json_hotel_data, hotel_review_dict, lambda rating: str(float(rating)))
    

In [None]:
# Embed the map. The src value is quoted so the whole URL is read as a single
# attribute even when it contains spaces (as the placeholder API key does).
display(HTML("<iframe width='100%' height='450' frameborder='20' style='border:line' src='" + google_maps_string + "'></iframe>"))

### Manipulating other images

In [None]:
# Save resized copies of selected images from the city's Wikipedia page:
#   * "featured" images, whose URL tail mentions Montage, Skyline or Center,
#     at 600px wide by 900px tall;
#   * images whose URL tail mentions the city, at 500px wide by 450px tall,
#     falling back to 300px by 250px when that fails.
featured_keywords = ("Montage", "Skyline", "Center")

for index, image_url in enumerate(wikipage.images):
    # Only the last 50 characters of the URL are inspected.
    end = image_url[-50:]
    try:
        # Each keyword is tested against the URL tail. A chained
        # `"Montage" or "Skyline" or "Center" in end` is always true and
        # would treat every image as featured.
        if any(keyword in end for keyword in featured_keywords):
            raw_name = "featured_raw_" + str(index)
            urllib.urlretrieve(image_url, raw_name)
            with open(raw_name, 'r+b') as f:
                with Image.open(f) as img:
                    cover = resizeimage.resize_cover(img, [600, 900])
                    cover.save("zzz" + city_shorthand + '_featured_' + str(index) + '.jpg', img.format, quality=100)

        if city_shorthand in end:
            image_name = city_shorthand + "_" + str(index)
            urllib.urlretrieve(image_url, image_name)
            with open(image_name, 'r+b') as f:
                with Image.open(f) as img:
                    try:
                        cover = resizeimage.resize_cover(img, [500, 450])
                        cover.save("zzz" + image_name + '.jpg', img.format, quality=100)
                    except Exception as msg:
                        # The first size failed; report it and retry with the
                        # smaller cover size.
                        print(msg)
                        cover = resizeimage.resize_cover(img, [300, 250])
                        cover.save("zzz" + image_name + '.jpg', img.format, quality=100)
    except Exception as msg:
        # Best effort per image: report the problem and move on to the next.
        print(msg)
