In [2]:
# This exercise will focus on parsing data in json format.
# We are accessing the Ebay API to perform a search for 'iphone', and we want to compile just some 
# of the returned information in another json file.
# First we'll import the necessary modules:
import json
from api import get_data # this is not a python module! It's our api.py file.
import os
import pprint # used to pretty print big python dictionaries

# Now we'll call the external API using a function in our api.py file
source = get_data("https://svcs.ebay.com/services/search/FindingService/v1?OPERATION-NAME=findItemsByKeywords&SERVICE-VERSION=1.0.0&RESPONSE-DATA-FORMAT=JSON&callback=_cb_findItemsByKeywords&REST-PAYLOAD&keywords=iPhone&paginationInput.entriesPerPage=6&GLOBAL-ID=EBAY-US&siteid=0&SECURITY-APPNAME=xotikota-pythonpo-PRD-87f3df848-3e35dfb4")
print(source)

b'/**/_cb_findItemsByKeywords({"findItemsByKeywordsResponse":[{"ack":["Success"],"version":["1.13.0"],"timestamp":["2018-12-21T13:57:52.347Z"],"searchResult":[{"@count":"6","item":[{"itemId":["382231636268"],"title":["Apple iPhone 7 32\\/128\\/256GB Factory Unlocked AT&T Sprint Verizon T-Mobile"],"globalId":["EBAY-US"],"subtitle":["US SELLER - 12 MONTH WARRANTY - FREE SHIPPING!"],"primaryCategory":[{"categoryId":["9355"],"categoryName":["Cell Phones & Smartphones"]}],"galleryURL":["http:\\/\\/thumbs1.ebaystatic.com\\/pict\\/04040_0.jpg"],"viewItemURL":["http:\\/\\/www.ebay.com\\/itm\\/Apple-iPhone-7-32-128-256GB-Factory-Unlocked-AT-T-Sprint-Verizon-T-Mobile-\\/382231636268?var=0"],"paymentMethod":["PayPal"],"autoPay":["true"],"postalCode":["30101"],"location":["Acworth,GA,USA"],"country":["US"],"shippingInfo":[{"shippingServiceCost":[{"@currencyId":"USD","__value__":"0.0"}],"shippingType":["Free"],"shipToLocations":["Worldwide"],"expeditedShipping":["true"],"oneDayShippingAvailable":["

In [4]:
# Let's clean the outcome and turn it into a valid json object:
x = str(source)
cleaned_source = str(x[:-2]).replace("b'/**/_cb_findItemsByKeywords(", "")
# Let's get a glimpse of the outcome:
print(cleaned_source)

{"findItemsByKeywordsResponse":[{"ack":["Success"],"version":["1.13.0"],"timestamp":["2018-12-21T13:57:52.347Z"],"searchResult":[{"@count":"6","item":[{"itemId":["382231636268"],"title":["Apple iPhone 7 32\\/128\\/256GB Factory Unlocked AT&T Sprint Verizon T-Mobile"],"globalId":["EBAY-US"],"subtitle":["US SELLER - 12 MONTH WARRANTY - FREE SHIPPING!"],"primaryCategory":[{"categoryId":["9355"],"categoryName":["Cell Phones & Smartphones"]}],"galleryURL":["http:\\/\\/thumbs1.ebaystatic.com\\/pict\\/04040_0.jpg"],"viewItemURL":["http:\\/\\/www.ebay.com\\/itm\\/Apple-iPhone-7-32-128-256GB-Factory-Unlocked-AT-T-Sprint-Verizon-T-Mobile-\\/382231636268?var=0"],"paymentMethod":["PayPal"],"autoPay":["true"],"postalCode":["30101"],"location":["Acworth,GA,USA"],"country":["US"],"shippingInfo":[{"shippingServiceCost":[{"@currencyId":"USD","__value__":"0.0"}],"shippingType":["Free"],"shipToLocations":["Worldwide"],"expeditedShipping":["true"],"oneDayShippingAvailable":["false"],"handlingTime":["1"]}]

In [5]:
# Let's save the outcome into a json file, so that we can inspect it (you wouldn't do this on a production environment, but it helps a lot when developing):
with open("outcome.json", "w") as json_file:
    print(cleaned_source, file=json_file)

In [5]:
# Try to open the file. Whoa! The json has an error! Let's find it using an online tool: https://jsoneditoronline.org/
# We found the error, so let's fix it (not in our file, but in our cleaned_source variable):
cleaned_source = cleaned_source.replace('\\\\"', '')
print(cleaned_source)

{"findItemsByKeywordsResponse":[{"ack":["Success"],"version":["1.13.0"],"timestamp":["2018-12-21T13:57:52.347Z"],"searchResult":[{"@count":"6","item":[{"itemId":["382231636268"],"title":["Apple iPhone 7 32\\/128\\/256GB Factory Unlocked AT&T Sprint Verizon T-Mobile"],"globalId":["EBAY-US"],"subtitle":["US SELLER - 12 MONTH WARRANTY - FREE SHIPPING!"],"primaryCategory":[{"categoryId":["9355"],"categoryName":["Cell Phones & Smartphones"]}],"galleryURL":["http:\\/\\/thumbs1.ebaystatic.com\\/pict\\/04040_0.jpg"],"viewItemURL":["http:\\/\\/www.ebay.com\\/itm\\/Apple-iPhone-7-32-128-256GB-Factory-Unlocked-AT-T-Sprint-Verizon-T-Mobile-\\/382231636268?var=0"],"paymentMethod":["PayPal"],"autoPay":["true"],"postalCode":["30101"],"location":["Acworth,GA,USA"],"country":["US"],"shippingInfo":[{"shippingServiceCost":[{"@currencyId":"USD","__value__":"0.0"}],"shippingType":["Free"],"shipToLocations":["Worldwide"],"expeditedShipping":["true"],"oneDayShippingAvailable":["false"],"handlingTime":["1"]}]

In [6]:
# Not very helpful, so let's turn that into a python dictionary and make it a little more human readable:
data = json.loads(cleaned_source)
pprint.pprint(data)

{'findItemsByKeywordsResponse': [{'ack': ['Success'],
                                  'itemSearchURL': ['http:\\/\\/www.ebay.com\\/sch\\/i.html?_nkw=iPhone&_ddo=1&_ipg=6&_pgn=1'],
                                  'paginationOutput': [{'entriesPerPage': ['6'],
                                                        'pageNumber': ['1'],
                                                        'totalEntries': ['19638893'],
                                                        'totalPages': ['3273149']}],
                                  'searchResult': [{'@count': '6',
                                                    'item': [{'autoPay': ['true'],
                                                              'condition': [{'conditionDisplayName': ['Seller '
                                                                                                      'refurbished'],
                                                                             'conditionId': ['2500']}],
     

In [15]:
# Ok, now we want to parse this dictionary, retrieve only the 'title' and '__value__' from each search result and 
# save them to a new dictionary.
# First of, let's create an empty dictionary:
parsed = dict()
parsed2 = dict()
i = 0
# Now it's up to you to loop the 'data' dictionary, retrieve the elements, and add them to 
# the 'parsed' dictionary.

In [17]:
for result in data['findItemsByKeywordsResponse'][0]['searchResult'][0]['item']:
    if result.get('title') != None:
        title = result['title'][0]
        price = result['sellingStatus'][0]['currentPrice'][0]['__value__']
        parsed[title] = price
        # If there's products with the same name, python will update the dictionary instead of appending, resulting in fewer results
        # than expected (see the result of the first print below, only 4 results of the 6 available).
        # So an alternative if to save a dictionary of lists:
        temp = []
        temp.extend([title, price])
        parsed2[i] = temp
        i += 1
# first print
pprint.pprint(parsed)
print('-' * 30)
# second print
pprint.pprint(parsed2)

{'Apple iPhone 6s Plus 16GB 32GB 64GB 128Gb Factory Unlocked AT&T Verizon Sprint': '199.99',
 'Apple iPhone 7 32GB GSM Unlocked Smartphone': '279.99',
 'Apple iPhone 7 32\\/128\\/256GB Factory Unlocked AT&T Sprint Verizon T-Mobile': '234.99',
 'Apple iPhone 7 GSM (Unlocked) Verizon AT&T TMobile Sprint 32GB 128GB 256GB': '249.89'}
------------------------------
{0: ['Apple iPhone 7 32\\/128\\/256GB Factory Unlocked AT&T Sprint Verizon '
     'T-Mobile',
     '234.99'],
 1: ['Apple iPhone 7 GSM (Unlocked) Verizon AT&T TMobile Sprint 32GB 128GB '
     '256GB',
     '249.89'],
 2: ['Apple iPhone 7 32GB GSM Unlocked Smartphone', '279.99'],
 3: ['Apple iPhone 7 32GB GSM Unlocked Smartphone', '279.99'],
 4: ['Apple iPhone 7 32GB GSM Unlocked Smartphone', '279.99'],
 5: ['Apple iPhone 6s Plus 16GB 32GB 64GB 128Gb Factory Unlocked AT&T Verizon '
     'Sprint',
     '199.99']}


In [13]:
# As extra info, imagine that you needed to save this file into a folder. It's always a good practice 
# to first check if the folder exists, and create it if necessary.
if not os.path.exists('result'):
    os.makedirs('result')
    
# Let's save the 'parsed' dictionary to a json file.
with open('result/search_results.json', 'w') as new_file:
    json.dump(parsed, new_file)