In [1]:
from rauth import OAuth2Service
import json
import pandas as pd
import requests
import random

In [2]:
# Read the API key (first line) and the secret (second line) from keys.txt.
# The `with` block closes the file even if one of the reads raises.
with open('keys.txt', 'r') as keyFile:
    client_id = keyFile.readline().rstrip()
    client_secret = keyFile.readline().rstrip()

In [3]:
# To avoid creating an extra .txt file,
# paste your API key and secret here directly
# (do not share or commit the notebook with real credentials filled in).

# client_id = "YOUR API KEY HERE"
# client_secret="YOUR SECRET HERE"

### Get the access token

In [4]:
class ExampleOAuth2Client:
    """Small wrapper that obtains a Petfinder access token via rauth.

    Constructing an instance builds the ``OAuth2Service`` and immediately
    requests a token, which is then available as ``access_token``.
    """

    def __init__(self, client_id, client_secret):
        """Build the OAuth2 service for the given credentials and fetch a token."""
        # The same endpoint is passed for both the token URL and the authorize URL.
        token_endpoint = "https://api.petfinder.com/v2/oauth2/token"

        self.access_token = None
        self.service = OAuth2Service(
            client_id=client_id,
            client_secret=client_secret,
            access_token_url=token_endpoint,
            authorize_url=token_endpoint,
            base_url="https://api.petfinder.com/",
        )

        self.get_access_token()

    def get_access_token(self):
        """Request a token with the client-credentials grant and store it on the instance."""
        auth_session = self.service.get_auth_session(
            data={'grant_type': 'client_credentials'},
            decoder=json.loads,
        )
        self.access_token = auth_session.access_token

In [5]:
# The constructor already requests an access token, so a separate
# get_access_token() call here would only repeat the same request.
e = ExampleOAuth2Client(client_id, client_secret)

In [6]:
# Access token obtained by the client; used below to build the Authorization header
token = e.access_token

#### Make a request to the Petfinder server (example)

In [7]:
# Set Parameters
# These notebook-level values are interpolated into the request URLs built below.
typ = 'cat'  # value of the `type` query parameter
page = 1  # value of the `page` query parameter
before = '2020-12-31T00:00:00Z'  # value of the `before` query parameter
after = '2020-01-01T00:00:00Z'  # value of the `after` query parameter
limit = 100  # value of the `limit` query parameter; also used as the per-page count when estimating totals

In [8]:
# Authorization header carrying the bearer token
headers = dict(Authorization='Bearer ' + token)

# NOTE: this URL is not finalized --> the time range and animal type still need to be decided.
# The animal type comes from `typ` in the parameter cell.
url = (
    'https://api.petfinder.com/v2/animals'
    f'?type={typ}&page={page}&limit={limit}'
    f'&before={before}&after={after}&status=adopted'
)

In [9]:
# Fetch the example page. A timeout keeps the cell from hanging forever on a
# stalled connection, and raise_for_status() surfaces HTTP errors here instead
# of as a KeyError on the parsed body in a later cell.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
r = response.json()

#### Counting number of animals

In [10]:
def total_animals(per_page, total_pages):
    """Return the product of the per-page count and the number of pages."""
    return per_page * total_pages
def print_total_animals(before, after, total_animals):
    """Print the animal count for the window between `after` and `before`.

    Only the first 10 characters of each timestamp string are shown.
    """
    start_day, end_day = after[:10], before[:10]
    print(f"Total number of animals between {start_day} and {end_day}: {total_animals}")

In [11]:
# Page count taken from the pagination section of the example response
total_pages = r['pagination']['total_pages']
per_page = limit
# The printed figure is per_page * total_pages.
# NOTE(review): presumably an upper bound, since the last page may hold fewer than `limit` animals -- confirm.
print_total_animals(before, after, total_animals(per_page, total_pages))

Total number of animals between 2020-01-01 and 2020-12-31: 298900


#### Get all data from the API and save as .json

In [12]:
def make_req(page, typ, status, timeout=30):
    """Request one page of animals from the Petfinder API and return the parsed JSON.

    The notebook-level `limit`, `before`, `after` and `headers` values are
    read when the function is called.

    Args:
        page: Page number to request.
        typ: Value for the `type` query parameter (e.g. 'cat').
        status: Value for the `status` query parameter (e.g. 'adopted').
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    url = f'https://api.petfinder.com/v2/animals?type={typ}&page={page}&limit={limit}&before={before}&after={after}&status={status}'
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail here with the HTTP status rather than later with a KeyError on the body.
    response.raise_for_status()
    return response.json()

def get_all_data(typ, status='adoptable', upper_page_lim=0):
    """Collect the 'animals' entries from several pages of the Petfinder API.

    Page 1 is always fetched first; its pagination section supplies the total
    page count. Then either every remaining page is fetched, or a random
    sample of the remaining pages.

    Args:
        typ: Value for the `type` query parameter (e.g. 'dog').
        status: Value for the `status` query parameter.
        upper_page_lim: If falsy, fetch all remaining pages. Otherwise the
            number of remaining pages to sample at random; values larger
            than the number of remaining pages are clamped.

    Returns:
        A list with the items of every fetched response's 'animals' list,
        or None if the first request fails or its response lacks the
        expected keys.
    """
    first_page = None
    try:
        # Always start at page 1, independent of the notebook-level `page` variable.
        first_page = make_req(1, typ, status)
        total_pages = first_page['pagination']['total_pages']
        # Copy so that extending the result never mutates the response object.
        res = list(first_page['animals'])
    except Exception as err:
        print('error occured...')
        # Show the response when there is one; otherwise show what went wrong.
        print(first_page if first_page is not None else err)
        return None

    remaining_pages = range(2, total_pages + 1)
    if not upper_page_lim:
        print(f'there should be around {limit * total_pages} {typ}s...')
        pages_to_fetch = remaining_pages
    else:
        # random.sample raises ValueError when asked for more items than exist.
        n_pages = min(upper_page_lim, len(remaining_pages))
        print(f'there are around {limit * total_pages} {typ}s...')
        print(f'taking random {n_pages} pages from the api...')
        pages_to_fetch = random.sample(remaining_pages, n_pages)

    for p in pages_to_fetch:
        res.extend(make_req(p, typ, status)['animals'])
    return res

def save_as_json(res, name):
    """Serialize `res` as JSON into the file `<name>.json`."""
    target = f'{name}.json'
    with open(target, 'w') as out_file:
        json.dump(res, out_file)
        
def read_from_json(file_name):
    """Load and return the JSON content of the file at `file_name`."""
    with open(f'{file_name}') as in_file:
        return json.load(in_file)

In [20]:
# These two cells pull data from the API and save it as JSON files.
# They take about 5 minutes to run.


# res_adoptable_dog = get_all_data('dog')
# save_as_json(res_adoptable_dog, 'dog_data_adoptable_2020')
# res_adoptable_cat = get_all_data('cat')
# save_as_json(res_adoptable_cat, 'cat_data_adoptable_2020')

In [14]:
# res_adopted_dog = get_all_data('dog', 'adopted', 160)
# Collect adopted-cat records from the first request plus 200 randomly sampled pages.
# NOTE(review): no random seed is set in the visible cells, so the sampled pages presumably differ between runs -- confirm.
res_adopted_cat = get_all_data('cat', 'adopted', 200)

there are around 298900 cats...
taking random 200 pages from the api...


In [15]:
# save_as_json(res_adopted_dog, 'dog_data_adopted_2020')
# Write the collected cat records to cat_data_adopted_2020.json
save_as_json(res_adopted_cat, 'cat_data_adopted_2020')