In [1]:
#dependency
import os 
import json
import petpy
import urllib
import time
import pandas as pd

import time
import giphy_client
from giphy_client.rest import ApiException
from pprint import pprint

#database 
from sqlalchemy import create_engine

# local config module: Petfinder/Giphy credentials and the Postgres password
from config import key, secret, api_key 
import config


## Petfinder API

In [2]:
#data collection and storing into the database
def get_cats():
    """Fetch adoptable cats around Miami from Petfinder and stage them in Postgres.

    Queries the Petfinder API through ``petpy`` (``animal_type='cat'``,
    ``status='adoptable'``, ``location='Miami, FL'``, ``distance=60``, up to
    8 pages of 100 results), keeps the id/species/age/gender/animal_id/city
    columns, and appends the rows to the ``stage_cat_db`` table of ``pets_db``.

    Returns:
        None. The outcome of the load is reported on stdout.
    """
    # Function-scope imports so this cell stays self-contained.
    from sqlalchemy import inspect as sa_inspect
    from sqlalchemy.exc import IntegrityError

    pf = petpy.Petfinder(key=key, secret=secret)

    # get cat data from api
    cat_df = pf.animals(animal_type='cat', status='adoptable', location='Miami, FL', distance=60,
                        results_per_page=100, pages=8, return_df=True)

    # Keep only the columns we stage, index by the Petfinder id, and shorten
    # the flattened city column name.
    new_cat_df = (
        cat_df[['id', 'species', 'age', 'gender', 'animal_id', 'contact.address.city']]
        .set_index('id')
        .rename(columns={'contact.address.city': 'city'})
    )

    #--------Database Connection----------
    connection_string = f"postgres:{config.password}@localhost:5432/pets_db"
    engine = create_engine(f'postgresql://{connection_string}')

    # Table listing; the result is discarded, so this only serves as a
    # round-trip to the database before loading. Uses the inspector because
    # Engine.table_names() is deprecated in SQLAlchemy 1.4 and gone in 2.0.
    sa_inspect(engine).get_table_names()

    try:
        new_cat_df.to_sql(name="stage_cat_db", con=engine, if_exists="append", index=True)
        print("Data loaded successfully")
    except IntegrityError:
        # A constraint violation is the only failure that means "already there".
        print("Data has already been loaded to db")
    except Exception as exc:
        # Still best-effort (no crash), but report what actually went wrong
        # instead of claiming the data was already loaded.
        print(f"Failed to load cat data into stage_cat_db: {exc!r}")

    return
# Run the cat ingestion: fetch from Petfinder and append to stage_cat_db.
get_cats()

Data loaded successfully


In [3]:
#data collection and storing into the database
def get_dogs():
    """Fetch adoptable dogs around Miami from Petfinder and stage them in Postgres.

    Queries the Petfinder API through ``petpy`` (``animal_type='dog'``,
    ``status='adoptable'``, ``location='Miami, FL'``, ``distance=60``, up to
    5 pages of 100 results), keeps the id/species/age/gender/animal_id/city
    columns, and appends the rows to the ``stage_dog_db`` table of ``pets_db``.

    Returns:
        None. The outcome of the load is reported on stdout.
    """
    # Function-scope imports so this cell stays self-contained.
    from sqlalchemy import inspect as sa_inspect
    from sqlalchemy.exc import IntegrityError

    pf = petpy.Petfinder(key=key, secret=secret)

    # get dog data from api
    dog_df = pf.animals(animal_type='dog', status='adoptable', location='Miami, FL', distance=60,
                        results_per_page=100, pages=5, return_df=True)

    # Keep only the columns we stage, index by the Petfinder id, and shorten
    # the flattened city column name.
    new_dog_df = (
        dog_df[['id', 'species', 'age', 'gender', 'animal_id', 'contact.address.city']]
        .set_index('id')
        .rename(columns={'contact.address.city': 'city'})
    )

    #--------Database Connection----------
    connection_string = f"postgres:{config.password}@localhost:5432/pets_db"
    engine = create_engine(f'postgresql://{connection_string}')

    # Table listing; the result is discarded, so this only serves as a
    # round-trip to the database before loading. Uses the inspector because
    # Engine.table_names() is deprecated in SQLAlchemy 1.4 and gone in 2.0.
    sa_inspect(engine).get_table_names()

    try:
        new_dog_df.to_sql(name="stage_dog_db", con=engine, if_exists="append", index=True)
        print("Data loaded successfully")
    except IntegrityError:
        # A constraint violation is the only failure that means "already there".
        print("Data has already been loaded to db")
    except Exception as exc:
        # Still best-effort (no crash), but report what actually went wrong
        # instead of claiming the data was already loaded.
        print(f"Failed to load dog data into stage_dog_db: {exc!r}")

    return
# Run the dog ingestion: fetch from Petfinder and append to stage_dog_db.
get_dogs()

Data loaded successfully


In [4]:
#data validation step
def data_cleaning():
    """Upsert the staged dog and cat rows into the final ``dog_db`` / ``cat_db`` tables.

    For each (final, staging) table pair, runs an
    ``INSERT ... SELECT DISTINCT * ... ON CONFLICT (id) DO UPDATE`` so that
    exact duplicate staging rows collapse and rows whose ``id`` already exists
    in the final table are overwritten with the staged values.

    Each statement runs in its own transaction (dogs first, then cats), so a
    failure on the second does not undo the first.

    Returns:
        None.

    Raises:
        Whatever SQLAlchemy / the database driver raises if a statement fails;
        errors are not caught here.
    """
    # Function-scope import so this cell stays self-contained.
    from sqlalchemy import text

    # Create a SQL Database connection
    connection_string = f"postgres:{config.password}@localhost:5432/pets_db"
    engine = create_engine(f'postgresql://{connection_string}')

    # NOTE(review): ON CONFLICT (id) presumably relies on a unique/primary-key
    # constraint on id in dog_db and cat_db -- confirm against the schema.
    # NOTE(review): SELECT DISTINCT * only removes fully identical rows; two
    # staged rows sharing an id but differing elsewhere may make the upsert
    # fail -- verify whether staging can contain such rows.
    upsert_template = """INSERT INTO {final}
        SELECT DISTINCT * FROM {stage}
        ON CONFLICT (id) DO UPDATE SET
        species = EXCLUDED.species,
        age = EXCLUDED.age,
        gender = EXCLUDED.gender,
        animal_id = EXCLUDED.animal_id,
        city = EXCLUDED.city"""

    # Table names are fixed constants, so formatting them into the SQL is safe.
    table_pairs = (
        ("dog_db", "stage_dog_db"),
        ("cat_db", "stage_cat_db"),
    )

    for final_table, stage_table in table_pairs:
        # engine.begin() commits on success and rolls back on error; it replaces
        # Engine.execute(), which is deprecated in SQLAlchemy 1.4 and removed in 2.0.
        with engine.begin() as conn:
            conn.execute(text(upsert_template.format(final=final_table, stage=stage_table)))

    return
# Upsert the staged dog and cat rows into the final dog_db / cat_db tables.
data_cleaning()

### Giphy API 

In [5]:
#dependency
import giphy_client
from giphy_client.rest import ApiException
from pprint import pprint

In [27]:
def get_giphy():
    """Fetch trending GIFs from Giphy and stage them in Postgres.

    Calls the Giphy trending endpoint with ``limit=100``, collects each item's
    ``id``, ``slug`` and ``bitly_gif_url`` into a DataFrame indexed by
    ``gif_id``, and appends it to the ``stage_giphy_db`` table of ``pets_db``.

    If the API call fails, the error is printed and nothing is written to the
    database.

    Returns:
        None. The outcome is reported on stdout.
    """
    # Function-scope import so this cell stays self-contained.
    from sqlalchemy.exc import IntegrityError

    #params
    api_instance = giphy_client.DefaultApi()
    api_key = config.api_key
    limit = 100

    try:
        # trending endpoint
        api_response = api_instance.gifs_trending_get(api_key, limit=limit)
    except ApiException as e:
        print("Exception when calling DefaultApi->gifs_trending_get: %s\n" % e)
        # Nothing was fetched: stop here rather than appending an empty frame
        # and reporting a successful load.
        return

    # One record per GIF; `or []` guards against a response without data.
    records = [
        {'slug': item.slug, 'gif_url': item.bitly_gif_url, 'gif_id': item.id}
        for item in (api_response.data or [])
    ]

    # Explicit columns keep the frame well-formed even when there are no records.
    giphy_df = pd.DataFrame(records, columns=['slug', 'gif_url', 'gif_id']).set_index('gif_id')

    #--------Database Connection----------
    connection_string = f"postgres:{config.password}@localhost:5432/pets_db"
    engine = create_engine(f'postgresql://{connection_string}')

    try:
        giphy_df.to_sql(name="stage_giphy_db", con=engine, if_exists="append", index=True)
        print("Data loaded successfully")
    except IntegrityError:
        # A constraint violation is the only failure that means "already there".
        print("Data has already been loaded to db")
    except Exception as exc:
        # Still best-effort (no crash), but report what actually went wrong
        # instead of claiming the data was already loaded.
        print(f"Failed to load giphy data into stage_giphy_db: {exc!r}")

    return
# Run the Giphy ingestion: fetch trending GIFs and append to stage_giphy_db.
get_giphy()

Data loaded successfully


In [None]:
## example (commented out, not run): de-duplicating new_cat_df rows by id in Python

#     #check the set against the cat_db id for duplicates 
#     id_set = set()

   

#     for item in new_cat_df.iterrows():
# #         print(item[1].id)

#         if item[1].id not in id_set:
#             new_cat_df_x.append(item)

#         id_set.add(item[1].id)


    
#     new_cat_df = pd.DataFrame(new_cat_df_x)
  