## Goal
Create support lists for the app. For example, lists of comics and URLs to be used by the app in dropdowns, visualizations, etc.

In [1]:
import os
import random
import time
from urllib.parse import quote_plus

import pandas as pd
import requests

# Data storage
from sqlalchemy import create_engine  # SQL helper
import keys  # Custom keys lib
import psycopg2 as psql  # PostgreSQL DBs

In [2]:
# List the contents of the current working directory.
!ls

__init__.py
[34m__pycache__[m[m
[34marchive[m[m
[34massets[m[m
comic_recs.py
comic_scraper.py
comics_rx-01_data_prep.ipynb
comics_rx-02_eda.ipynb
comics_rx-03a_spark_eda_prep.ipynb
comics_rx-03b_model_als.ipynb
comics_rx-04_poc_matrices_recs.ipynb
comics_rx-05_create_app_support_data.ipynb
comics_rx-06_scrape_comic_covers.ipynb
comics_rx-07_log_image_urls.ipynb
data_fcns.py
[34mdeprecated[m[m
[34mdrivers[m[m
keys.py
[34mraw_data[m[m
[34mreferences[m[m
[34mscratch[m[m
[34msupport_data[m[m
[34mversions[m[m


In [3]:
# Relative location of the comics JSON file that is loaded with pd.read_json below.
# NOTE(review): the `!ls` output above lists `support_data` directly in the
# working directory and no `dev` folder -- confirm this path still resolves.
path = './dev/support_data/comics.json'

In [45]:
# Load the comics file into a DataFrame; lines=True reads it as
# line-delimited JSON (one record per line).
comics = pd.read_json(path_or_buf=path, lines=True)

In [46]:
# Plain Python list of every value in the comic_id column.
comic_ids = comics.loc[:, 'comic_id'].tolist()

In [47]:
# Plain Python list of every value in the comic_title column.
comic_titles = comics.loc[:, 'comic_title'].tolist()

In [48]:
# Peek at the first four ids as a sanity check.
comic_ids[:4]

[1, 2, 3, 4]

In [49]:
# Peek at the first ten titles as a sanity check.
comic_titles[:10]

['0Secret Wars (Marvel)',
 '100 Bullets Brother Lono (DC)',
 '100 Penny Press Locke & Key (IDW)',
 '100 Penny Press Star Trek (IDW)',
 '100 Penny Press Thunder Agent (IDW)',
 '100 Penny Press Tmnt (IDW)',
 '100th Anniversary Special (Marvel)',
 '12 Reasons To Die (Other)',
 '13 Coins (Other)',
 '13th Artifact One Sho (Topcow)']

In [50]:
# Print the current working directory; the relative paths used in this
# notebook are resolved against it.
!pwd

/Users/werlindo/Dropbox/flatiron/capstone/comics_rx/comrx


In [2]:
# Define path to secret
# The home directory is resolved with os.path.expanduser('~') rather than
# os.environ['HOME']: the latter raises KeyError where HOME is not set
# (e.g. on Windows), while expanduser() still honours HOME when it exists.
secret_path_aws = os.path.join(os.path.expanduser('~'), '.secret',
                               'aws_ps_flatiron.json')
# Display the resolved path. Note that the rendered output contains the
# local user name, so clear it before sharing the notebook.
secret_path_aws

'/Users/werlindo/.secret/aws_ps_flatiron.json'

In [3]:
# Read the database credentials through the project's `keys` helper
# (presumably it parses the JSON secrets file -- confirm in keys.py).
aws_keys = keys.get_keys(secret_path_aws)
user = aws_keys['user']
ps = aws_keys['password']
host = aws_keys['host']
db = aws_keys['db_name']

# SQLAlchemy-style connection URL. The user name and password are
# percent-escaped with quote_plus() because characters such as '@', ':' or
# '/' in a password would otherwise be parsed as URL delimiters. For
# credentials without special characters the resulting string is unchanged.
aws_ps_engine = ('postgresql://' + quote_plus(user) + ':' + quote_plus(ps)
                 + '@' + host + '/' + db)

# Setup PSQL connection
# psycopg2 receives the raw (unescaped) credentials as keyword arguments.
# NOTE(review): the connection is never closed in this notebook.
conn = psql.connect(
    database=db,
    user=user,
    password=ps,
    host=host,
    port='5432'
)

In [4]:
# Open a cursor on the connection; the query below is executed and its rows
# fetched through this cursor.
cur = conn.cursor()

In [5]:
# Ten-row sample query against the comics table.
# NOTE(review): `query` is reassigned in the next cell before anything
# executes this statement, so this cell currently has no effect.
query = """SELECT * FROM comics limit 10;"""

In [6]:
# Join every transaction row in comic_trans to its comic (matched on
# comic_title) so each sale carries the comic's comic_id and img_url.
# Transactions whose title has no match in comics are dropped by the
# inner join.
query = """
    SELECT ct.*, c.comic_id, c.img_url 
    from comic_trans ct
    inner join comics c on ct.comic_title = c.comic_title;
"""

In [7]:
# Run the join query on the cursor; the rows are fetched in the next cell.
cur.execute(query)

In [8]:
# Check results
# Load all fetched rows into a DataFrame, naming the columns from the
# cursor's result description. The names are passed to the constructor
# rather than assigned to `.columns` afterwards: with an empty result set the
# frame would have zero columns and the assignment would raise a
# length-mismatch ValueError.
temp_df = pd.DataFrame(cur.fetchall(),
                       columns=[col.name for col in cur.description])

# Show the first three rows as a sanity check.
temp_df.head(3)

Unnamed: 0,index,publisher,item_id,title_and_num,qty_sold,date_sold,account_num,comic_title,comic_id,img_url
0,2,Amaze Ink Slave Labor Graphics,DCD151935,Filler Bunny #2,1,2011-08-14 18:01:03,174,Filler Bunny (SLG),2291,https://comrx.s3-us-west-2.amazonaws.com/cover...
1,3,Amaze Ink Slave Labor Graphics,DCD341726,Gargoyles #6,1,2012-06-22 14:11:37,593,Gargoyles (SLG),2445,https://comrx.s3-us-west-2.amazonaws.com/cover...
2,4,Amaze Ink Slave Labor Graphics,DCD416182,Royal Historian of Oz #1,1,2010-07-21 14:03:07,226,Royal Historian of Oz (SLG),5178,https://comrx.s3-us-west-2.amazonaws.com/cover...


In [9]:
# Zombie Tramp is annoying.
# Flag every row whose title contains 'Zombie Tramp', then keep the rest.
is_zombie_tramp = temp_df['comic_title'].str.contains('Zombie Tramp')
temp_df = temp_df.loc[~is_zombie_tramp]

In [10]:
# Total quantity sold per comic (id, title, cover URL), best sellers first.
group_keys = ['comic_id', 'comic_title', 'img_url']
qty_by_comic = temp_df.groupby(group_keys, as_index=False)['qty_sold'].sum()
qtys = qty_by_comic.sort_values(by='qty_sold', ascending=False).copy()

In [11]:
# Keep the first 100 rows of qtys, which is sorted by qty_sold descending.
top_100 = qtys.iloc[:100]

In [12]:
# Order the top sellers alphabetically by title and keep only the three
# columns that get exported.
export_cols = ['comic_id', 'comic_title', 'img_url']
top_100 = top_100.sort_values(by='comic_title')[export_cols].copy()

In [13]:
# Show full cell contents (e.g. complete image URLs) instead of truncating
# them. `None` is the supported way to disable the width limit; the former
# `-1` sentinel is deprecated since pandas 1.0 and rejected by newer releases.
pd.set_option('display.max_colwidth', None)

### Export back out to csv

In [16]:
# Write the top-100 table to the web app's dev_files folder, without the
# DataFrame index column.
top_100.to_csv(
    path_or_buf='../webapp/templates/dev_files/top_100_comics.csv',
    index=False,
)

In [17]:
# Preview the first lines of the exported CSV to verify its header and rows.
!head ../webapp/templates/dev_files/top_100_comics.csv

comic_id,comic_title,img_url
81,Action Comics (DC),https://comrx.s3-us-west-2.amazonaws.com/covers/action_comics.jpg
198,All New X-Men (Marvel),https://comrx.s3-us-west-2.amazonaws.com/covers/all_new_x-men.jpg
224,Amazing Spider-Man (Marvel),https://comrx.s3-us-west-2.amazonaws.com/covers/amazing_spider-man.jpg
278,Animal Man (DC),https://comrx.s3-us-west-2.amazonaws.com/covers/animal_man.jpg
319,Aquaman (DC),https://comrx.s3-us-west-2.amazonaws.com/covers/aquaman.jpg
388,Astonishing X-Men (Marvel),https://comrx.s3-us-west-2.amazonaws.com/covers/astonishing_x-men.jpg
433,Avengers (2010) (Marvel),https://comrx.s3-us-west-2.amazonaws.com/covers/avengers_(2010).jpg
455,Avengers (Marvel),https://comrx.s3-us-west-2.amazonaws.com/covers/avengers.jpg
552,Batgirl (DC),https://comrx.s3-us-west-2.amazonaws.com/covers/batgirl.jpg


### Test: Make lists
...from `app.py`'s perspective

In [74]:
# Read the exported CSV back in to check that it loads as expected.
# The notebook's working directory is `comrx` (see the `!pwd` output), so the
# file is reached through '../webapp/...', the same location the export cell
# wrote to; './webapp/...' does not exist relative to this directory and
# would fail on a fresh top-to-bottom run.
# NOTE(review): presumably app.py runs from the repository root and would use
# './webapp/...' there -- confirm against app.py.
coms = pd.read_csv('../webapp/templates/dev_files/top_100_comics.csv')

In [75]:
# Show the first five rows of the re-loaded table.
coms.head()

Unnamed: 0,comic_id,comic_title,img_url
0,81,Action Comics (DC),https://comrx.s3-us-west-2.amazonaws.com/covers/action_comics.jpg
1,198,All New X-Men (Marvel),https://comrx.s3-us-west-2.amazonaws.com/covers/all_new_x-men.jpg
2,224,Amazing Spider-Man (Marvel),https://comrx.s3-us-west-2.amazonaws.com/covers/amazing_spider-man.jpg
3,278,Animal Man (DC),https://comrx.s3-us-west-2.amazonaws.com/covers/animal_man.jpg
4,319,Aquaman (DC),https://comrx.s3-us-west-2.amazonaws.com/covers/aquaman.jpg


In [47]:
# Plain Python list of the comic ids from the re-loaded CSV.
ids = coms.loc[:, 'comic_id'].tolist()

In [48]:
# Plain Python list of the comic titles from the re-loaded CSV.
titles = coms.loc[:, 'comic_title'].tolist()

In [49]:
# Peek at the first ten ids as a sanity check.
ids[:10]

[81, 198, 224, 278, 319, 388, 433, 455, 552, 610]