#### Goal
Get images linked back to their titles

In [1]:
import sys
import os
import pandas as pd
import boto3

# Data storage
from sqlalchemy import create_engine  # SQL helper
import psycopg2 as psql  # PostgreSQL DBs

sys.path.append("..")

In [11]:
# Show full cell contents instead of truncating long strings (URLs, titles).
# `None` is the supported "no limit" value; the old `-1` sentinel was
# deprecated in pandas 1.0 and is rejected by newer releases.
pd.set_option('display.max_colwidth', None)

In [2]:
import keys
import data_fcns as dfc

Images directory:

I set up an s3 bucket: https://comrx.s3-us-west-2.amazonaws.com/covers/

## Setup

In [3]:
# Path to the local secrets file holding the Postgres credentials.
# expanduser('~') resolves the home directory even when $HOME is not set,
# where os.environ['HOME'] would raise KeyError.
secret_path_aws = os.path.join(os.path.expanduser('~'), '.secret',
                               'aws_ps_flatiron.json')

# Read credentials through the project helper instead of hard-coding them.
aws_keys = keys.get_keys(secret_path_aws)
user = aws_keys['user']
ps = aws_keys['password']
host = aws_keys['host']
db = aws_keys['db_name']

# Connection URL string. It embeds the password, so never display it as a
# cell output.
aws_ps_engine = ('postgresql://' + user + ':' + ps + '@' + host + '/' + db)

# Setup PSQL connection
conn = psql.connect(
    database=db,
    user=user,
    password=ps,
    host=host,
    port='5432'
)

In [4]:
# Instantiate a cursor on the open connection; the query cells execute
# their SQL through this `cur` object.
cur = conn.cursor()

## Add column for url of larger images

In [5]:
# Add a text column to `comics` for the URL of the large cover image.
# IF NOT EXISTS (PostgreSQL 9.6+) makes this cell safe to re-run; without it
# a second run fails because the column already exists.
query_alter = """
    ALTER TABLE comics 
    ADD COLUMN IF NOT EXISTS lrg_img_url VARCHAR;
"""

# Helper query listing the base tables in the public schema.
# It is defined for reference only and is not executed in this cell.
tables = """
SELECT table_name
  FROM information_schema.tables
 WHERE table_schema='public'
   AND table_type='BASE TABLE';
   """

# Apply the schema change. On failure, roll back so the connection is not
# left in an aborted transaction that rejects every later statement.
try:
    cur.execute(query_alter)
    conn.commit()
except Exception:
    conn.rollback()
    raise

## Check new column

In [7]:
# Pull a small sample of rows from `comics` to confirm the new column exists.
query = """
    SELECT * FROM comics 
    LIMIT 10;
"""

# Execute the query; the rows are read back later with cur.fetchall().
cur.execute(query)

# Close out the current transaction (a SELECT has nothing to persist).
conn.commit()

In [8]:
# Load the fetched rows into a DataFrame, naming the columns from the cursor
# metadata. Passing `columns=` to the constructor (instead of assigning
# `.columns` afterwards) also works when the query returned zero rows; the
# assignment form raises a length-mismatch ValueError in that case.
temp_df = pd.DataFrame(cur.fetchall(),
                       columns=[col.name for col in cur.description])

In [12]:
# Preview the first rows; lrg_img_url should be present but still empty.
temp_df.head(3)

Unnamed: 0,comic_id,comic_title,img_url,lrg_img_url
0,17,1 For $1 Axe Cop Bad Guy Eart (Dark Horse),https://comrx.s3-us-west-2.amazonaws.com/covers/_no_cover_.jpg,
1,20,1 For $1 Conan the Barbarian (Dark Horse),https://comrx.s3-us-west-2.amazonaws.com/covers/_no_cover_.jpg,
2,22,1 For $1 Mass Effect Foundati (Dark Horse),https://comrx.s3-us-west-2.amazonaws.com/covers/_no_cover_.jpg,


## Set new column to old column, with new covers path

In [13]:
# Fill lrg_img_url for every row by swapping the '/covers/' path segment of
# img_url for '/covers_large/'. There is no WHERE clause, so all rows change.
# NOTE(review): rows pointing at the '_no_cover_.jpg' placeholder also get a
# '/covers_large/_no_cover_.jpg' URL; confirm that object exists in the bucket.
query_update = """
    UPDATE comics 
    SET lrg_img_url = REPLACE(img_url, '/covers/', '/covers_large/')
    ;
"""

# Execute the query
cur.execute(query_update)

# Persist the update.
conn.commit()

In [14]:
# Re-sample `comics` to verify that lrg_img_url is now populated.
query = """
    SELECT * FROM comics 
    LIMIT 10;
"""

# Execute the query; the rows are read back later with cur.fetchall().
cur.execute(query)

# Close out the current transaction (a SELECT has nothing to persist).
conn.commit()

In [15]:
# Load the fetched rows into a DataFrame, naming the columns from the cursor
# metadata. Passing `columns=` to the constructor (instead of assigning
# `.columns` afterwards) also works when the query returned zero rows; the
# assignment form raises a length-mismatch ValueError in that case.
temp_df = pd.DataFrame(cur.fetchall(),
                       columns=[col.name for col in cur.description])

In [16]:
# Preview: lrg_img_url should mirror img_url with '/covers_large/' in the path.
temp_df.head(3)

Unnamed: 0,comic_id,comic_title,img_url,lrg_img_url
0,235,Amber Blake (IDW),https://comrx.s3-us-west-2.amazonaws.com/covers/amber_blake.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/amber_blake.jpg
1,358,Armor Hunters Bloodshot (Other),https://comrx.s3-us-west-2.amazonaws.com/covers/armor_hunters_bloodshot.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/armor_hunters_bloodshot.jpg
2,500,A Year of Marvels Unbeatable (Marvel),https://comrx.s3-us-west-2.amazonaws.com/covers/a_year_of_marvels_unbeatable.jpg,https://comrx.s3-us-west-2.amazonaws.com/covers_large/a_year_of_marvels_unbeatable.jpg


### 2019.08.12 - Work ends here for now. I just needed to create a new column to store the path to the /covers_large/ folder. Next step: update the comics_factors pickle file.

We need a crosswalk from `comic_title` to `img_url`. For that we need to rebuild the `title` I've been using to search on images, because that's what I've been using to label the jpegs.

In [31]:
# Select every row and column of comic_trans (rebinds `query`).
query = """
    SELECT * from comic_trans;
"""

In [32]:
# Execute the query; the rows are read back later with cur.fetchall().
cur.execute(query)

In [33]:
# Load the fetched rows into a DataFrame, naming the columns from the cursor
# metadata. Passing `columns=` to the constructor (instead of assigning
# `.columns` afterwards) also works when the query returned zero rows; the
# assignment form raises a length-mismatch ValueError in that case.
temp_df = pd.DataFrame(cur.fetchall(),
                       columns=[col.name for col in cur.description])

In [34]:
# Preview the raw comic_trans rows.
temp_df.head(3)

Unnamed: 0,index,publisher,item_id,title_and_num,qty_sold,date_sold,account_num,comic_title
0,2,Amaze Ink Slave Labor Graphics,DCD151935,Filler Bunny #2,1,2011-08-14 18:01:03,174,Filler Bunny (SLG)
1,3,Amaze Ink Slave Labor Graphics,DCD341726,Gargoyles #6,1,2012-06-22 14:11:37,593,Gargoyles (SLG)
2,4,Amaze Ink Slave Labor Graphics,DCD416182,Royal Historian of Oz #1,1,2010-07-21 14:03:07,226,Royal Historian of Oz (SLG)


In [35]:
# Derive a `title` column from `title_and_num` with the project helper
# dfc.cut_issue_num. Judging by the displayed output it strips the trailing
# issue number (e.g. 'Filler Bunny #2' -> 'Filler Bunny'); confirm in data_fcns.
temp_df['title'] = (temp_df['title_and_num'].apply(dfc.cut_issue_num))

In [36]:
# Preview the frame with the new `title` column appended.
temp_df.head()

Unnamed: 0,index,publisher,item_id,title_and_num,qty_sold,date_sold,account_num,comic_title,title
0,2,Amaze Ink Slave Labor Graphics,DCD151935,Filler Bunny #2,1,2011-08-14 18:01:03,174,Filler Bunny (SLG),Filler Bunny
1,3,Amaze Ink Slave Labor Graphics,DCD341726,Gargoyles #6,1,2012-06-22 14:11:37,593,Gargoyles (SLG),Gargoyles
2,4,Amaze Ink Slave Labor Graphics,DCD416182,Royal Historian of Oz #1,1,2010-07-21 14:03:07,226,Royal Historian of Oz (SLG),Royal Historian of Oz
3,5,Amaze Ink Slave Labor Graphics,DCD416182,Royal Historian of Oz #1,1,2010-07-14 19:49:40,399,Royal Historian of Oz (SLG),Royal Historian of Oz
4,6,Amaze Ink Slave Labor Graphics,DCD416182,Royal Historian of Oz #1,1,2010-07-19 10:39:04,237,Royal Historian of Oz (SLG),Royal Historian of Oz


In [37]:
# Normalise titles for filename matching: spell out '&', drop '?', and turn
# '/' into a space. The vectorized .str methods replace the per-element
# lambdas and pass missing values through instead of raising on them.
# regex=False treats each pattern as a literal ('?' alone is not a valid regex).
temp_df['title'] = (temp_df['title']
                    .str.replace('&', 'and', regex=False)
                    .str.replace('?', '', regex=False)
                    .str.replace('/', ' ', regex=False)
                    )

In [38]:
# Preview the frame after the title clean-up.
temp_df.head()

Unnamed: 0,index,publisher,item_id,title_and_num,qty_sold,date_sold,account_num,comic_title,title
0,2,Amaze Ink Slave Labor Graphics,DCD151935,Filler Bunny #2,1,2011-08-14 18:01:03,174,Filler Bunny (SLG),Filler Bunny
1,3,Amaze Ink Slave Labor Graphics,DCD341726,Gargoyles #6,1,2012-06-22 14:11:37,593,Gargoyles (SLG),Gargoyles
2,4,Amaze Ink Slave Labor Graphics,DCD416182,Royal Historian of Oz #1,1,2010-07-21 14:03:07,226,Royal Historian of Oz (SLG),Royal Historian of Oz
3,5,Amaze Ink Slave Labor Graphics,DCD416182,Royal Historian of Oz #1,1,2010-07-14 19:49:40,399,Royal Historian of Oz (SLG),Royal Historian of Oz
4,6,Amaze Ink Slave Labor Graphics,DCD416182,Royal Historian of Oz #1,1,2010-07-19 10:39:04,237,Royal Historian of Oz (SLG),Royal Historian of Oz


Create a working table so we don't have to re-pull from the DB if a mistake is made.

In [39]:
# Collapse the rows to one per (comic_title, title) pair. The `index` column
# of the result holds the number of source rows in each pair.
work = (
    temp_df
    .groupby(['comic_title', 'title'], as_index=False)['index']
    .count()
)

In [40]:
# Preview the working table: one row per (comic_title, title) pair.
work.head()

Unnamed: 0,comic_title,title,index
0,0Secret Wars (Marvel),0Secret Wars,1
1,1 For $1 Abe Sapien (Dark Horse),1 For $1 Abe Sapien,3
2,1 For $1 Action Philosopher (Dark Horse),1 For $1 Action Philosopher,3
3,1 For $1 Avatar Last Airbende (Dark Horse),1 For $1 Avatar Last Airbende,2
4,1 For $1 Axe Cop Bad Guy Eart (Dark Horse),1 For $1 Axe Cop Bad Guy Eart,1


In [41]:
# Number of distinct (comic_title, title) pairs and number of columns.
work.shape

(7202, 3)

In [42]:
# Build the image filename from the cleaned title: lower-case it, replace
# spaces with underscores, and append the '.jpg' extension. The vectorized
# .str methods replace the per-row lambda and pass missing titles through
# instead of raising on them.
work['filename'] = (work['title']
                    .str.lower()
                    .str.replace(' ', '_', regex=False)
                    + '.jpg')

In [44]:
# Base URL of the S3 folder that holds the cover images; generated filenames
# are appended to it to form candidate image URLs.
search_path = 'https://comrx.s3-us-west-2.amazonaws.com/covers/'
search_path

'https://comrx.s3-us-west-2.amazonaws.com/covers/'

In [45]:
# Candidate URL per title: base folder URL + generated filename.
# NOTE(review): filenames are not URL-encoded (some contain '$', per the
# displayed output); confirm these URLs resolve as-is.
work['search_path'] = search_path + work['filename']

In [153]:
# Spot check: candidate URLs for every row whose cleaned title is
# 'Wonder Woman', selecting rows and column in a single .loc call.
work.loc[work['title'] == 'Wonder Woman', 'search_path']

7011    https://comrx.s3-us-west-2.amazonaws.com/cover...
7012    https://comrx.s3-us-west-2.amazonaws.com/cover...
Name: search_path, dtype: object

In [162]:
# Spot check: filename generated for the first row matching the full
# comic_title 'Wonder Woman (DC)'.
work.loc[work['comic_title'] == 'Wonder Woman (DC)', 'filename'].iloc[0]

'wonder_woman.jpg'

In [46]:
# Preview the working table with the filename and search_path columns added.
work.head()

Unnamed: 0,comic_title,title,index,filename,search_path
0,0Secret Wars (Marvel),0Secret Wars,1,0secret_wars.jpg,https://comrx.s3-us-west-2.amazonaws.com/cover...
1,1 For $1 Abe Sapien (Dark Horse),1 For $1 Abe Sapien,3,1_for_$1_abe_sapien.jpg,https://comrx.s3-us-west-2.amazonaws.com/cover...
2,1 For $1 Action Philosopher (Dark Horse),1 For $1 Action Philosopher,3,1_for_$1_action_philosopher.jpg,https://comrx.s3-us-west-2.amazonaws.com/cover...
3,1 For $1 Avatar Last Airbende (Dark Horse),1 For $1 Avatar Last Airbende,2,1_for_$1_avatar_last_airbende.jpg,https://comrx.s3-us-west-2.amazonaws.com/cover...
4,1 For $1 Axe Cop Bad Guy Eart (Dark Horse),1 For $1 Axe Cop Bad Guy Eart,1,1_for_$1_axe_cop_bad_guy_eart.jpg,https://comrx.s3-us-west-2.amazonaws.com/cover...


In [202]:
# Sample comic_title values for spot checks.
# NOTE(review): 'Werlindo' presumably stands in for a title with no match; confirm.
tests = ['Wonder Woman (DC)', 'Batman (DC)', 'X-Men (Marvel)', 'Werlindo']

In [47]:
# S3 client. No credentials are passed explicitly, so boto3 resolves them
# (and the region) from its default configuration chain.
client = boto3.client('s3')

In [178]:
# Placeholder values for experimenting with building a SQL string.
# Only `ttl` is used by the string-building cell; `url` is rebound later.
ttl = 'wonder woman (dc)'
url = 'www.overhere.com'

In [188]:
# Scratch test of multi-line string concatenation; the result is never executed.
# NOTE(review): the resulting text is not valid SQL (the table name is quoted
# as a string literal and the ';' precedes WHERE). Real queries should pass
# values as parameters to cur.execute instead of concatenating them in.
ssql = ("SELECT * FROM '" + ttl + "';" +
        "WHERE huh ")

In [189]:
# Display the concatenated string to inspect how the pieces were joined.
ssql

"SELECT * FROM 'wonder woman (dc)';WHERE huh "

Great! Let's make a full list of all comic titles.

In [257]:
# Every comic_title in the working table, as a plain Python list.
title_search = work['comic_title'].tolist()

# Discard any open (or aborted) transaction on the connection before the
# next round of statements.
conn.rollback()

In [None]:
# Run the project helper over the full title list built above, passing the
# notebook's S3 client, working table, cursor and connection.
# The original call passed `tgt_titles` and `curr`, neither of which is
# defined anywhere in this notebook, so it raised NameError on a fresh kernel.
# NOTE(review): `title_search` is assumed to be the intended title list (it is
# the "full list of all comic titles" built just before this cell) and `cur`
# the intended cursor; confirm against data_fcns.update_urls before running.
dfc.update_urls(title_search, client, work, cur, conn)

## Manual URL updater

In [4]:
# Instantiate a cursor on the open connection (rebinds `cur` for the manual
# update queries in this section).
cur = conn.cursor()

In [None]:
# Sample query: first 100 rows of `comics`.
query = """
    SELECT * from comics LIMIT 100;
"""

# Add the img_url column. IF NOT EXISTS (PostgreSQL 9.6+) keeps the cell
# re-runnable once the column is already there.
query_alter = """
    ALTER TABLE comics 
    ADD COLUMN IF NOT EXISTS img_url VARCHAR;
"""

# Helper query listing the base tables in the public schema.
# It is defined for reference only and is not executed in this cell.
tables = """
SELECT table_name
  FROM information_schema.tables
 WHERE table_schema='public'
   AND table_type='BASE TABLE';
   """

# SQL syntax reminder. It is kept as a comment because, as bare text, these
# two lines were a Python SyntaxError that stopped the whole cell from running:
#   ALTER TABLE table_name
#   ADD COLUMN new_column_name data_type

# Apply the schema change, then run the sample query. On failure, roll back
# so the connection is not left in an aborted transaction.
try:
    cur.execute(query_alter)
    cur.execute(query)
    conn.commit()
except Exception:
    conn.rollback()
    raise

# Check results. `columns=` in the constructor also handles an empty result
# set, where assigning `.columns` afterwards raises a length mismatch.
temp_df = pd.DataFrame(cur.fetchall(),
                       columns=[col.name for col in cur.description])

In [49]:
# Hand-picked cover URL to assign to a single title (rebinds `url`).
url = 'https://comrx.s3-us-west-2.amazonaws.com/covers/avengers_(2010).jpg'

In [50]:
# Assign the URL above to one title via the project helper.
# NOTE(review): judging by the displayed output, the helper returns the
# matching `comics` row as a DataFrame; confirm in data_fcns.
d = dfc.update_manual_img_url('Avengers (2010) (Marvel)', url, conn)

In [51]:
# Display the helper's return value to confirm the new img_url.
d

Unnamed: 0,comic_id,comic_title,img_url
0,433,Avengers (2010) (Marvel),https://comrx.s3-us-west-2.amazonaws.com/covers/avengers_(2010).jpg


In [16]:
# Find every `comics` row whose title contains 'Wicked'
# (LIKE is case-sensitive in PostgreSQL).
q = """select * from comics where comic_title like '%Wicked%';"""

In [21]:
# Run the title search; the rows are read back later with cur.fetchall().
cur.execute(q)

In [22]:
# Load the matching rows into a DataFrame, naming the columns from the cursor
# metadata. Passing `columns=` to the constructor also works when nothing
# matched; assigning `.columns` afterwards raises a length-mismatch
# ValueError on an empty result.
temp_df = pd.DataFrame(cur.fetchall(),
                       columns=[col.name for col in cur.description])
temp_df

Unnamed: 0,comic_id,comic_title,img_url
0,6930,Wicked & Divine 1373 Cvr B Ke (Image),https://comrx.s3-us-west-2.amazonaws.com/covers/_no_cover_.jpg
1,6937,Wicked & Divine (Image),https://comrx.s3-us-west-2.amazonaws.com/covers/_no_cover_.jpg
2,6932,Wicked & Divine 1923 Cvr A Mc (Image),https://comrx.s3-us-west-2.amazonaws.com/covers/_no_cover_.jpg
3,6934,Wicked & Divine 455 Ad (Image),https://comrx.s3-us-west-2.amazonaws.com/covers/_no_cover_.jpg
4,6936,Wicked & Divine Funnies (Image),https://comrx.s3-us-west-2.amazonaws.com/covers/_no_cover_.jpg
5,3265,Image Firsts Wicked & Divine (Image),https://comrx.s3-us-west-2.amazonaws.com/covers/_no_cover_.jpg
6,6929,Wicked & Divine 1373 Cvr A Mc (Image),https://comrx.s3-us-west-2.amazonaws.com/covers/_no_cover_.jpg
7,6931,Wicked & Divine 1831 One Sho (Image),https://comrx.s3-us-west-2.amazonaws.com/covers/_no_cover_.jpg
8,6935,Wicked & Divine Christmas Ann (Image),https://comrx.s3-us-west-2.amazonaws.com/covers/_no_cover_.jpg
9,6933,Wicked & Divine 1923 Cvr B Ko (Image),https://comrx.s3-us-west-2.amazonaws.com/covers/_no_cover_.jpg
