In [1]:
import mysql.connector as sql
import pandas as pd

In [2]:
conn = sql.connect(user ='root', host = 'localhost', password ='', database = 'books_db')
cur = conn.cursor()

In [3]:
cur.execute('SHOW TABLES')

for x in cur:
    print(x)

('books',)


In [4]:
cur.execute('SELECT * FROM books')
result = cur.fetchall()
for x in result:
    print(x)

(1, 'The Namesake', 'Jhumpa', 'Lahiri', 2003, 32, 291)
(2, 'Norse Mythology', 'Neil', 'Gaiman', 2016, 43, 304)
(3, 'American Gods', 'Neil', 'Gaiman', 2001, 12, 465)
(4, 'Interpreter of Maladies', 'Jhumpa', 'Lahiri', 1996, 97, 198)
(5, 'A Hologram for the King: A Novel', 'Dave', 'Eggers', 2012, 154, 352)
(6, 'The Circle', 'Dave', 'Eggers', 2013, 26, 504)
(7, 'The Amazing Adventures of Kavalier & Clay', 'Michael', 'Chabon', 2000, 68, 634)
(8, 'Just Kids', 'Patti', 'Smith', 2010, 55, 304)
(9, 'A Heartbreaking Work of Staggering Genius', 'Dave', 'Eggers', 2001, 104, 437)
(10, 'Coraline', 'Neil', 'Gaiman', 2003, 100, 208)
(11, 'What We Talk About When We Talk About Love: Stories', 'Raymond', 'Carver', 1981, 23, 176)
(12, "Where I'm Calling From: Selected Stories", 'Raymond', 'Carver', 1989, 12, 526)
(13, 'White Noise', 'Don', 'DeLillo', 1985, 49, 320)
(14, 'Cannery Row', 'John', 'Steinbeck', 1945, 95, 181)
(15, 'Oblivion: Stories', 'David', 'Foster Wallace', 2004, 172, 329)
(16, 'Consider t

In [5]:
pd.read_sql_query('SELECT * FROM books', conn).head()

Unnamed: 0,book_id,title,author_fname,author_lname,released_year,stock_quantity,pages
0,1,The Namesake,Jhumpa,Lahiri,2003,32,291
1,2,Norse Mythology,Neil,Gaiman,2016,43,304
2,3,American Gods,Neil,Gaiman,2001,12,465
3,4,Interpreter of Maladies,Jhumpa,Lahiri,1996,97,198
4,5,A Hologram for the King: A Novel,Dave,Eggers,2012,154,352


## String Functions

In [6]:
q1 = '''SELECT CONCAT(author_fname,' ',author_lname)  as full_name FROM books;'''
pd.read_sql_query(q1,conn).head(4)

Unnamed: 0,full_name
0,Jhumpa Lahiri
1,Neil Gaiman
2,Neil Gaiman
3,Jhumpa Lahiri


In [7]:
#concat_ws with seperator between all specified columns
q2 = '''SELECT CONCAT_WS(' - ',author_fname,author_lname) as full_name FROM books;'''
pd.read_sql_query(q2,conn).head(3)

Unnamed: 0,full_name
0,Jhumpa - Lahiri
1,Neil - Gaiman
2,Neil - Gaiman


In [8]:
#shorten book's title 
q3 = '''SELECT CONCAT(SUBSTR(title,1,10),'...') as short_title FROM books;'''
pd.read_sql_query(q3,conn).head(3)

Unnamed: 0,short_title
0,The Namesa...
1,Norse Myth...
2,American G...


In [9]:
q4 = '''SELECT REPLACE(author_fname, 'D', 'J') FROM books;'''
pd.read_sql_query(q4,conn).head()

Unnamed: 0,"REPLACE(author_fname, 'D', 'J')"
0,Jhumpa
1,Neil
2,Neil
3,Jhumpa
4,Jave


In [10]:
q5 = '''SELECT UPPER(author_lname) FROM books;'''
pd.read_sql_query(q5,conn).head()

Unnamed: 0,UPPER(author_lname)
0,LAHIRI
1,GAIMAN
2,GAIMAN
3,LAHIRI
4,EGGERS


## Aggregate functions

In [11]:
#How many books are in the database???
q6 = '''SELECT COUNT(*) as Total_books FROM books;'''
pd.read_sql_query(q6,conn).head()

Unnamed: 0,Total_books
0,16


In [12]:
#How many authors
q7 = '''SELECT COUNT(DISTINCT(author_fname)) as total_authors FROM books;'''
pd.read_sql_query(q7,conn).head()

Unnamed: 0,total_authors
0,9


In [13]:
#How many titles contain "the"?
q8 = '''SELECT COUNT(*) FROM books WHERE title LIKE '%the%';'''
pd.read_sql_query(q8,conn)

Unnamed: 0,COUNT(*)
0,5


In [14]:
#COUNT how many books each author has written
q9 = '''SELECT CONCAT(author_fname,' ',author_lname) AS author_name,
        COUNT(*) AS number_of_books
        FROM books
        GROUP BY 1
        ORDER BY 2 DESC'''

pd.read_sql_query(q9,conn)

Unnamed: 0,author_name,number_of_books
0,Neil Gaiman,3
1,Dave Eggers,3
2,Raymond Carver,2
3,David Foster Wallace,2
4,Jhumpa Lahiri,2
5,Don DeLillo,1
6,John Steinbeck,1
7,Michael Chabon,1
8,Patti Smith,1


In [15]:
#longest book
q10 = '''SELECT pages FROM books
        ORDER BY pages DESC
        LIMIT 1'''

pd.read_sql_query(q10,conn)    

Unnamed: 0,pages
0,634


In [16]:
#Find the year each author published their first book
q11 = '''SELECT CONCAT(author_fname,' ', author_lname) as author_name, MIN(released_year) FROM books
        GROUP BY 1'''
pd.read_sql_query(q11, conn)

Unnamed: 0,author_name,MIN(released_year)
0,Dave Eggers,2001
1,David Foster Wallace,2004
2,Don DeLillo,1985
3,Jhumpa Lahiri,1996
4,John Steinbeck,1945
5,Michael Chabon,2000
6,Neil Gaiman,2001
7,Patti Smith,2010
8,Raymond Carver,1981


In [17]:
#Sum all pages each author has written
q12 = '''SELECT CONCAT(author_fname,' ', author_lname) as author_name, SUM(pages) FROM books
        GROUP BY 1'''
pd.read_sql_query(q12, conn)

Unnamed: 0,author_name,SUM(pages)
0,Dave Eggers,1293.0
1,David Foster Wallace,672.0
2,Don DeLillo,320.0
3,Jhumpa Lahiri,489.0
4,John Steinbeck,181.0
5,Michael Chabon,634.0
6,Neil Gaiman,977.0
7,Patti Smith,304.0
8,Raymond Carver,702.0


In [18]:
#Calculate the average stock quantity for books released in the same year
q13 = ''' SELECT released_year, AVG(stock_quantity) as avg_produced FROM books GROUP BY released_year'''
pd.read_sql_query(q13, conn)

Unnamed: 0,released_year,avg_produced
0,1945,95.0
1,1981,23.0
2,1985,49.0
3,1989,12.0
4,1996,97.0
5,2000,68.0
6,2001,58.0
7,2003,66.0
8,2004,172.0
9,2005,92.0


In [19]:
#Find the full name of the author who wrote the longest book
q14 = ''' SELECT CONCAT(author_fname,' ',author_lname) as author_name, MAX(pages) FROM books'''
pd.read_sql_query(q14, conn)

Unnamed: 0,author_name,MAX(pages)
0,Jhumpa Lahiri,634


## Logical Operators

In [20]:
#Select all books NOT published in 2000
q15 = ''' SELECT * FROM books WHERE released_year !=2000'''
pd.read_sql_query(q15, conn)

Unnamed: 0,book_id,title,author_fname,author_lname,released_year,stock_quantity,pages
0,1,The Namesake,Jhumpa,Lahiri,2003,32,291
1,2,Norse Mythology,Neil,Gaiman,2016,43,304
2,3,American Gods,Neil,Gaiman,2001,12,465
3,4,Interpreter of Maladies,Jhumpa,Lahiri,1996,97,198
4,5,A Hologram for the King: A Novel,Dave,Eggers,2012,154,352
5,6,The Circle,Dave,Eggers,2013,26,504
6,8,Just Kids,Patti,Smith,2010,55,304
7,9,A Heartbreaking Work of Staggering Genius,Dave,Eggers,2001,104,437
8,10,Coraline,Neil,Gaiman,2003,100,208
9,11,What We Talk About When We Talk About Love: St...,Raymond,Carver,1981,23,176


In [21]:
#Select books with titles that don't start with 'W'
q16 = '''SELECT * FROM books WHERE title NOT LIKE 'W%' '''
pd.read_sql_query(q16, conn).head()

Unnamed: 0,book_id,title,author_fname,author_lname,released_year,stock_quantity,pages
0,1,The Namesake,Jhumpa,Lahiri,2003,32,291
1,2,Norse Mythology,Neil,Gaiman,2016,43,304
2,3,American Gods,Neil,Gaiman,2001,12,465
3,4,Interpreter of Maladies,Jhumpa,Lahiri,1996,97,198
4,5,A Hologram for the King: A Novel,Dave,Eggers,2012,154,352


In [22]:
#SELECT books written by Dave Eggers, published after the year 2010
q = ''' SELECT * FROM books WHERE CONCAT(author_fname,' ',author_lname) = 'Dave Eggers' and released_year>2010 '''
pd.read_sql_query(q, conn)

Unnamed: 0,book_id,title,author_fname,author_lname,released_year,stock_quantity,pages
0,5,A Hologram for the King: A Novel,Dave,Eggers,2012,154,352
1,6,The Circle,Dave,Eggers,2013,26,504


In [23]:
#Select all books written by Carver,Lahiri,Smith
q = '''SELECT * FROM books WHERE author_lname IN ('Carver','Lahiri','Smith')'''
pd.read_sql_query(q, conn)

Unnamed: 0,book_id,title,author_fname,author_lname,released_year,stock_quantity,pages
0,1,The Namesake,Jhumpa,Lahiri,2003,32,291
1,4,Interpreter of Maladies,Jhumpa,Lahiri,1996,97,198
2,8,Just Kids,Patti,Smith,2010,55,304
3,11,What We Talk About When We Talk About Love: St...,Raymond,Carver,1981,23,176
4,12,Where I'm Calling From: Selected Stories,Raymond,Carver,1989,12,526


In [24]:
#I only want books released after 2000 and year is even number
q = '''SELECT * FROM books WHERE released_year>2000 and released_year % 2=0'''
pd.read_sql_query(q, conn)

Unnamed: 0,book_id,title,author_fname,author_lname,released_year,stock_quantity,pages
0,2,Norse Mythology,Neil,Gaiman,2016,43,304
1,5,A Hologram for the King: A Novel,Dave,Eggers,2012,154,352
2,8,Just Kids,Patti,Smith,2010,55,304
3,15,Oblivion: Stories,David,Foster Wallace,2004,172,329


In [25]:
#If title contains 'stories'   -> Short Stories,Just Kids-> Memoir,Everything Else -> Novel
q = '''SELECT title, author_fname,
        CASE 
            WHEN title LIKE '%stories%' THEN 'Short Stories'
            WHEN title LIKE '%Just Kids%' THEN 'Memoir'
            ELSE 'Novel'
        END AS 'Genre'
        FROM books;'''
pd.read_sql_query(q, conn)


Unnamed: 0,title,author_fname,Genre
0,The Namesake,Jhumpa,Novel
1,Norse Mythology,Neil,Novel
2,American Gods,Neil,Novel
3,Interpreter of Maladies,Jhumpa,Novel
4,A Hologram for the King: A Novel,Dave,Novel
5,The Circle,Dave,Novel
6,The Amazing Adventures of Kavalier & Clay,Michael,Novel
7,Just Kids,Patti,Memoir
8,A Heartbreaking Work of Staggering Genius,Dave,Novel
9,Coraline,Neil,Novel


In [26]:
conn.close()