# HAVING vs. WHERE

In [1]:
import sqlite3
import pandas as pd

In [2]:
# Open a connection to the SQLite database file "movies.db".
# The bare `c` on the last line makes the notebook display the connection object.
c = sqlite3.connect("movies.db")
c

<sqlite3.Connection at 0x11d2a8030>

In [6]:
# Load the whole movies table into a DataFrame and show only its first rows.
pd.read_sql("select * from movies", c).head()

Unnamed: 0,Title,Director,Year,Runtime,Rating,Revenue
0,Guardians of the Galaxy,James Gunn,2014,121,8.1,333.13
1,Prometheus,Ridley Scott,2012,124,7.0,126.46
2,Split,M. Night Shyamalan,2016,117,7.3,138.12
3,Sing,Christophe Lourdelet,2016,108,7.2,270.32
4,Suicide Squad,David Ayer,2016,123,6.2,325.02


In [11]:
# List the directors having at least one movie rated over 8.
# WHERE filters individual rows (rating > 8); DISTINCT then removes repeated
# director names, so each matching director is listed once.
pd.read_sql("""
SELECT DISTINCT director
FROM movies
WHERE rating > 8
""", c)

Unnamed: 0,Director
0,James Gunn
1,Damien Chazelle
2,Mel Gibson
3,Garth Davis
4,S.S. Rajamouli
5,Christopher Nolan
6,J.J. Abrams
7,George Miller
8,Byron Howard
9,Joss Whedon


In [12]:
# Show the first rows of the movies table again.
pd.read_sql("select * from movies", c).head()

Unnamed: 0,Title,Director,Year,Runtime,Rating,Revenue
0,Guardians of the Galaxy,James Gunn,2014,121,8.1,333.13
1,Prometheus,Ridley Scott,2012,124,7.0,126.46
2,Split,M. Night Shyamalan,2016,117,7.3,138.12
3,Sing,Christophe Lourdelet,2016,108,7.2,270.32
4,Suicide Squad,David Ayer,2016,123,6.2,325.02


In [14]:
# List the years that have more than 100 movies in the table.
# COUNT(*) is computed per year group, so the condition on it goes in HAVING
# (a filter on groups) rather than WHERE (a filter on rows). HAVING refers to
# the movie_count alias defined in the SELECT list.
pd.read_sql("""
SELECT year, COUNT(*) as movie_count
FROM movies
GROUP BY year
HAVING movie_count > 100
""", c)

Unnamed: 0,Year,movie_count
0,2015,127
1,2016,296


In [15]:
# Show the first rows of the movies table again.
pd.read_sql("select * from movies", c).head()

Unnamed: 0,Title,Director,Year,Runtime,Rating,Revenue
0,Guardians of the Galaxy,James Gunn,2014,121,8.1,333.13
1,Prometheus,Ridley Scott,2012,124,7.0,126.46
2,Split,M. Night Shyamalan,2016,117,7.3,138.12
3,Sing,Christophe Lourdelet,2016,108,7.2,270.32
4,Suicide Squad,David Ayer,2016,123,6.2,325.02


In [21]:
# q1 (WHERE): list the movies directed by Ridley Scott.
# The condition tests the Director column of each row, so WHERE is sufficient.
pd.read_sql("SELECT * FROM movies WHERE Director = 'Ridley Scott' ", c)

Unnamed: 0,Title,Director,Year,Runtime,Rating,Revenue
0,Prometheus,Ridley Scott,2012,124,7.0,126.46
1,The Martian,Ridley Scott,2015,144,8.0,228.43
2,Robin Hood,Ridley Scott,2010,140,6.7,105.22
3,American Gangster,Ridley Scott,2007,157,7.8,130.13
4,Exodus: Gods and Kings,Ridley Scott,2014,150,6.0,65.01
5,The Counselor,Ridley Scott,2013,117,5.3,16.97
6,A Good Year,Ridley Scott,2006,117,6.9,7.46
7,Body of Lies,Ridley Scott,2008,128,7.1,39.38


In [25]:
# q2 (WHERE): which movies have the highest rating-to-revenue ratio?
# The ratio is computed per row as Rating/Revenue (rating divided by revenue),
# so this is a row-level filter and belongs in WHERE, not HAVING.
# Only rows with ratio > 8 are kept; ORDER BY ratio sorts them in ascending
# order, so the largest ratios appear last.
pd.read_sql("""
SELECT *, Rating/Revenue AS ratio
FROM movies
WHERE ratio > 8
ORDER BY ratio
""", c)

Unnamed: 0,Title,Director,Year,Runtime,Rating,Revenue,ratio
0,Sleeping with Other People,Leslye Headland,2015,101,6.5,0.81,8.024691
1,La tortue rouge,Michael Dudok de Wit,2016,80,7.6,0.92,8.260870
2,Frantz,François Ozon,2016,113,7.5,0.86,8.720930
3,Nymphomaniac: Vol. I,Lars von Trier,2013,117,7.0,0.79,8.860759
4,Goksung,Hong-jin Na,2016,156,7.5,0.79,9.493671
5,Knight of Cups,Terrence Malick,2015,118,5.7,0.56,10.178571
6,The Do-Over,Steven Brill,2016,108,5.7,0.54,10.555556
7,American Honey,Andrea Arnold,2016,163,7.0,0.66,10.606061
8,The Stanford Prison Experiment,Kyle Patrick Alvarez,2015,122,6.9,0.64,10.781250
9,All Good Things,Andrew Jarecki,2010,101,6.3,0.58,10.862069


In [27]:
# q3 (HAVING): list directors whose average revenue is greater than 100.
# AVG(revenue) is computed per director group, so the condition on it goes
# in HAVING; it refers to the avg_revenue alias from the SELECT list.
pd.read_sql("""
SELECT director, AVG(revenue) AS avg_revenue
FROM movies
GROUP BY director
HAVING avg_revenue > 100
""", c)

Unnamed: 0,Director,avg_revenue
0,Adam McKay,109.535000
1,Alan Taylor,148.045000
2,Alessandro Carloni,143.520000
3,Alfonso Cuarón,154.685000
4,Andrew Stanton,261.053333
5,Angelina Jolie,115.600000
6,Anne Fletcher,114.610000
7,Anthony Russo,333.915000
8,Bill Condon,286.790000
9,Brad Bird,169.740000


In [30]:
# q4 (HAVING+WHERE): list the directors with more than 5 movies after 2000.
# WHERE keeps only the rows with year > 2000 before grouping; HAVING then
# keeps only the director groups whose remaining movie count exceeds 5.
pd.read_sql("""
SELECT director, COUNT(*) AS movie_count
FROM movies
WHERE year > 2000
GROUP BY director
HAVING movie_count > 5
""", c)

Unnamed: 0,Director,movie_count
0,David Yates,6
1,M. Night Shyamalan,6
2,Michael Bay,6
3,Paul W.S. Anderson,6
4,Ridley Scott,8
