In [1]:
import pandas as pd
import pymysql

# Open a connection to the local MySQL server and select the `sakila` schema.
# `password` / `database` are the canonical keyword names; `passwd` / `db`
# are deprecated aliases in PyMySQL.
# NOTE(review): credentials are hard-coded for this local tutorial setup --
# do not reuse this pattern outside a throwaway environment.
conn = pymysql.connect(
    host='localhost',
    port=3306,
    user='root',
    password='1234',
    database='sakila',
)
# DictCursor returns every row as a dict keyed by column name, so the result
# of fetchall() can be handed straight to pd.DataFrame.
cursor = conn.cursor(pymysql.cursors.DictCursor)

### JOIN

* ch4에서 INNER와 ON, 그리고 공통의 foreign key를 사용해 여러 테이블의 자료를 불러오는 방법을 배웠다.
  
* JOIN도 이와 마찬가지로 여러 테이블에 있는 데이터를 하나의 테이블로 만드는 방법이다. 
* 하지만 JOIN은 foreign key가 공유되지 않더라도 테이블을 만들 수 있다. 

In [3]:
# Join `customer` and `address` WITHOUT a join condition on purpose, to show
# the Cartesian product: every customer row is paired with every address row.
# The alias is lowercase (`c`) so it matches the SELECT list; MySQL table
# aliases are case-sensitive on Unix, where `AS C` + `c.first_name` fails
# with an unknown-table error.
sen = '''

SELECT c.first_name, c.last_name, a.address
FROM customer AS c JOIN address AS a;

'''

cursor.execute(sen)
# The cursor is a DictCursor, so fetchall() yields one dict per row and
# pandas builds one column per selected field.
pd.DataFrame(cursor.fetchall())

Unnamed: 0,first_name,last_name,address
0,AUSTIN,CINTRON,47 MySakila Drive
1,WADE,DELVALLE,47 MySakila Drive
2,FREDDIE,DUGGAN,47 MySakila Drive
3,ENRIQUE,FORSYTHE,47 MySakila Drive
4,TERRENCE,GUNDERSON,47 MySakila Drive
...,...,...,...
361192,ELIZABETH,BROWN,1325 Fukuyama Street
361193,BARBARA,JONES,1325 Fukuyama Street
361194,LINDA,WILLIAMS,1325 Fukuyama Street
361195,PATRICIA,JOHNSON,1325 Fukuyama Street


### Cartesian product

결과가 361,197행인 이유는 customer 테이블의 599행과 address 테이블의 603행이 가능한 모든 조합으로 짝지어졌기 때문이다(599 × 603 = 361,197). 이는 두 테이블을 어떤 조건으로 join할지(ON 절)를 지정하지 않아서 발생한다.



In [6]:
# Pair each customer with its own address: the ON clause restricts the join
# to rows whose address_id is equal in both tables.
sen = '''

SELECT c.first_name, c.last_name, a.address
FROM customer AS c JOIN address AS a
    ON c.address_id = a.address_id;

'''

cursor.execute(sen)
matched_rows = cursor.fetchall()
# Last expression of the cell: rendered as a table by the notebook.
pd.DataFrame(matched_rows)

Unnamed: 0,first_name,last_name,address
0,MARY,SMITH,1913 Hanoi Way
1,PATRICIA,JOHNSON,1121 Loja Avenue
2,LINDA,WILLIAMS,692 Joliet Street
3,BARBARA,JONES,1566 Inegl Manor
4,ELIZABETH,BROWN,53 Idfu Parkway
...,...,...,...
594,TERRENCE,GUNDERSON,844 Bucuresti Place
595,ENRIQUE,FORSYTHE,1101 Bucuresti Boulevard
596,FREDDIE,DUGGAN,1103 Quilmes Boulevard
597,WADE,DELVALLE,1331 Usak Boulevard


In [7]:
# Same customer/address lookup, with the join type spelled out explicitly
# as INNER JOIN.
sen = '''

SELECT c.first_name, c.last_name, a.address
FROM customer AS c INNER JOIN address AS a
    ON c.address_id = a.address_id;

'''

cursor.execute(sen)
inner_rows = cursor.fetchall()
# Last expression of the cell: rendered as a table by the notebook.
pd.DataFrame(inner_rows)

Unnamed: 0,first_name,last_name,address
0,MARY,SMITH,1913 Hanoi Way
1,PATRICIA,JOHNSON,1121 Loja Avenue
2,LINDA,WILLIAMS,692 Joliet Street
3,BARBARA,JONES,1566 Inegl Manor
4,ELIZABETH,BROWN,53 Idfu Parkway
...,...,...,...
594,TERRENCE,GUNDERSON,844 Bucuresti Place
595,ENRIQUE,FORSYTHE,1101 Bucuresti Boulevard
596,FREDDIE,DUGGAN,1103 Quilmes Boulevard
597,WADE,DELVALLE,1331 Usak Boulevard


### JOIN을 사용할 때 종류를 적어주는 습관을 들이자.
JOIN의 종류는 여러 가지가 있지만 이번 장에서는 INNER와 OUTER만 설명하고 있다.

INNER JOIN은 ON 조건에 매칭되는 행만 반환하는 방법이고

OUTER JOIN은 기준이 되는 테이블의 행을, 매칭되는 상대 행이 없더라도 모두 반환하는 방법이다.

방법에 차이가 있는 만큼 JOIN을 쓸 때 어떤 종류인지 적는 습관을 들이자.


### USING
두 테이블을 연결하는 컬럼의 이름이 양쪽에서 동일할 때(예: foreign key인 address_id), ON 조건식을 쓰지 않고 컬럼 이름만 적어 간편하게 JOIN할 수 있게 해주는 구문이다.

In [8]:
# Customer/address lookup written with USING: the join column is named once
# because it is called address_id in both tables.
sen = '''

SELECT c.first_name, c.last_name, a.address
FROM customer AS c INNER JOIN address AS a
    USING (address_id);

'''

cursor.execute(sen)
using_rows = cursor.fetchall()
# Last expression of the cell: rendered as a table by the notebook.
pd.DataFrame(using_rows)


Unnamed: 0,first_name,last_name,address
0,MARY,SMITH,1913 Hanoi Way
1,PATRICIA,JOHNSON,1121 Loja Avenue
2,LINDA,WILLIAMS,692 Joliet Street
3,BARBARA,JONES,1566 Inegl Manor
4,ELIZABETH,BROWN,53 Idfu Parkway
...,...,...,...
594,TERRENCE,GUNDERSON,844 Bucuresti Place
595,ENRIQUE,FORSYTHE,1101 Bucuresti Boulevard
596,FREDDIE,DUGGAN,1103 Quilmes Boulevard
597,WADE,DELVALLE,1331 Usak Boulevard


### 3개 JOIN 하기

address table을 중심으로 customer table과 city table이 연결됨

In [11]:
# Three-table join: customer -> address (via address_id) -> city (via
# city_id), returning each customer's name together with the city name.
sen = '''

SELECT c.first_name, c.last_name, ct.city
FROM customer AS c 
    INNER JOIN address AS a
    ON c.address_id = a.address_id
    INNER JOIN city AS ct
    ON ct.city_id = a.city_id;
'''

cursor.execute(sen)
customer_city_rows = cursor.fetchall()
# Last expression of the cell: rendered as a table by the notebook.
pd.DataFrame(customer_city_rows)

Unnamed: 0,first_name,last_name,city
0,MARY,SMITH,Sasebo
1,PATRICIA,JOHNSON,San Bernardino
2,LINDA,WILLIAMS,Athenai
3,BARBARA,JONES,Myingyan
4,ELIZABETH,BROWN,Nantou
...,...,...,...
594,TERRENCE,GUNDERSON,Jinzhou
595,ENRIQUE,FORSYTHE,Patras
596,FREDDIE,DUGGAN,Sullana
597,WADE,DELVALLE,Lausanne


### SubQuery로 구현하기

In [12]:
# Join customer against a derived table (`addr`): the subquery first joins
# address with city and keeps only rows whose district is 'California', then
# the outer query attaches the matching customers by address_id.
sen = '''

SELECT c.first_name, c.last_name, addr.address, addr.city
FROM customer AS c
    INNER JOIN
    (
        SELECT a.address_id, a.address, ct.city
        FROM address AS a
            INNER JOIN city AS ct
            ON a.city_id = ct.city_id
            WHERE a.district = 'California'
    ) AS addr
    ON c.address_id = addr.address_id;

'''

cursor.execute(sen)
california_rows = cursor.fetchall()
# Last expression of the cell: rendered as a table by the notebook.
pd.DataFrame(california_rows)

Unnamed: 0,first_name,last_name,address,city
0,PATRICIA,JOHNSON,1121 Loja Avenue,San Bernardino
1,BETTY,WHITE,770 Bydgoszcz Avenue,Citrus Heights
2,ALICE,STEWART,1135 Izumisano Parkway,Fontana
3,ROSA,REYNOLDS,793 Cam Ranh Avenue,Lancaster
4,RENEE,LANE,533 al-Ayn Boulevard,Compton
5,KRISTIN,JOHNSTON,226 Brest Manor,Sunnyvale
6,CASSANDRA,WALTERS,920 Kumbakonam Loop,Salinas
7,JACOB,LANCE,1866 al-Qatif Avenue,El Monte
8,RENE,MCALISTER,1895 Zhezqazghan Drive,Garden Grove


In [16]:
# Titles of films in which CATE MCQUEEN or CUBA BIRCH appears: film is linked
# to actor through the film_actor bridge table, and the WHERE clause accepts
# either full name.
sen = '''

SELECT f.title
FROM film AS f 
    INNER JOIN film_actor AS fa
    ON f.film_id = fa.film_id
    INNER JOIN actor AS a
    ON fa.actor_id = a.actor_id
WHERE (
    (a.first_name = 'CATE' AND a.last_name ='MCQUEEN') OR
    (a.first_name = 'CUBA' AND a.last_name = 'BIRCH')
    )

'''

cursor.execute(sen)
either_actor_rows = cursor.fetchall()
# Last expression of the cell: rendered as a table by the notebook.
pd.DataFrame(either_actor_rows)

### 두 배우가 모두 출연한 영화를 찾으려면 film_actor와 actor 테이블을 서로 다른 alias로 각각 두 번씩 JOIN해야 한다.

In [19]:
# Titles of films featuring BOTH actors. film_actor and actor are each joined
# twice under separate aliases (fa1/a1 and fa2/a2) so that one pair can be
# pinned to CATE MCQUEEN and the other to CUBA BIRCH for the same film row.
sen = '''

SELECT f.title
FROM film AS f 
    INNER JOIN film_actor AS fa1
    ON f.film_id = fa1.film_id
    INNER JOIN actor AS a1
    ON fa1.actor_id = a1.actor_id
    INNER JOIN film_actor AS fa2
    ON f.film_id = fa2.film_id
    INNER JOIN actor AS a2
    ON fa2.actor_id = a2.actor_id
WHERE (
    (a1.first_name = 'CATE' AND a1.last_name ='MCQUEEN') AND
    (a2.first_name = 'CUBA' AND a2.last_name = 'BIRCH')
    )

'''

cursor.execute(sen)
both_actor_rows = cursor.fetchall()
# Last expression of the cell: rendered as a table by the notebook.
pd.DataFrame(both_actor_rows)

Unnamed: 0,title
0,BLOOD ARGONAUTS
1,TOWERS HURRICANE


### EXERCISE

exercise 5-1
1. a
2. ct.city_id

In [22]:
# Exercise 5-2: titles of films in which an actor whose first name is JOHN
# appears, resolved through the film_actor bridge table.
sen = '''

SELECT f.title, a.first_name
FROM film AS f
    INNER JOIN film_actor AS fa
    ON f.film_id = fa.film_id
    INNER JOIN actor AS a
    ON fa.actor_id = a.actor_id
WHERE a.first_name ='JOHN'

'''

cursor.execute(sen)
john_rows = cursor.fetchall()
# Last expression of the cell: rendered as a table by the notebook.
pd.DataFrame(john_rows)

Unnamed: 0,title,first_name
0,ALLEY EVOLUTION,JOHN
1,BEVERLY OUTLAW,JOHN
2,CANDLES GRAPES,JOHN
3,CLEOPATRA DEVIL,JOHN
4,COLOR PHILADELPHIA,JOHN
5,CONQUERER NUTS,JOHN
6,DAUGHTER MADIGAN,JOHN
7,GLEAMING JAWBREAKER,JOHN
8,GOLDMINE TYCOON,JOHN
9,HOME PITY,JOHN


In [33]:
# Exercise 5-3: self-join -- use the same table twice under different aliases
# to list pairs of distinct addresses located in the same city.
# The join predicate lives in ON (INNER JOIN without ON is a MySQL-only
# extension), and WHERE only filters out an address paired with itself.
sen = '''

SELECT a1.address AS addr1, a2.address AS addr2, a1.city_id
FROM address AS a1
    INNER JOIN address AS a2
    ON a1.city_id = a2.city_id
WHERE a1.address_id <> a2.address_id;

'''

cursor.execute(sen)
# Each pair shows up twice (A/B and B/A) because the filter is "not equal"
# rather than an ordering comparison.
pd.DataFrame(cursor.fetchall())

Unnamed: 0,addr1,addr2,city_id
0,47 MySakila Drive,23 Workhaven Lane,300
1,28 MySQL Boulevard,1411 Lillydale Drive,576
2,23 Workhaven Lane,47 MySakila Drive,300
3,1411 Lillydale Drive,28 MySQL Boulevard,576
4,1497 Yuzhou Drive,548 Uruapan Street,312
5,587 Benguela Manor,43 Vilnius Manor,42
6,548 Uruapan Street,1497 Yuzhou Drive,312
7,43 Vilnius Manor,587 Benguela Manor,42


파이썬 문법으로 해도 좋고 SQL 문법으로 해도 좋다. 
지금은 SQL 문법에 익숙해지자.