# SQL 語法入門

> 資料分析

郭耀仁

In [1]:
# 連結資料庫
import sqlite3
import pandas as pd

conn = sqlite3.connect('nba.db')

## 摘要

- 聚合與分組
- 進階的 SQL 查詢技巧
- 聯結關聯式資料庫中的表格

## 聚合與分組

## 我們可以將函式粗分為兩種類型

- 通用函式（Universal functions，在 SQL 文件中通常稱為純量函式 Scalar functions）
- 聚合函式（Aggregate functions）

## 其中通用函式的特性是輸入與輸出的列數相同

In [2]:
sql_query = """
SELECT firstName,
       UPPER(firstName) AS upper_first_name
  FROM players
 LIMIT 10;
"""

In [3]:
pd.read_sql(sql_query, conn)

Unnamed: 0,firstName,upper_first_name
0,Vince,VINCE
1,Tyson,TYSON
2,LeBron,LEBRON
3,Carmelo,CARMELO
4,Kyle,KYLE
5,Udonis,UDONIS
6,Dwight,DWIGHT
7,Andre,ANDRE
8,JR,JR
9,Trevor,TREVOR


## 聚合函式的特性則是輸出的列數多半遠少於輸入的列數

In [4]:
sql_query = """
SELECT AVG(heightMeters) AS avg_height_meters
  FROM players;
"""
pd.read_sql(sql_query, conn)

Unnamed: 0,avg_height_meters
0,1.99121


## SQLite 常用的聚合函式

<https://www.sqlitetutorial.net/sqlite-aggregate-functions/>

Source: <https://www.sqlitetutorial.net/>

## 使用 `COUNT()` 瞭解表格的觀測值數

In [5]:
sql_query = """
SELECT COUNT(*) AS n_players
  FROM players;
"""
pd.read_sql(sql_query, conn)

Unnamed: 0,n_players
0,504


## 使用 `COUNT()` 搭配 `PRAGMA_TABLE_INFO()` 瞭解表格的欄位數

In [6]:
sql_query = """
SELECT COUNT(*) AS n_columns
  FROM PRAGMA_TABLE_INFO('players');
"""
pd.read_sql(sql_query, conn)

Unnamed: 0,n_columns
0,20


## 使用 `MAX()` 與 `MIN()` 尋找最大與最小值

In [7]:
sql_query = """
SELECT MAX(heightMeters),
       MIN(heightMeters)
  FROM players;
"""
pd.read_sql(sql_query, conn)

Unnamed: 0,MAX(heightMeters),MIN(heightMeters)
0,2.26,1.75


## 使用 `GROUP BY`

`GROUP BY` 如果單獨使用、沒有搭配聚合函式，作用與 `DISTINCT` 相同：每個分組只回傳一列觀測值。

In [8]:
sql_query = """
SELECT confName,
       divName
  FROM teams
 GROUP BY divName
 ORDER BY confName, divName;
"""

In [9]:
pd.read_sql(sql_query, conn)

Unnamed: 0,confName,divName
0,East,Atlantic
1,East,Central
2,East,Southeast
3,West,Northwest
4,West,Pacific
5,West,Southwest


## 使用 `GROUP BY` 搭配聚合函式就可以實現分組聚合

In [10]:
sql_query = """
SELECT country,
       COUNT(*) AS n_players
  FROM players
 GROUP BY country
 ORDER BY n_players DESC, country
 LIMIT 10;
"""

In [11]:
pd.read_sql(sql_query, conn)

Unnamed: 0,country,n_players
0,USA,384
1,Canada,20
2,France,11
3,Australia,8
4,Croatia,6
5,Serbia,6
6,Germany,5
7,Latvia,4
8,Spain,4
9,Turkey,4


In [12]:
sql_query = """
SELECT pos,
       ROUND(AVG(heightMeters), 2) AS avgHeightMeters
  FROM players
 GROUP BY pos
 ORDER BY avgHeightMeters,
          pos;
"""
pd.read_sql(sql_query, conn)

Unnamed: 0,pos,avgHeightMeters
0,G,1.9
1,G-F,1.98
2,F-G,2.0
3,F,2.03
4,F-C,2.09
5,C-F,2.1
6,C,2.12


## 使用 `HAVING` 篩選聚合後的數值

- `WHERE` 在分組聚合之前篩選，應對以單個「觀測值」為層級的數值
- `HAVING` 在分組聚合之後篩選，應對由多個「觀測值」聚合而得的數值

In [13]:
sql_query = """
SELECT country,
       COUNT(*) AS n_players
  FROM players
 GROUP BY country
HAVING n_players > 5
 ORDER BY n_players DESC;
"""

In [14]:
pd.read_sql(sql_query, conn)

Unnamed: 0,country,n_players
0,USA,384
1,Canada,20
2,France,11
3,Australia,8
4,Croatia,6
5,Serbia,6


In [15]:
sql_query = """
SELECT pos,
       ROUND(AVG(heightMeters), 2) AS avgHeightMeters
  FROM players
 GROUP BY pos
HAVING avgHeightMeters > 2
 ORDER BY avgHeightMeters, pos;
"""
pd.read_sql(sql_query, conn)

Unnamed: 0,pos,avgHeightMeters
0,F,2.03
1,F-C,2.09
2,C-F,2.1
3,C,2.12


## 進階的 SQL 查詢技巧

## 使用 `CASE` 為資料重新歸類

使用條件敘述作重新歸類的依據。

```sql
CASE WHEN condition THEN result
     WHEN another_condition THEN result
     ELSE result
END AS variable_name
```

## `CASE` 的不同用法

- 將數值歸類成文字
- 將細項歸類成大項

In [16]:
sql_query = """
SELECT firstName,
       lastName,
       heightMeters,
       CASE WHEN heightMeters >= 2 THEN 'Taller than 2 m'
            ELSE 'Shorter than 2 m'
       END AS heightCategory
  FROM players
 LIMIT 10;
"""
pd.read_sql(sql_query, conn)

Unnamed: 0,firstName,lastName,heightMeters,heightCategory
0,Vince,Carter,1.98,Shorter than 2 m
1,Tyson,Chandler,2.13,Taller than 2 m
2,LeBron,James,2.06,Taller than 2 m
3,Carmelo,Anthony,2.03,Taller than 2 m
4,Kyle,Korver,2.01,Taller than 2 m
5,Udonis,Haslem,2.03,Taller than 2 m
6,Dwight,Howard,2.08,Taller than 2 m
7,Andre,Iguodala,1.98,Shorter than 2 m
8,JR,Smith,1.98,Shorter than 2 m
9,Trevor,Ariza,2.03,Taller than 2 m


## 細項歸類成大項的根據

```python
{
    'G-F': 'G',
    'G': 'G',
    'F-G': 'F',
    'F-C': 'F',
    'F': 'F',
    'C-F': 'C',
    'C': 'C'
}
```

In [17]:
sql_query = """
SELECT firstName,
       lastName,
       pos,
       CASE WHEN pos IN ('G-F', 'G') THEN 'G'
            WHEN pos IN ('C-F', 'C') THEN 'C'
            ELSE 'F'
       END AS posNew
  FROM players
 LIMIT 10;
"""
pd.read_sql(sql_query, conn)

Unnamed: 0,firstName,lastName,pos,posNew
0,Vince,Carter,G-F,G
1,Tyson,Chandler,C,C
2,LeBron,James,F,F
3,Carmelo,Anthony,F,F
4,Kyle,Korver,G-F,G
5,Udonis,Haslem,F,F
6,Dwight,Howard,C-F,C
7,Andre,Iguodala,G-F,G
8,JR,Smith,G,G
9,Trevor,Ariza,F,F


## 使用子查詢

子查詢指的是在一段查詢語法之中嵌入另一段需要先執行的查詢語法，將子查詢擺放至小括號之中並且加上縮排。

## 目前誰是生涯總得分最高的球員

- 首先要從 `careerSummaries` 著手
- 接著利用在前一個查詢獲得的 `MAX(points)` 對 `careerSummaries` 進行篩選
- 最後是利用前一個查詢獲得的 `personId` 對 `players` 進行篩選

In [18]:
sql_query = """
SELECT MAX(points)
  FROM careerSummaries;
"""
pd.read_sql(sql_query, conn)

Unnamed: 0,MAX(points)
0,34087


In [19]:
sql_query = """
SELECT personId
  FROM careerSummaries
 WHERE points = 34087;
"""
pd.read_sql(sql_query, conn)

Unnamed: 0,personId
0,2544


In [20]:
sql_query = """
SELECT firstName,
       lastName
  FROM players
 WHERE personId = 2544;
"""
pd.read_sql(sql_query, conn)

Unnamed: 0,firstName,lastName
0,LeBron,James


In [21]:
sql_query = """
SELECT firstName,
       lastName
  FROM players
 WHERE personId = (
           SELECT personId
             FROM careerSummaries
            WHERE points = (
                SELECT MAX(points)
                  FROM careerSummaries
            )
       );
"""
pd.read_sql(sql_query, conn)

Unnamed: 0,firstName,lastName
0,LeBron,James


## 目前湖人隊的球員陣容有誰

- 首先要從 `teams` 著手
- 接著利用在前一個查詢獲得的 `teamId` 對 `rosters` 進行篩選
- 最後是利用前一個查詢獲得的 `personId` 對 `players` 進行篩選

In [22]:
sql_query = """
SELECT teamId
    FROM teams
    WHERE nickname = 'Lakers';
"""
pd.read_sql(sql_query, conn)

Unnamed: 0,teamId
0,1610612747


In [23]:
sql_query = """
SELECT personId
  FROM rosters
 WHERE teamId = 1610612747;
"""
pd.read_sql(sql_query, conn)

Unnamed: 0,personId
0,2544
1,2730
2,2747
3,200765
4,201162
5,201580
6,201980
7,202340
8,202693
9,203076


In [24]:
sql_query = """
SELECT firstName,
       lastName
  FROM players
 WHERE personId IN (
           SELECT personId
             FROM rosters
            WHERE teamId = 1610612747
       );
"""
pd.read_sql(sql_query, conn)

Unnamed: 0,firstName,lastName
0,LeBron,James
1,Dwight,Howard
2,JR,Smith
3,Rajon,Rondo
4,Jared,Dudley
5,JaVale,McGee
6,Danny,Green
7,Avery,Bradley
8,Markieff,Morris
9,Anthony,Davis


## 使用子查詢來生成新欄位

依據鋒衛位置瞭解 NBA 球員的組成：
- 首先要從 `players` 著手，計算球員的總數
- 接著依據 `pos` 對 `players` 進行分組聚合，計算各個位置的球員數
- 最後是將各個位置的球員數除以球員的總數，計算比例

In [25]:
sql_query = """
SELECT COUNT(*)
  FROM players;
"""
pd.read_sql(sql_query, conn)

Unnamed: 0,COUNT(*)
0,504


In [26]:
sql_query = """
SELECT pos,
       COUNT(*) player_count,
       504 AS ttl_player
  FROM players
 GROUP BY pos;
"""
pd.read_sql(sql_query, conn)

Unnamed: 0,pos,player_count,ttl_player
0,C,33,504
1,C-F,28,504
2,F,137,504
3,F-C,39,504
4,F-G,26,504
5,G,176,504
6,G-F,65,504


In [27]:
sql_query = """
SELECT pos,
       COUNT(*) player_count,
       504 AS ttl_player,
       CAST(COUNT(*) AS REAL) / CAST(504 AS REAL) AS ratio
  FROM players
 GROUP BY pos;
"""
pd.read_sql(sql_query, conn)

Unnamed: 0,pos,player_count,ttl_player,ratio
0,C,33,504,0.065476
1,C-F,28,504,0.055556
2,F,137,504,0.271825
3,F-C,39,504,0.077381
4,F-G,26,504,0.051587
5,G,176,504,0.349206
6,G-F,65,504,0.128968


In [28]:
sql_query = """
SELECT pos,
       CAST(COUNT(*) AS REAL) / (
           SELECT CAST(COUNT(*) AS REAL)
             FROM players
       ) AS ratio
  FROM players
 GROUP BY pos;
"""
pd.read_sql(sql_query, conn)

Unnamed: 0,pos,ratio
0,C,0.065476
1,C-F,0.055556
2,F,0.271825
3,F-C,0.077381
4,F-G,0.051587
5,G,0.349206
6,G-F,0.128968


## 聯結關聯式資料庫中的表格

## 什麼是關聯式資料庫

> 依照關聯式模型所建構的多個有相關的表格，關聯式模型指的是每個表格的觀測值層級都是獨立並且獨一，並能夠透過表格聯結將不同表格的觀測值關聯至一個查詢結果。

## 為什麼採用關聯式模型

> 採用關聯式模型能夠減少資料的重複並且讓維護變得簡單。

## 使用 `JOIN` 與 `ON` 保留字將資料進行關聯

```sql
SELECT *
  FROM left_table JOIN right_table
    ON left_table.primary_key_column = right_table.foreign_key_column
```

## 聯結 `players` 與 `careerSummaries`

In [29]:
sql_query = """
SELECT *
  FROM players
  JOIN careerSummaries
    ON players.personId = careerSummaries.personId;
"""

In [30]:
pd.read_sql(sql_query, conn)

Unnamed: 0,firstName,lastName,temporaryDisplayName,personId,teamId,jersey,isActive,pos,heightFeet,heightInches,...,ftm,fta,pFouls,points,gamesPlayed,gamesStarted,plusMinus,min,dd2,td3
0,Vince,Carter,"Carter, Vince",1713,1610612737,15.0,True,G-F,6,6,...,4852.0,6082.0,3995.0,25728.0,1541.0,982.0,1816.0,46371.0,90.0,5.0
1,Tyson,Chandler,"Chandler, Tyson",2199,1610612745,19.0,True,C,7,0,...,2393.0,3714.0,3268.0,9509.0,1160.0,886.0,325.0,31617.0,292.0,0.0
2,LeBron,James,"James, LeBron",2544,1610612747,23.0,True,F,6,9,...,7379.0,10044.0,2313.0,34087.0,1258.0,1257.0,6887.0,48327.0,485.0,94.0
3,Carmelo,Anthony,"Anthony, Carmelo",2546,1610612757,0.0,True,F,6,8,...,6028.0,7424.0,3204.0,26314.0,1114.0,1106.0,1614.0,39750.0,171.0,2.0
4,Kyle,Korver,"Korver, Kyle",2594,1610612749,26.0,True,G-F,6,7,...,1290.0,1472.0,2512.0,11903.0,1224.0,422.0,2885.0,31056.0,7.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
499,Matt,Thomas,"Thomas, Matt",1629744,1610612761,21.0,True,G,6,4,...,7.0,10.0,31.0,150.0,33.0,0.0,22.0,321.0,0.0,0.0
500,Tariq,Owens,"Owens, Tariq",1629745,1610612756,41.0,True,F,6,10,...,2.0,2.0,1.0,4.0,3.0,0.0,-16.0,15.0,0.0,0.0
501,Javonte,Green,"Green, Javonte",1629750,1610612738,43.0,True,G-F,6,4,...,23.0,36.0,37.0,127.0,44.0,1.0,-5.0,414.0,0.0,0.0
502,Juwan,Morgan,"Morgan, Juwan",1629752,1610612762,16.0,True,F,6,7,...,0.0,0.0,7.0,19.0,16.0,0.0,19.0,73.0,0.0,0.0


## 在變數上加上 key 的註記：主鍵（Primary key）

主鍵用來標註一個表格中獨立的觀測值，什麼樣的變數可以被標記為主鍵？

1. 必須獨一
2. 不得有遺漏值

In [31]:
sql_query = """
SELECT name
  FROM PRAGMA_TABLE_INFO('players')
 WHERE pk = 1;
"""
pd.read_sql(sql_query, conn)

Unnamed: 0,name
0,personId


## 在變數上加上 key 的註記：外鍵（Foreign key）

與其他相關表格的主鍵相對應的欄位，可以被標註為外鍵。

In [32]:
sql_query = """
SELECT *
  FROM PRAGMA_FOREIGN_KEY_LIST('players');
"""
pd.read_sql(sql_query, conn)

Unnamed: 0,id,seq,table,from,to,on_update,on_delete,match
0,0,0,careerSummaries,personId,personId,RESTRICT,RESTRICT,NONE
1,1,0,teams,teamId,teamId,RESTRICT,RESTRICT,NONE


## `JOIN` 預設的聯結邏輯是 `INNER JOIN`

- `JOIN` 將左右表格的「交集」觀測值回傳
- `LEFT JOIN` 將左表格「所有」觀測值回傳，對應不到的以遺漏值填補
- `RIGHT JOIN` 將右表格「所有」觀測值回傳，對應不到的以遺漏值填補（SQLite 自 3.39.0 版起支援，較舊的版本不支援）
- `FULL OUTER JOIN` 將左右表格的「聯集」觀測值回傳，對應不到的以遺漏值填補（SQLite 自 3.39.0 版起支援，較舊的版本不支援）

## 建構一個左表格 `veteran_players`

In [33]:
sql_query = """
SELECT personId,
       temporaryDisplayName
  FROM players
 LIMIT 10;
"""

In [34]:
pd.read_sql(sql_query, conn)

Unnamed: 0,personId,temporaryDisplayName
0,1713,"Carter, Vince"
1,2199,"Chandler, Tyson"
2,2544,"James, LeBron"
3,2546,"Anthony, Carmelo"
4,2594,"Korver, Kyle"
5,2617,"Haslem, Udonis"
6,2730,"Howard, Dwight"
7,2738,"Iguodala, Andre"
8,2747,"Smith, JR"
9,2772,"Ariza, Trevor"


## 建構一個右表格 `top_scorers`

In [35]:
sql_query = """
SELECT personId,
       ppg
  FROM careerSummaries
 ORDER BY ppg DESC
 LIMIT 10;
"""

In [36]:
pd.read_sql(sql_query, conn)

Unnamed: 0,personId,ppg
0,2544,27.1
1,201142,27.0
2,201935,25.1
3,1629029,24.4
4,203954,24.1
5,203076,24.0
6,203081,24.0
7,2546,23.6
8,1629027,23.6
9,1629627,23.6


## 預設的 `JOIN`

In [37]:
sql_query = """
SELECT *
  FROM (SELECT personId,
               temporaryDisplayName
          FROM players
         LIMIT 10) AS veteran_players
  JOIN (SELECT personId,
               ppg
          FROM careerSummaries
         ORDER BY ppg DESC
         LIMIT 10) AS top_scorers
    ON veteran_players.personId = top_scorers.personId;
"""

In [38]:
pd.read_sql(sql_query, conn)

Unnamed: 0,personId,temporaryDisplayName,personId.1,ppg
0,2544,"James, LeBron",2544,27.1
1,2546,"Anthony, Carmelo",2546,23.6


## 改以 `LEFT JOIN` 聯結

In [39]:
sql_query = """
SELECT *
  FROM (SELECT personId,
               temporaryDisplayName
          FROM players
         LIMIT 10) AS veteran_players
  LEFT JOIN (SELECT personId,
                    ppg
               FROM careerSummaries
              ORDER BY ppg DESC
              LIMIT 10) AS top_scorers
    ON veteran_players.personId = top_scorers.personId;
"""

In [40]:
pd.read_sql(sql_query, conn)

Unnamed: 0,personId,temporaryDisplayName,personId.1,ppg
0,1713,"Carter, Vince",,
1,2199,"Chandler, Tyson",,
2,2544,"James, LeBron",2544.0,27.1
3,2546,"Anthony, Carmelo",2546.0,23.6
4,2594,"Korver, Kyle",,
5,2617,"Haslem, Udonis",,
6,2730,"Howard, Dwight",,
7,2738,"Iguodala, Andre",,
8,2747,"Smith, JR",,
9,2772,"Ariza, Trevor",,


## 以 Python pandas 示範 `RIGHT JOIN`

In [41]:
left_df = pd.read_sql("""SELECT personId, temporaryDisplayName FROM players LIMIT 10""", conn)
right_df = pd.read_sql("""SELECT personId, ppg FROM careerSummaries ORDER BY ppg DESC LIMIT 10""", conn)
pd.merge(left_df, right_df, left_on='personId', right_on='personId', how='right')

Unnamed: 0,personId,temporaryDisplayName,ppg
0,2544,"James, LeBron",27.1
1,2546,"Anthony, Carmelo",23.6
2,201142,,27.0
3,201935,,25.1
4,1629029,,24.4
5,203954,,24.1
6,203076,,24.0
7,203081,,24.0
8,1629027,,23.6
9,1629627,,23.6


## 以 Python pandas 示範 `FULL OUTER JOIN`

In [42]:
pd.merge(left_df, right_df, left_on='personId', right_on='personId', how='outer')

Unnamed: 0,personId,temporaryDisplayName,ppg
0,1713,"Carter, Vince",
1,2199,"Chandler, Tyson",
2,2544,"James, LeBron",27.1
3,2546,"Anthony, Carmelo",23.6
4,2594,"Korver, Kyle",
5,2617,"Haslem, Udonis",
6,2730,"Howard, Dwight",
7,2738,"Iguodala, Andre",
8,2747,"Smith, JR",
9,2772,"Ariza, Trevor",


## 使用 `IS NULL` 找出有遺漏的觀測值

In [43]:
sql_query = """
SELECT veteran_players.personId,
       veteran_players.temporaryDisplayName
  FROM (SELECT personId,
               temporaryDisplayName
          FROM players
         LIMIT 10) AS veteran_players
  LEFT JOIN (SELECT personId,
                    ppg
               FROM careerSummaries
              ORDER BY ppg DESC
              LIMIT 10) AS top_scorers
    ON veteran_players.personId = top_scorers.personId
 WHERE top_scorers.ppg IS NULL;
"""

In [44]:
pd.read_sql(sql_query, conn)

Unnamed: 0,personId,temporaryDisplayName
0,1713,"Carter, Vince"
1,2199,"Chandler, Tyson"
2,2594,"Korver, Kyle"
3,2617,"Haslem, Udonis"
4,2730,"Howard, Dwight"
5,2738,"Iguodala, Andre"
6,2747,"Smith, JR"
7,2772,"Ariza, Trevor"


## 關聯式模型定義了三種表格間的關係

- 一對一
- 一對多
- 多對多

In [45]:
sql_query = """
SELECT *
  FROM players
  JOIN careerSummaries
    ON players.personId = careerSummaries.personId;
"""

In [46]:
pd.read_sql(sql_query, conn)

Unnamed: 0,firstName,lastName,temporaryDisplayName,personId,teamId,jersey,isActive,pos,heightFeet,heightInches,...,ftm,fta,pFouls,points,gamesPlayed,gamesStarted,plusMinus,min,dd2,td3
0,Vince,Carter,"Carter, Vince",1713,1610612737,15.0,True,G-F,6,6,...,4852.0,6082.0,3995.0,25728.0,1541.0,982.0,1816.0,46371.0,90.0,5.0
1,Tyson,Chandler,"Chandler, Tyson",2199,1610612745,19.0,True,C,7,0,...,2393.0,3714.0,3268.0,9509.0,1160.0,886.0,325.0,31617.0,292.0,0.0
2,LeBron,James,"James, LeBron",2544,1610612747,23.0,True,F,6,9,...,7379.0,10044.0,2313.0,34087.0,1258.0,1257.0,6887.0,48327.0,485.0,94.0
3,Carmelo,Anthony,"Anthony, Carmelo",2546,1610612757,0.0,True,F,6,8,...,6028.0,7424.0,3204.0,26314.0,1114.0,1106.0,1614.0,39750.0,171.0,2.0
4,Kyle,Korver,"Korver, Kyle",2594,1610612749,26.0,True,G-F,6,7,...,1290.0,1472.0,2512.0,11903.0,1224.0,422.0,2885.0,31056.0,7.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
499,Matt,Thomas,"Thomas, Matt",1629744,1610612761,21.0,True,G,6,4,...,7.0,10.0,31.0,150.0,33.0,0.0,22.0,321.0,0.0,0.0
500,Tariq,Owens,"Owens, Tariq",1629745,1610612756,41.0,True,F,6,10,...,2.0,2.0,1.0,4.0,3.0,0.0,-16.0,15.0,0.0,0.0
501,Javonte,Green,"Green, Javonte",1629750,1610612738,43.0,True,G-F,6,4,...,23.0,36.0,37.0,127.0,44.0,1.0,-5.0,414.0,0.0,0.0
502,Juwan,Morgan,"Morgan, Juwan",1629752,1610612762,16.0,True,F,6,7,...,0.0,0.0,7.0,19.0,16.0,0.0,19.0,73.0,0.0,0.0


In [47]:
sql_query = """
SELECT *
  FROM teams
  JOIN rosters
    ON teams.teamId = rosters.teamId;
"""
pd.read_sql(sql_query, conn)

Unnamed: 0,isNBAFranchise,isAllStar,city,altCityName,fullName,tricode,teamId,nickname,urlName,teamShortName,confName,divName,personId,teamId.1
0,True,False,Atlanta,Atlanta,Atlanta Hawks,ATL,1610612737,Hawks,hawks,Atlanta,East,Southeast,1713,1610612737
1,True,False,Houston,Houston,Houston Rockets,HOU,1610612745,Rockets,rockets,Houston,West,Southwest,2199,1610612745
2,True,False,Los Angeles,Los Angeles Lakers,Los Angeles Lakers,LAL,1610612747,Lakers,lakers,L.A. Lakers,West,Pacific,2544,1610612747
3,True,False,Portland,Portland,Portland Trail Blazers,POR,1610612757,Trail Blazers,blazers,Portland,West,Northwest,2546,1610612757
4,True,False,Milwaukee,Milwaukee,Milwaukee Bucks,MIL,1610612749,Bucks,bucks,Milwaukee,East,Central,2594,1610612749
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
499,True,False,Toronto,Toronto,Toronto Raptors,TOR,1610612761,Raptors,raptors,Toronto,East,Atlantic,1629744,1610612761
500,True,False,Phoenix,Phoenix,Phoenix Suns,PHX,1610612756,Suns,suns,Phoenix,West,Pacific,1629745,1610612756
501,True,False,Boston,Boston,Boston Celtics,BOS,1610612738,Celtics,celtics,Boston,East,Atlantic,1629750,1610612738
502,True,False,Utah,Utah,Utah Jazz,UTA,1610612762,Jazz,jazz,Utah,West,Northwest,1629752,1610612762


## 表格聯結就如同水平合併

![Imgur](https://i.imgur.com/hq7fS67.png)

Source: [Pandas User Guide](https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html)

## 也能透過 `UNION` 垂直合併表格

![Imgur](https://i.imgur.com/B7xawvp.png)

Source: [Pandas User Guide](https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html)

In [48]:
sql_query = """
SELECT firstName,
       lastName,
       'height' AS category,
       heightmeters AS value
  FROM players
 WHERE firstName = 'LeBron'
 UNION
SELECT firstName,
       lastName,
       'weight' AS category,
       weightKilograms AS value
  FROM players
 WHERE firstName = 'LeBron';
"""

In [49]:
pd.read_sql(sql_query, conn)

Unnamed: 0,firstName,lastName,category,value
0,LeBron,James,height,2.06
1,LeBron,James,weight,113.4


## 這是目前涵蓋的查詢保留字

使用 SQL 語法時，保留字順序必須要遵守。

```sql
SELECT DISTINCT CAST(column_name AS data_type) AS alias_name
  FROM left_table
  JOIN right_table
    ON left_table.pk = right_table.fk
 WHERE conditions
 GROUP BY column_name
HAVING conditions
 UNION
SELECT ...
 ORDER BY column_name DESC
 LIMIT n_obs;
```

## 前往資料分析：隨堂練習

[資料分析：隨堂練習](https://mybinder.org/v2/gh/yaojenkuo/introduction-to-sql/master?filepath=03-exercises.ipynb)