#### Imports

In [9]:
import pandas as pd
import sqlite3
pd.set_option('max_colwidth', None)

#### Initiate an empty database

In [3]:
connection = sqlite3.connect(":memory:")

#### Create a table

In [4]:
# CREATE A TABLE
connection.execute('''CREATE TABLE faculty
             (
             id int,
             first_name text,
             last_name text,
             date_of_birth text
             )''')

<sqlite3.Cursor at 0x7f9b7c798650>

#### Insert some data

In [5]:
# INSERT DATA
connection.execute('''
INSERT INTO faculty
VALUES ("1", "John", "Smith", "1970-01-02"
)
''')

<sqlite3.Cursor at 0x7f9b7c798570>

In [6]:
# TRIVIAL TEST VIA SELECT
result = connection.execute("""
SELECT last_name, date_of_birth

FROM faculty
"""
)
result.fetchall()

[('Smith', '1970-01-02')]

#### Make the SQL database to a pandas DataFrame

In [7]:
pd.read_sql("""SELECT * FROM faculty""", connection)

Unnamed: 0,id,first_name,last_name,date_of_birth
0,1,John,Smith,1970-01-02


---

## Cities Database

Download the [Cities Database](http://oxrep.classics.ox.ac.uk/oxrep/docs/Hanson2016/Hanson2016_CitiesDatabase_OxREP.xlsx) of Hanson, J. W. (2016) _Cities Database_ (OXREP databases). Version 1.0. Accessed (date): <http://oxrep.classics.ox.ac.uk/databases/cities/>. DOI: <https://doi.org/10.5287/bodleian:eqapevAn8>

In titles, replace all spaces with underscores and convert case to lowercase.

In [8]:
# This line resets the memory.
connection = sqlite3.connect(":memory:")

# Read the excel as pandas DataFrame
cities_df = pd.read_excel('Hanson2016_citiesDatabase_OxREP.xlsx', sheet_name = 'cities')

# Turn the DataFrame to SQL
cities_df.to_sql('cities', con = connection, index = False) # Why False?

cities_df

FileNotFoundError: [Errno 2] No such file or directory: 'Hanson2016_citiesDatabase_OxREP.xlsx'

#### Let's perform a query

In [None]:
pd.read_sql("SELECT * FROM cities", connection).head()

#### `WHERE`

In [11]:
pd.read_sql("""

SELECT id, ancient_toponym, modern_toponym, province, country, start_date, end_date

FROM cities

WHERE province = 'Macedonia'

""", connection)

Unnamed: 0,id,ancient_toponym,modern_toponym,province,country,start_date,end_date
0,Hanson2016_1146,Amantia,Klos,Macedonia,Albania,-600,
1,Hanson2016_1147,Amphipolis,Amfipoli,Macedonia,Greece,-437,
2,Hanson2016_1148,Apollonia (Macedonia),Pojani,Macedonia,Albania,-588,
3,Hanson2016_1149,Beroea (Macedonia),Veria,Macedonia,Greece,-400,
4,Hanson2016_1150,Byllis,Gradisht,Macedonia,Albania,-400,
5,Hanson2016_1151,Cassandrea,Cassandrea,Macedonia,Greece,-600,
6,Hanson2016_1152,Dium,Dion,Macedonia,Greece,-400,
7,Hanson2016_1153,Dyrrachium,Durrës,Macedonia,Albania,-627,
8,Hanson2016_1154,Edessa,Edessa,Macedonia,Greece,-600,
9,Hanson2016_1155,Heraclea (Macedonia),Bitola,Macedonia,Macedonia,-359,


#### `AND`

In [13]:
pd.read_sql("""

SELECT id, ancient_toponym, modern_toponym, province, country, start_date, end_date

FROM cities

WHERE province = 'Macedonia'

AND start_date > -323

""", connection)

Unnamed: 0,id,ancient_toponym,modern_toponym,province,country,start_date,end_date
0,Hanson2016_1160,Stobi,Stobi,Macedonia,Macedonia,-229,
1,Hanson2016_1162,Thessalonica,Thessaloniki,Macedonia,Greece,-316,


#### What is NULL?

In [14]:
pd.read_sql("""

SELECT id, ancient_toponym, modern_toponym, province, country, start_date, end_date

FROM cities

WHERE province = 'Macedonia'

AND end_date IS NULL

""", connection).head()

Unnamed: 0,id,ancient_toponym,modern_toponym,province,country,start_date,end_date
0,Hanson2016_1146,Amantia,Klos,Macedonia,Albania,-600,
1,Hanson2016_1147,Amphipolis,Amfipoli,Macedonia,Greece,-437,
2,Hanson2016_1148,Apollonia (Macedonia),Pojani,Macedonia,Albania,-588,
3,Hanson2016_1149,Beroea (Macedonia),Veria,Macedonia,Greece,-400,
4,Hanson2016_1150,Byllis,Gradisht,Macedonia,Albania,-400,


#### Subquery: Find all cities older than Torone in Macedonia

In [15]:
pd.read_sql("""

SELECT id, ancient_toponym, modern_toponym, province, country, start_date, end_date

FROM cities

WHERE province = 'Macedonia'

AND start_date < 
            (SELECT start_date 

            FROM cities

            WHERE ancient_toponym = "Torone"
            )

""", connection)

Unnamed: 0,id,ancient_toponym,modern_toponym,province,country,start_date,end_date
0,Hanson2016_1146,Amantia,Klos,Macedonia,Albania,-600,
1,Hanson2016_1147,Amphipolis,Amfipoli,Macedonia,Greece,-437,
2,Hanson2016_1148,Apollonia (Macedonia),Pojani,Macedonia,Albania,-588,
3,Hanson2016_1151,Cassandrea,Cassandrea,Macedonia,Greece,-600,
4,Hanson2016_1153,Dyrrachium,Durrës,Macedonia,Albania,-627,
5,Hanson2016_1154,Edessa,Edessa,Macedonia,Greece,-600,
6,Hanson2016_1156,Lychnidus,Ochrid,Macedonia,Macedonia,-600,
7,Hanson2016_1157,Neapolis (Macedonia),Kavala,Macedonia,Greece,-650,
8,Hanson2016_1158,Pella (Macedonia),Pella,Macedonia,Greece,-413,
9,Hanson2016_1161,Thasos,Thasos,Macedonia,Greece,-700,


#### Let's select all counties where there were ancient cities

In [19]:
pd.read_sql("""

SELECT DISTINCT country 

FROM cities

ORDER BY country

""", connection)

Unnamed: 0,country
0,Albania
1,Algeria
2,Austria
3,Belgium
4,Bosnia and Herzegovina
5,Bulgaria
6,Croatia
7,Cyprus
8,Egypt
9,France


#### ` ORDER BY`

In [None]:
pd.read_sql("""

SELECT DISTINCT country

FROM cities

WHERE province = 'Macedonia'

""", connection)

#### `DISTINCT`

In [21]:
pd.read_sql("""

SELECT DISTINCT province

FROM cities

WHERE longitude > (SELECT latitude 
                    FROM cities
                    WHERE ancient_toponym = 'Athenae')
AND start_date < (SELECT start_date 
                    FROM cities
                    WHERE ancient_toponym = 'Athenae')

""", connection)

Unnamed: 0,province


#### Exercise: Find all the countries that have cities on ancient Macedonia

In [None]:
pd.read_sql("""

SELECT DISTINCT country

FROM cities

WHERE province = 'Macedonia'

""", connection)

#### Exercise: Find all provinces eastern than Athens (Athenae) that have cities are older than Athens

In [24]:
pd.read_sql("""

SELECT DISTINCT province

FROM cities

WHERE latitude > (SELECT latitude 
                    FROM cities
                    WHERE ancient_toponym = 'Athenae')
AND start_date > (SELECT start_date 
                    FROM cities
                    WHERE ancient_toponym = 'Athenae')
                    
""", connection)

Unnamed: 0,province
0,Achaea
1,Alpes Cottiae
2,Alpes Graiae et Poeninae
3,Alpes Maritimae
4,Asia
5,Baetica
6,Bithynia et Pontus
7,Britannia
8,Cappadocia et Galatia
9,Corsica et Sardinia


#### Exercise: Count all provinces eastern than Athens (Athenae) that have cities are older than Athens

In [1]:
pd.read_sql("""

SELECT DISTINCT province, count(*) as count 

FROM cities

WHERE latitude > (SELECT latitude 
                    FROM cities
                    WHERE ancient_toponym = 'Athenae')
AND start_date > (SELECT start_date 
                    FROM cities
                    WHERE ancient_toponym = 'Athenae')
                    

""", connection)

NameError: name 'pd' is not defined

#### Read the sheet _monuments_ of Hanson2016_citiesDatabase_OxREP

In [25]:
monuments_df = pd.read_excel('Hanson2016_citiesDatabase_OxREP.xlsx', sheet_name = 'monuments')

monuments_df.to_sql('monuments', con = connection, index = False)

pd.read_sql("SELECT * FROM monuments", connection).head(2)

Unnamed: 0,id,structure
0,Hanson2016_1,Acropolis
1,Hanson2016_1,Agora


Notice that we are not creating a new connection

In [26]:
pd.read_sql("""

SELECT cities.ancient_toponym, monuments.structure

FROM monuments

JOIN cities ON cities.id = monuments.id


""", connection)

Unnamed: 0,ancient_toponym,structure
0,Abae,Acropolis
1,Abae,Agora
2,Abae,Temple of Apollo
3,Abae,Theatre
4,Abae,Walls
...,...,...
9466,Seuthopolis,Temple of Dionysus
9467,Seuthopolis,Urban grid
9468,Seuthopolis,Walls
9469,Bararus,Theatre


#### Exercise create an ordered list with the city of each province that has the most monuments along with the counts of the monuments

In [None]:
pd.read_sql("""

SELECT DISTINCT province

FROM cities

WHERE latitude > (SELECT latitude 
                    FROM cities
                    WHERE ancient_toponym = 'Athenae')
AND start_date > (SELECT start_date 
                    FROM cities
                    WHERE ancient_toponym = 'Athenae')
                    
""", connection)

#### What about `iloc` and `loc`?