#### Imports

In [1]:
import pandas as pd
import sqlite3
pd.set_option('max_colwidth', None)

#### Initiate an empty database

In [2]:
connection = sqlite3.connect(":memory:")

#### Create a table

In [3]:
# CREATE A TABLE
connection.execute('''CREATE TABLE faculty
             (
             id int,
             first_name text,
             last_name text,
             date_of_birth date
             )''')

<sqlite3.Cursor at 0x7ff35a552260>

#### Insert some data

In [4]:
# INSERT DATA
connection.execute('''
INSERT INTO faculty
VALUES ("1", "John", "Smith", "1970-01-02"

)
''')

<sqlite3.Cursor at 0x7ff35a5522d0>

In [5]:
# TRIVIAL TEST VIA SELECT
result = connection.execute("""
SELECT last_name, date_of_birth

FROM faculty

""")
result.fetchall()

[('Smith', '1970-01-02')]

#### Make the SQL database to a pandas DataFrame

In [6]:
pd.read_sql("""SELECT * FROM faculty""", connection)

Unnamed: 0,id,first_name,last_name,date_of_birth
0,1,John,Smith,1970-01-02


---

## Cities Database

Download the [Cities Database](http://oxrep.classics.ox.ac.uk/oxrep/docs/Hanson2016/Hanson2016_CitiesDatabase_OxREP.xlsx) of Hanson, J. W. (2016) _Cities Database_ (OXREP databases). Version 1.0. Accessed (date): <http://oxrep.classics.ox.ac.uk/databases/cities/>. DOI: <https://doi.org/10.5287/bodleian:eqapevAn8>

In titles, replace all spaces with underscores and convert case to lowercase.

In [7]:
cities_df

NameError: name 'cities_df' is not defined

In [10]:
# This line resets the memory.
connection = sqlite3.connect(":memory:")

# Read the excel as pandas DataFrame
cities_df = pd.read_excel('Hanson2016_citiesDatabase_OxREP.xlsx', sheet_name = 'cities')

# Turn the DataFrame to SQL
cities_df.to_sql('cities', con = connection, index = False) # Why False?

1388

#### Let's perform a query

In [None]:
pd.read_sql("SELECT * FROM cities", connection).head()

#### `WHERE`

In [None]:
pd.read_sql("""

SELECT id, ancient_toponym, modern_toponym, province, country, start_date, end_date

FROM cities

WHERE province LIKE '%Maced%'

""", connection)

#### `AND`

In [None]:
pd.read_sql("""

SELECT id, ancient_toponym, modern_toponym, province, country, start_date, end_date

FROM cities

WHERE province = 'Macedonia'

AND start_date > -323



""", connection)

#### What is NULL?

In [None]:
pd.read_sql("""

SELECT id, ancient_toponym, modern_toponym, province, country, start_date, end_date

FROM cities

WHERE province = 'Macedonia'

AND end_date IS NULL


""", connection).head()

#### Subquery: Find all cities older than Torone in Macedonia

In [None]:
pd.read_sql("""

SELECT id, ancient_toponym, modern_toponym, province, country, start_date, end_date

FROM cities

WHERE province = 'Macedonia'

AND start_date <= (

SELECT start_date

FROM cities

WHERE ancient_toponym = "Torone"

)

""", connection)

#### Let's select all countries where there were ancient cities

In [None]:
pd.read_sql("""

SELECT country

FROM cities



""", connection)

#### ` ORDER BY`

In [None]:
pd.read_sql("""

SELECT country

FROM cities

ORDER BY country

""", connection)

#### `DISTINCT`

In [None]:
pd.read_sql("""

SELECT DISTINCT country

FROM cities

ORDER BY country

""", connection)

#### Exercise: Find all the countries that have cities on ancient Macedonia

In [None]:
pd.read_sql("""

SELECT DISTINCT country

FROM cities

WHERE province = 'Macedonia'


""", connection)

#### Exercise: Find all provinces eastern than Athens (Athenae) that have cities are newer than Athens

In [None]:
pd.read_sql("""

SELECT DISTINCT province

FROM cities

WHERE latitude < (

SELECT latitude

FROM cities

WHERE ancient_toponym = 'Athenae'

)

AND start_date > (

SELECT start_date

FROM cities

WHERE ancient_toponym = 'Athenae'

)

""", connection)

#### Exercise: Count all provinces eastern than Athens (Athenae) that have cities are newer than Athens

In [None]:
pd.read_sql("""

SELECT DISTINCT province, count(*) as count

FROM cities

WHERE latitude < (

SELECT latitude

FROM cities

WHERE ancient_toponym = 'Athenae'

)

AND start_date > (

SELECT start_date

FROM cities

WHERE ancient_toponym = 'Athenae'

)

GROUP BY province


""", connection)

#### Read the sheet _monuments_ of Hanson2016_citiesDatabase_OxREP

In [12]:
monuments_df = pd.read_excel('Hanson2016_citiesDatabase_OxREP.xlsx', sheet_name = 'monuments')

monuments_df.to_sql('monuments', con = connection, index = False)

pd.read_sql("SELECT * FROM monuments", connection).head(2)

Unnamed: 0,id,structure
0,Hanson2016_1,Acropolis
1,Hanson2016_1,Agora


Notice that we are not creating a new connection

In [13]:
pd.read_sql("""

SELECT cities.ancient_toponym, monuments.structure

FROM monuments

JOIN cities ON cities.id = monuments.id


""", connection)

Unnamed: 0,ancient_toponym,structure
0,Abae,Acropolis
1,Abae,Agora
2,Abae,Temple of Apollo
3,Abae,Theatre
4,Abae,Walls
...,...,...
9466,Seuthopolis,Temple of Dionysus
9467,Seuthopolis,Urban grid
9468,Seuthopolis,Walls
9469,Bararus,Theatre


#### Exercise create an ordered list with the city of each province that has the most monuments along with the counts of the monuments

#### What about `iloc` and `loc`?