--------

# Analyzing CIA Factbook Data Using SQL

Resource: DataQuest 

(https://app.dataquest.io/m/257/guided-project%3A-analyzing-cia-factbook-data-using-sql/1/jupyter)

--------

In [1]:
%%capture
%load_ext sql

In [2]:
%sql sqlite:///factbook.db

'Connected: @factbook.db'

Get some information about which tables the database holds

In [4]:
%%sql
SELECT *
    FROM sqlite_master
    WHERE type = 'table';

 * sqlite:///factbook.db
Done.


type,name,tbl_name,rootpage,sql
table,sqlite_sequence,sqlite_sequence,3,"CREATE TABLE sqlite_sequence(name,seq)"
table,facts,facts,47,"CREATE TABLE ""facts"" (""id"" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, ""code"" varchar(255) NOT NULL, ""name"" varchar(255) NOT NULL, ""area"" integer, ""area_land"" integer, ""area_water"" integer, ""population"" integer, ""population_growth"" float, ""birth_rate"" float, ""death_rate"" float, ""migration_rate"" float)"


# Samples from Table 'facts'

In [18]:
%%sql

SELECT *
    FROM facts
    LIMIT 5;

 * sqlite:///factbook.db
Done.


id,code,name,area,area_land,area_water,population,population_growth,birth_rate,death_rate,migration_rate
1,af,Afghanistan,652230,652230,0,32564342,2.32,38.57,13.89,1.51
2,al,Albania,28748,27398,1350,3029278,0.3,12.92,6.58,3.3
3,ag,Algeria,2381741,2381741,0,39542166,1.84,23.67,4.31,0.92
4,an,Andorra,468,468,0,85580,0.12,8.13,6.96,0.0
5,ao,Angola,1246700,1246700,0,19625353,2.78,38.78,11.49,0.46


# Summary Statistics

In [24]:
%%sql
SELECT MAX(population), MAX(population_growth), MIN(population), MIN(population_growth)
    FROM facts;

 * sqlite:///factbook.db
Done.


MAX(population),MAX(population_growth),MIN(population),MIN(population_growth)
7256490011,4.02,0,0.0


# Find Outliers

In [45]:
%%sql
SELECT name, population
    FROM facts
    WHERE population = (SELECT MIN(population) from facts)
    OR population = (SELECT MAX(population) from facts) ;

 * sqlite:///factbook.db
Done.


name,population
Antarctica,0
World,7256490011


Recomputing Summary Stats without world row

In [34]:
%%sql
SELECT MAX(population), MAX(population_growth), MIN(population), MIN(population_growth), AVG(population), AVG(area)
    FROM facts
    WHERE name != 'World';

 * sqlite:///factbook.db
Done.


MAX(population),MAX(population_growth),MIN(population),MIN(population_growth),AVG(population),AVG(area)
1367485388,4.02,0,0.0,32242666.56846473,555093.546184739


# What are the most densely populated countries in the world?

In [96]:
%%sql
SELECT name, population, area_land, (population/area_land) as 'People per Square Km'
    FROM facts
    ORDER BY 4 DESC
    LIMIT 10;
    

 * sqlite:///factbook.db
Done.


name,population,area_land,People per Square Km
Macau,592731,28,21168
Monaco,30535,2,15267
Singapore,5674472,687,8259
Hong Kong,7141106,1073,6655
Gaza Strip,1869055,360,5191
Gibraltar,29258,6,4876
Bahrain,1346613,760,1771
Maldives,393253,298,1319
Malta,413965,316,1310
Bermuda,70196,54,1299


# Which countries have the highest ratios of water to land?

In [97]:
%%sql
SELECT name, area_land, area_water, CAST(ROUND((CAST(area_water AS FLOAT)/CAST(area_land AS FLOAT)),2) AS FLOAT) AS 'water:land'
    FROM facts
    ORDER BY 4 DESC
    LIMIT 10;

 * sqlite:///factbook.db
Done.


name,area_land,area_water,water:land
British Indian Ocean Territory,60,54340,905.67
Virgin Islands,346,1564,4.52
Puerto Rico,8870,4921,0.55
"Bahamas, The",10010,3870,0.39
Guinea-Bissau,28120,8005,0.28
Malawi,94080,24404,0.26
Netherlands,33893,7650,0.23
Uganda,197100,43938,0.22
Eritrea,101000,16600,0.16
Liberia,96320,15049,0.16


# Which countries are experiencing the greatest influx of migrants ?

(taking migration_rate to be the amount of migrants per 1000 population)

Note: This is actually a bit misleading. Looking at Syria's migration_rate, I would assume this value should be negative to 
indicate migration OUT of, not INTO Syria. 

In [98]:
%%sql
SELECT name, population, population_growth, migration_rate, ROUND((migration_rate * (population/1000)), 2) AS 'Approximate Migrants'
    FROM facts
    ORDER BY 5 DESC
    LIMIT 10;

 * sqlite:///factbook.db
Done.


name,population,population_growth,migration_rate,Approximate Migrants
European Union,513949445,0.25,2.5,1284872.5
United States,321368864,0.78,3.86,1240480.48
China,1367485388,0.45,0.44,601693.4
Spain,48146134,0.89,8.31,400093.26
Syria,17064854,0.16,19.79,337696.56
Pakistan,199085847,1.46,1.54,306590.9
Indonesia,255993674,0.92,1.16,296951.88
Italy,61855120,0.27,4.1,253605.5
Russia,142423773,0.04,1.69,240694.87
Philippines,100998376,1.61,2.09,211085.82
