# 1.1 - Welcome to the course!

#### > Reading a text file

In [None]:
# Read the entire file into a single string using explicit open/close.
# The try/finally guarantees the handle is released even if read() raises.
filename = 'huck_finn.txt'
file = open(filename, mode='r')  # 'r' opens the file for reading
try:
    text = file.read()
finally:
    file.close()

#### > Writing to a file

In [None]:
# Open a file for writing and close it again.
# NOTE: mode 'w' truncates an existing file as soon as it is opened. Nothing
# is written here, so running this cell leaves huck_finn.txt empty.
filename ='huck_finn.txt'
file = open(filename, mode='w') # 'w' opens the file for writing
file.close()

#### > Context manager with

In [None]:
# The with statement closes the file automatically when the block exits,
# even if read() or print() raises.
with open('huck_finn.txt', 'r') as file:
    print(file.read())

# 1.2 - The importance of flat files in data science

# 1.3 - Importing flat files using NumPy

#### > Importing flat files using NumPy

In [None]:
# Load a comma-delimited text file into a NumPy array.
import numpy as np
filename = 'MNIST.txt'
data = np.loadtxt(filename, delimiter=',')
data  # bare expression: shown as the cell output when run in a notebook

#### > Customizing your NumPy import

In [None]:
# Load a comma-delimited file while skipping its first line
# (presumably a header row, going by the file name).
import numpy as np
filename = 'MNIST_header.txt'
data = np.loadtxt(filename, delimiter=',', skiprows=1)
print(data)

In [None]:
# Same import as before, but usecols=[0, 2] keeps only the first and third
# columns of each row.
import numpy as np
filename = 'MNIST_header.txt'
data = np.loadtxt(filename, delimiter=',', skiprows=1, usecols=[0, 2])
print(data)

In [None]:
# dtype=str reads every field as a string. `np` and `filename` are not defined
# in this cell; they come from an earlier cell. No skiprows is passed, so the
# first line of the file is read as data too.
data = np.loadtxt(filename, delimiter=',', dtype=str)

# 1.4 - Importing flat files using pandas

In [None]:
# Load a CSV file into a pandas DataFrame and preview its first rows.
import pandas as pd
filename = 'winequality-red.csv'
data = pd.read_csv(filename)
data.head()  # bare expression: shown as the cell output when run in a notebook

# 1.5 - Final thoughts on data import

# 2.1 - Introduction to other file types

#### > Pickled files

In [None]:
# Deserialize a Python object from a pickle file.
# NOTE(review): pickle.load can execute arbitrary code during unpickling;
# only load pickle files that come from a trusted source.
import pickle
with open('pickled_fruit.pkl','rb') as file:  # 'rb': pickles are binary data
    data = pickle.load(file)
print(data)

#### > Importing Excel spreadsheets

In [None]:
# Open an Excel workbook and list the names of the sheets it contains.
import pandas as pd
file = 'urbanpop.xlsx'
data = pd.ExcelFile(file)
print(data.sheet_names)

In [None]:
# Parse individual sheets into DataFrames. `data` is expected to be the
# pd.ExcelFile object created in an earlier cell.
df1 = data.parse('1960-1966') # sheet name, as a string
df2 = data.parse(0) # sheet index, as an integer (0 is the first sheet)

# 2.2 - Importing SAS/Stata files using pandas

#### > Importing SAS files

In [None]:
# Read a SAS data file into a DataFrame using the third-party sas7bdat
# package; the with block closes the file when it exits.
import pandas as pd
from sas7bdat import SAS7BDAT
with SAS7BDAT('urbanpop.sas7bdat') as file:
    df_sas = file.to_data_frame()

#### > Importing Stata files

In [None]:
# Read a Stata .dta file straight into a pandas DataFrame.
import pandas as pd
data = pd.read_stata('urbanpop.dta')

# 2.3 - Importing HDF5 files

#### > Importing HDF5 files

In [None]:
# Open an HDF5 file read-only and show the type of the returned object.
# The handle is left open because later cells keep reading from `data`.
import h5py
filename ='H-H1_LOSC_4_V1-815411200-4096.hdf5'
data = h5py.File(filename,'r') # 'r' opens the file for reading
print(type(data))

#### > The structure of HDF5 files

In [None]:
# Print the top-level keys of the HDF5 file. `data` is expected to be the
# h5py.File opened in an earlier cell.
for key in data.keys():
    print(key)

In [None]:
# Show what kind of object is stored under the 'meta' key
# (presumably an HDF5 group, since the next cells iterate over its keys).
print(type(data['meta']))

In [None]:
# Print the names of the members stored under the 'meta' entry.
# `data` is expected to be the h5py.File opened in an earlier cell.
for key in data['meta'].keys():
    print(key)

In [None]:
# Print two metadata entries. Dataset.value was deprecated and then removed in
# h5py 3.0; indexing with an empty tuple reads the dataset's full contents and
# works on both old and new h5py releases.
print(data['meta']['Description'][()], data['meta']['Detector'][()])

# 2.4 - Importing MATLAB files

In [None]:
# Load a MATLAB .mat workspace file and show the type of the returned object.
import scipy.io
filename = 'workspace.mat'
mat = scipy.io.loadmat(filename)
print(type(mat))

In [None]:
# Show the type of the entry stored under key 'x'
# (assumes the workspace file contains a variable named x).
print(type(mat['x']))

# 3.1 - Introduction to relational databases

# 3.2 - Creating a database engine in Python

In [None]:
# Create a SQLAlchemy engine for a SQLite database file. The URL has the form
# 'sqlite:///<path>'; here the path is relative to the working directory.
from sqlalchemy import create_engine
engine = create_engine('sqlite:///Northwind.sqlite')

#### > Getting table names 

In [None]:
# List the tables in the database. Engine.table_names() was deprecated in
# SQLAlchemy 1.4 and removed in 2.0; the inspector API works on both.
# `engine` is expected to come from the create_engine cell.
from sqlalchemy import inspect
table_names = inspect(engine).get_table_names()
print(table_names)

# 3.3 - Querying relational databases in Python

#### > Basic SQL query

In [None]:
SELECT * FROM Table_Name

In [None]:
SELECT * FROM Orders

#### > Your first SQL query

In [None]:
# Run a query over an explicitly managed connection and load all rows into a
# DataFrame. The try/finally guarantees the connection is closed even if the
# query or the fetch raises.
from sqlalchemy import create_engine
import pandas as pd
engine = create_engine('sqlite:///Northwind.sqlite')
con = engine.connect()
try:
    rs = con.execute("SELECT * FROM Orders")
    df = pd.DataFrame(rs.fetchall())
finally:
    con.close()

#### > Set the DataFrame column names

In [None]:
# Same query as before, but the DataFrame columns are labelled with the
# column names reported by the result set. The try/finally guarantees the
# connection is closed even if the query or the fetch raises.
from sqlalchemy import create_engine
import pandas as pd
engine = create_engine('sqlite:///Northwind.sqlite')
con = engine.connect()
try:
    rs = con.execute("SELECT * FROM Orders")
    df = pd.DataFrame(rs.fetchall())
    df.columns = rs.keys()  # replace the default 0..n-1 labels
finally:
    con.close()

#### > Using the context manager

In [None]:
# Set up the imports and the engine used by the context-manager example
# in the next cell.
from sqlalchemy import create_engine
import pandas as pd
engine = create_engine('sqlite:///Northwind.sqlite')

In [None]:
# Query three columns and keep only the first five rows. The with block
# closes the connection automatically when it exits.
# `engine` and `pd` are expected to come from the previous cell.
# NOTE(review): the original query string was cut off after "FROM Order";
# the table name Orders is restored from the other queries in this file.
with engine.connect() as con:
    rs = con.execute("SELECT OrderID, OrderDate, ShipName FROM Orders")
    df = pd.DataFrame(rs.fetchmany(size=5))
    df.columns = rs.keys()

# 3.4 - Querying relational databases directly with pandas

#### > The pandas way to query

In [None]:
# The explicit route: open a connection, execute the query, fetch every row,
# then label the columns from the result set.
from sqlalchemy import create_engine
import pandas as pd
engine = create_engine('sqlite:///Northwind.sqlite')
with engine.connect() as con:
    rs = con.execute("SELECT * FROM Orders")
    df = pd.DataFrame(rs.fetchall())
    df.columns = rs.keys()

In [None]:
# The pandas route: read_sql_query runs the query against the engine and
# returns the result as a DataFrame in a single call.
# `pd` and `engine` are expected to come from the previous cell.
df = pd.read_sql_query("SELECT * FROM Orders", engine)

# 3.5 - Advanced querying: exploiting table relationships

#### > INNER JOIN in Python (pandas)

In [None]:
# Join Orders to Customers on CustomerID and load the result with pandas.
from sqlalchemy import create_engine
import pandas as pd
engine = create_engine('sqlite:///Northwind.sqlite')
# A plain quoted string cannot span physical lines; adjacent literals inside
# the parentheses are concatenated into one query (note the trailing space).
df = pd.read_sql_query(
    "SELECT OrderID, CompanyName FROM Orders "
    "INNER JOIN Customers on Orders.CustomerID = Customers.CustomerID",
    engine,
)
print(df.head())

# 3.6 - Final Thoughts