## Store NumPy Arrays in a SQLite Database

In [4]:
import os
import io
import sqlite3
import numpy as np

#### Load the P3B3 Synthetic Data 

In [5]:
!ls ../data/numpy/P3B3/processed/

test_data.npy    test_labels.npy  train_data.npy   train_labels.npy


In [6]:
# Directory holding the processed P3B3 .npy files (train/test data and labels).
ARRY_PATH = '../data/numpy/P3B3/processed/'

In [7]:
def load_data(path, trainset=True):
    """ Read one split of the P3B3 arrays from disk.

    Parameters
    ----------
    path : str
        Directory containing the ``*_data.npy`` and ``*_labels.npy`` files.
    trainset : bool, optional
        Truthy (default) selects the ``train_*`` files, falsy selects the
        ``test_*`` files.

    Returns
    -------
    tuple
        ``(data, label)``, each the result of ``np.load`` on the
        corresponding file.
    """
    if trainset:
        filenames = ('train_data.npy', 'train_labels.npy')
    else:
        filenames = ('test_data.npy', 'test_labels.npy')

    # Data is loaded first, labels second, matching the returned order.
    return tuple(np.load(os.path.join(path, name)) for name in filenames)

In [8]:
def batch_arrays(x_train, y_train):
    """ Pair every sample with its label.

    Parameters
    ----------
    x_train : np.ndarray
        Sample array; the first axis indexes samples.
    y_train : np.ndarray
        Label array with the same first-axis length as ``x_train``.

    Returns
    -------
    list of tuple
        One ``(sample, label)`` tuple per index along the first axis.
    """
    # Both arrays must describe the same number of samples.
    assert x_train.shape[0] == y_train.shape[0]
    # Iterating an array walks its first axis, so zip yields row-wise pairs.
    return list(zip(x_train, y_train))

In [9]:
# Load both splits from ARRY_PATH; only the training split is batched below.
x_train, y_train = load_data(ARRY_PATH)
x_test, y_test = load_data(ARRY_PATH, False)  # False selects the test_* files

# One (sample, label) tuple per training row, the shape executemany() expects.
array_data = batch_arrays(x_train, y_train)

#### Helper functions to store NumPy ndarrays in SQLite

In [10]:
def adapt_array(arr):
    """ Serialise a numpy array into a SQLite binary value.

    The array is written in ``.npy`` format (via ``np.save``) to an
    in-memory buffer, and the buffer contents are wrapped in
    ``sqlite3.Binary``.

    Parameters
    ----------
    arr : np.ndarray
        Array to serialise.

    Returns
    -------
    sqlite3.Binary
        The ``.npy``-encoded bytes of ``arr``.

    References
    ----------
    http://stackoverflow.com/a/31312102/190597 (SoulNibbler)
    """
    buffer = io.BytesIO()
    np.save(buffer, arr)
    # getvalue() returns the whole buffer regardless of the stream position.
    return sqlite3.Binary(buffer.getvalue())

In [11]:
def convert_array(text):
    """ Rebuild a numpy array from its ``.npy``-encoded bytes.

    Parameters
    ----------
    text : bytes
        Bytes-like payload in ``.npy`` format, as produced by ``np.save``.
        Despite the parameter name, this is binary data, not text.

    Returns
    -------
    np.ndarray
        The array decoded by ``np.load``.
    """
    # A freshly built BytesIO already starts at position 0.
    return np.load(io.BytesIO(text))

In [12]:
# Converts np.array to TEXT when inserting
# Serialises np.ndarray parameters to a binary value (.npy bytes) when inserting
sqlite3.register_adapter(np.ndarray, adapt_array)

# Decodes columns declared as "array" back into np.ndarray when selecting
sqlite3.register_converter("array", convert_array)

#### Create the database

In [13]:
# In-memory database. PARSE_DECLTYPES tells sqlite3 to choose a converter from
# each column's declared type, which is what lets "array" columns be decoded.
conn = sqlite3.connect(":memory:", detect_types=sqlite3.PARSE_DECLTYPES)
cur = conn.cursor()

In [14]:
# Create the table inside the connection's context manager so the statement
# is committed on success. Both columns use the custom "array" declared type,
# which must match the name passed to sqlite3.register_converter.
with conn:
    cur.execute("""
        CREATE TABLE trainset (
            data array,
            label array
        )"""
    )

In [15]:
# Bulk insert: each (data, label) tuple in array_data becomes one row, bound
# to the two "?" placeholders. The ndarray values go through the adapter
# registered for np.ndarray.
with conn:
    cur.executemany("INSERT INTO trainset VALUES (?,?)", array_data)

#### Fetch the first sample of data with its corresponding labels

In [16]:
# The query has no ORDER BY clause.
# NOTE(review): the row returned first is presumably the first one inserted;
# confirm, or add an explicit ordering, if that matters.
with conn:
    cur.execute("SELECT * from trainset")
    
# Take a single row and unpack its two columns (data, label).
data, label = cur.fetchone()

In [17]:
data

array([ 35, 197, 232, ...,   0,   0,   0])

In [18]:
label

array([5, 0, 1, 2])