Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Fetching contributors…

Octocat-spinner-32-eaf2f5

Cannot retrieve contributors at this time

file 69 lines (58 sloc) 2.254 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
import numpy as np
from collections import defaultdict
import gc
import time
from pandas import DataFrame
from pandas.util.testing import rands
import random

N = 10000

indices = np.array([rands(10) for _ in xrange(N)], dtype='O')
indices2 = np.array([rands(10) for _ in xrange(N)], dtype='O')
key = np.tile(indices[:8000], 10)
key2 = np.tile(indices2[:8000], 10)

left = DataFrame({'key' : key, 'key2':key2,
                  'value' : np.random.randn(80000)})
right = DataFrame({'key': indices[2000:], 'key2':indices2[2000:],
                   'value2' : np.random.randn(8000)})

right2 = right.append(right, ignore_index=True)
right = right2

# random.shuffle(key2)
# indices2 = indices.copy()
# random.shuffle(indices2)

# Prepare Database
import sqlite3
create_sql_indexes = False

conn = sqlite3.connect(':memory:')
conn.execute('create table left( key varchar(10), key2 varchar(10), value int);')
conn.execute('create table right( key varchar(10), key2 varchar(10), value2 int);')
conn.executemany('insert into left values (?, ?, ?)',
                 zip(key, key2, left['value']))
conn.executemany('insert into right values (?, ?, ?)',
                 zip(right['key'], right['key2'], right['value2']))

# Create Indices
if create_sql_indexes:
    conn.execute('create index left_ix on left(key, key2)')
    conn.execute('create index right_ix on right(key, key2)')


join_methods = ['inner', 'left outer', 'left'] # others not supported
sql_results = DataFrame(index=join_methods, columns=[False])
niter = 5
for sort in [False]:
    for join_method in join_methods:
        sql = """select *
from left
%s join right
on left.key=right.key
and left.key2 = right.key2""" % join_method
        if sort:
            sql = '%s order by key, key2' % sql
        f = lambda: list(conn.execute(sql)) # list fetches results
        g = lambda: conn.execute(sql) # list fetches results
        gc.disable()
        start = time.time()
        for _ in xrange(niter):
            f()
        elapsed = (time.time() - start) / niter
        gc.enable()
        sql_results[sort][join_method] = elapsed
sql_results.columns = ['sqlite3'] # ['dont_sort', 'sort']
sql_results.index = ['inner', 'outer', 'left']
Something went wrong with that request. Please try again.