Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

69 lines (58 sloc) 2.254 kb
import numpy as np
from collections import defaultdict
import gc
import time
from pandas import DataFrame
from pandas.util.testing import rands
import random
N = 10000
indices = np.array([rands(10) for _ in xrange(N)], dtype='O')
indices2 = np.array([rands(10) for _ in xrange(N)], dtype='O')
key = np.tile(indices[:8000], 10)
key2 = np.tile(indices2[:8000], 10)
left = DataFrame({'key' : key, 'key2':key2,
'value' : np.random.randn(80000)})
right = DataFrame({'key': indices[2000:], 'key2':indices2[2000:],
'value2' : np.random.randn(8000)})
right2 = right.append(right, ignore_index=True)
right = right2
# random.shuffle(key2)
# indices2 = indices.copy()
# random.shuffle(indices2)
# Prepare Database
import sqlite3
create_sql_indexes = False
conn = sqlite3.connect(':memory:')
conn.execute('create table left( key varchar(10), key2 varchar(10), value int);')
conn.execute('create table right( key varchar(10), key2 varchar(10), value2 int);')
conn.executemany('insert into left values (?, ?, ?)',
zip(key, key2, left['value']))
conn.executemany('insert into right values (?, ?, ?)',
zip(right['key'], right['key2'], right['value2']))
# Create Indices
if create_sql_indexes:
conn.execute('create index left_ix on left(key, key2)')
conn.execute('create index right_ix on right(key, key2)')
join_methods = ['inner', 'left outer', 'left'] # others not supported
sql_results = DataFrame(index=join_methods, columns=[False])
niter = 5
for sort in [False]:
for join_method in join_methods:
sql = """select *
from left
%s join right
on left.key=right.key
and left.key2 = right.key2""" % join_method
if sort:
sql = '%s order by key, key2' % sql
f = lambda: list(conn.execute(sql)) # list fetches results
g = lambda: conn.execute(sql) # list fetches results
gc.disable()
start = time.time()
for _ in xrange(niter):
f()
elapsed = (time.time() - start) / niter
gc.enable()
sql_results[sort][join_method] = elapsed
sql_results.columns = ['sqlite3'] # ['dont_sort', 'sort']
sql_results.index = ['inner', 'outer', 'left']
Jump to Line
Something went wrong with that request. Please try again.