Browse files

ENH: tinkering around with returning base instead of view, not working yet
  • Loading branch information...
1 parent f3ca67d commit 76c16e920bc7fa605e8f13163b9819501f33aeba @wesm committed Jan 6, 2012
View
25 bench/bench_merge.R
@@ -82,7 +82,7 @@ sort.options <- c(FALSE, TRUE)
# many-to-one
-results <- matrix(nrow=3, ncol=3)
+results <- matrix(nrow=4, ncol=3)
colnames(results) <- c("base::merge", "plyr", "data.table")
rownames(results) <- c("inner", "outer", "left", "right")
@@ -121,11 +121,11 @@ inner.join <- function(sort=FALSE) {
}
left.join.dt <- function(sort=FALSE) {
- result <- merge(left.dt, right2.dt, all.x=TRUE, sort=sort)
+ result <- right2.dt[left.dt]
}
right.join.dt <- function(sort=FALSE) {
- result <- merge(left.dt, right2.dt, all.y=TRUE, sort=sort)
+ result <- left.dt[right2.dt]
}
outer.join.dt <- function(sort=FALSE) {
@@ -140,19 +140,22 @@ sort.options <- c(FALSE, TRUE)
# many-to-one
-results <- matrix(nrow=3, ncol=2)
-colnames(results) <- c("base::merge", "data.table")
-rownames(results) <- c("inner", "outer", "left")
+results <- matrix(nrow=4, ncol=3)
+colnames(results) <- c("base::merge", "plyr", "data.table")
+rownames(results) <- c("inner", "outer", "left", "right")
-base.functions <- c(inner.join, outer.join, left.join)
+base.functions <- c(inner.join, outer.join, left.join, right.join)
plyr.functions <- c(function() plyr.join("inner"),
function() plyr.join("full"),
- function() plyr.join("left"))
-dt.functions <- c(inner.join.dt, outer.join.dt, left.join.dt)
-for (i in 1:3) {
+ function() plyr.join("left"),
+ function() plyr.join("right"))
+dt.functions <- c(inner.join.dt, outer.join.dt, left.join.dt, right.join.dt)
+for (i in 1:4) {
base.func <- base.functions[[i]]
plyr.func <- plyr.functions[[i]]
dt.func <- dt.functions[[i]]
results[i, 1] <- timeit(base.func)
- results[i, 2] <- timeit(dt.func)
+ results[i, 2] <- timeit(plyr.func)
+ results[i, 3] <- timeit(dt.func)
}
+
View
2 bench/bench_merge.py
@@ -51,7 +51,7 @@ def get_test_data(ngroups=100, n=N):
niter = 10
for sort in [False]:
for join_method in join_methods:
- f = lambda: merge(left, right2, how=join_method, sort=sort)
+ f = lambda: merge(left, right, how=join_method, sort=sort)
gc.disable()
start = time.time()
for _ in xrange(niter):
View
16 bench/bench_merge_sqlite.py
@@ -18,8 +18,8 @@
right = DataFrame({'key': indices[2000:], 'key2':indices2[2000:],
'value2' : np.random.randn(8000)})
-right2 = right.append(right, ignore_index=True)
-right = right2
+# right2 = right.append(right, ignore_index=True)
+# right = right2
# random.shuffle(key2)
# indices2 = indices.copy()
@@ -53,6 +53,12 @@
%s join right
on left.key=right.key
and left.key2 = right.key2;""" % join_method
+ sql = """select *
+ from left
+ %s join right
+ on left.key=right.key
+ and left.key2 = right.key2;""" % join_method
+
if sort:
sql = '%s order by key, key2' % sql
f = lambda: list(conn.execute(sql)) # list fetches results
@@ -70,3 +76,9 @@
sql_results[sort][join_method] = elapsed
sql_results.columns = ['sqlite3'] # ['dont_sort', 'sort']
sql_results.index = ['inner', 'outer', 'left']
+
+ sql = """select *
+ from left
+ inner join right
+ on left.key=right.key
+ and left.key2 = right.key2;"""
View
2 pandas/core/series.py
@@ -597,6 +597,8 @@ def keys(self):
"Alias for index"
return self.index
+ # values = lib.ValuesProperty()
+
@property
def values(self):
"""
View
17 pandas/src/numpy_helper.h
@@ -71,3 +71,20 @@ get_value_1d(PyArrayObject* ap, Py_ssize_t i) {
char *item = (char *) PyArray_DATA(ap) + i * PyArray_STRIDE(ap, 0);
return PyArray_Scalar(item, PyArray_DESCR(ap), (PyObject*) ap);
}
+
+/*
+ * Walk the ``base`` chain of ``ap`` until an object whose type is exactly
+ * ``numpy.ndarray`` (not a subclass) is found.
+ *
+ * Parameters
+ *   ap : borrowed reference to the starting object; may be NULL.
+ *
+ * Returns
+ *   A NEW reference to the first exact ndarray found in the chain, or a new
+ *   reference to ``Py_None`` when the chain ends without reaching one
+ *   (NULL / None input, a NULL or None base, or a base that is not an
+ *   ndarray at all). Never returns NULL, so callers that declare the result
+ *   as a Python object do not see a NULL without an exception set.
+ */
+PANDAS_INLINE PyObject*
+get_base_ndarray(PyObject* ap) {
+  while (ap != NULL && ap != Py_None && !PyArray_CheckExact(ap)) {
+    // Only ndarray (sub)class instances carry a ``base`` slot; anything
+    // else (e.g. a buffer-providing owner object) must not be cast to
+    // PyArrayObject*.
+    if (!PyArray_Check(ap)) {
+      Py_RETURN_NONE;
+    }
+    // PyArray_BASE is NULL when the array owns its data; the loop
+    // condition handles that before the next type check.
+    ap = PyArray_BASE((PyArrayObject*) ap);
+  }
+
+  if (ap == NULL || ap == Py_None) {
+    Py_RETURN_NONE;
+  }
+
+  // PyArray_BASE is a borrowed reference
+  Py_INCREF(ap);
+  return ap;
+}
View
20 pandas/src/properties.pyx
@@ -44,7 +44,6 @@ cdef class AxisProperty(object):
cdef class SeriesIndex(object):
cdef:
- Py_ssize_t axis
object _check_type
def __init__(self):
@@ -58,3 +57,22 @@ cdef class SeriesIndex(object):
if len(obj) != len(value):
raise AssertionError('Index length did not match values')
obj._index = self._check_type(value)
+
+cdef class ValuesProperty(object):
+    """
+    Descriptor that returns a plain ``numpy.ndarray`` for an instance of an
+    ndarray subclass by following the instance's ``base`` chain.
+
+    The first object in the chain whose type is exactly ``ndarray`` is
+    returned. If the chain ends before one is found, the last array reached
+    is re-viewed as ``np.ndarray`` so the result is never a subclass
+    instance.
+
+    NOTE(review): an array's base need not have the same shape as the array
+    viewing it (e.g. when the instance is a slice of a larger array) --
+    confirm callers only use this where instance and base coincide.
+    """
+
+    def __get__(self, obj, type):
+        cdef:
+            ndarray arr
+            object base
+
+        # Class-level attribute access passes obj=None; there is no array
+        # to unwrap, so hand back the descriptor itself.
+        if obj is None:
+            return self
+
+        arr = obj
+        while not np.PyArray_CheckExact(arr):
+            base = np.get_array_base(arr)
+            if not isinstance(base, np.ndarray):
+                # Chain exhausted (no base, or the base is not an ndarray):
+                # fall back to a plain-ndarray view of what we have.
+                return arr.view(np.ndarray)
+            arr = base
+        return arr
View
19 pandas/src/sandbox.pyx
@@ -35,8 +35,8 @@ def bench_dict():
from cpython cimport PyObject
-cdef extern from "numpy/arrayobject.h":
- bint PyArray_Check(PyObject*)
+# cdef extern from "numpy/arrayobject.h":
+# bint PyArray_Check(PyObject*)
cimport cython
@@ -48,12 +48,12 @@ def bench_typecheck1(ndarray[object] arr):
for i in range(n):
cpython.PyFloat_Check(arr[i])
-def bench_typecheck2(ndarray[object] arr):
- cdef Py_ssize_t i, n
- cdef PyObject** buf = <PyObject**> arr.data
- n = cnp.PyArray_SIZE(arr)
- for i in range(n):
- PyArray_Check(buf[i])
+# def bench_typecheck2(ndarray[object] arr):
+# cdef Py_ssize_t i, n
+# cdef PyObject** buf = <PyObject**> arr.data
+# n = cnp.PyArray_SIZE(arr)
+# for i in range(n):
+# PyArray_Check(buf[i])
def foo(object _chunk, object _arr):
@@ -149,3 +149,6 @@ cimport util
def foo2(o):
return util.is_integer_object(o)
+
+def foo3(o):
+    """Sandbox hook: return the result of ``util.get_base_ndarray(o)``."""
+    base = util.get_base_ndarray(o)
+    return base
View
2 pandas/src/util.pxd
@@ -8,6 +8,8 @@ cdef extern from "numpy_helper.h":
inline int is_string_object(object)
inline int assign_value_1d(ndarray, Py_ssize_t, object) except -1
inline object get_value_1d(ndarray, Py_ssize_t)
+ inline object get_base_ndarray(object)
+
cdef inline object get_value_at(ndarray arr, object loc):
cdef:

0 comments on commit 76c16e9

Please sign in to comment.