Skip to content

Commit

Permalink
ENH: DataFrame.__init__ will accept structured arrays. general performance tweak in constructor too
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Jul 20, 2011
1 parent 54066db commit 955b727
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 15 deletions.
38 changes: 25 additions & 13 deletions pandas/core/frame.py
Expand Up @@ -147,9 +147,18 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
mgr = mgr.cast(dtype)
elif isinstance(data, dict):
mgr = self._init_dict(data, index, columns, dtype=dtype)
elif isinstance(data, (np.ndarray, list)):
mgr = self._init_matrix(data, index, columns, dtype=dtype,
copy=copy)
elif isinstance(data, np.ndarray):
if data.dtype.names:
data_columns, data = _rec_to_dict(data)
if columns is None:
columns = data_columns
mgr = self._init_dict(data, index, columns, dtype=dtype)
else:
mgr = self._init_ndarray(data, index, columns, dtype=dtype,
copy=copy)
elif isinstance(data, list):
mgr = self._init_ndarray(data, index, columns, dtype=dtype,
copy=copy)
else:
raise PandasError('DataFrame constructor not properly called!')

Expand Down Expand Up @@ -183,8 +192,8 @@ def _init_dict(self, data, index, columns, dtype=None):
mgr = BlockManager(blocks, [columns, index])
return mgr.consolidate()

def _init_matrix(self, values, index, columns, dtype=None,
copy=False):
def _init_ndarray(self, values, index, columns, dtype=None,
copy=False):
values = _prep_ndarray(values, copy=copy)

if dtype is not None:
Expand Down Expand Up @@ -347,16 +356,13 @@ def from_records(cls, data, indexField=None):
-------
DataFrame
"""
# Dtype when you have records
if not issubclass(data.dtype.type, np.void):
if not data.dtype.names:
raise Exception('Input was not a structured array!')

columns = data.dtype.names
sdict = dict((k, data[k]) for k in columns)

columns, sdict = _rec_to_dict(data)
if indexField is not None:
index = sdict.pop(indexField)
columns = [c for c in columns if c != indexField]
columns.remove(indexField)
else:
index = np.arange(len(data))

Expand Down Expand Up @@ -2484,6 +2490,12 @@ def _prep_ndarray(values, copy=True):

return values


def _rec_to_dict(arr):
columns = list(arr.dtype.names)
sdict = dict((k, arr[k]) for k in columns)
return columns, sdict

def _homogenize_series(data, index, dtype=None):
homogenized = {}

Expand All @@ -2507,9 +2519,9 @@ def _homogenize_series(data, index, dtype=None):

# only *attempt* to cast to dtype
try:
v = Series(v, dtype=dtype, index=index)
v = np.asarray(v, dtype=dtype)
except Exception:
v = Series(v, index=index)
v = np.asarray(v)

homogenized[k] = v

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals.py
Expand Up @@ -731,7 +731,7 @@ def _simple_blockify(dct, ref_items, dtype):

def _stack_dict(dct, ref_items):
items = [x for x in ref_items if x in dct]
stacked = np.vstack([dct[k].values for k in items])
stacked = np.vstack([np.asarray(dct[k]) for k in items])
return items, stacked

def _blocks_to_series_dict(blocks, index=None):
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/sparse.py
Expand Up @@ -783,7 +783,7 @@ def density(self):
#----------------------------------------------------------------------
# Support different internal rep'n of SparseDataFrame

def _insert_item(self, key, value):
def _set_item(self, key, value):
sp_maker = lambda x: SparseSeries(x, index=self.index,
fill_value=self.default_fill_value,
kind=self.default_kind)
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/test_frame.py
Expand Up @@ -626,6 +626,20 @@ def test_constructor_mixed(self):

self.assertEqual(self.mixed_frame['foo'].dtype, np.object_)

def test_constructor_rec(self):
    """DataFrame built from a record array takes its columns from the
    dtype field names, with and without an explicit index."""
    records = self.frame.to_records(index=False)

    # relabel the fields in reverse order so the expected column order
    # differs from that of the frame the records were made from
    records.dtype.names = list(reversed(records.dtype.names))
    expected_columns = records.dtype.names

    frame_index = self.frame.index

    # no index passed
    from_records_only = DataFrame(records)
    self.assert_(np.array_equal(from_records_only.columns,
                                expected_columns))

    # explicit index passed alongside the record array
    with_index = DataFrame(records, index=frame_index)
    self.assert_(np.array_equal(with_index.columns, expected_columns))
    self.assert_(with_index.index.equals(frame_index))

def test_constructor_bool(self):
df = DataFrame({0 : np.ones(10, dtype=bool),
1 : np.zeros(10, dtype=bool)})
Expand Down

0 comments on commit 955b727

Please sign in to comment.