Skip to content

Commit

Permalink
astype checks notnull. Fixes: pandas-dev#8732. Added benchmark, added null check.
Browse files Browse the repository at this point in the history
  • Loading branch information
Vikram Bhandoh committed Nov 29, 2014
1 parent 0a2ea0a commit bea542a
Show file tree
Hide file tree
Showing 7 changed files with 83 additions and 4 deletions.
4 changes: 2 additions & 2 deletions pandas/core/common.py
Expand Up @@ -2580,14 +2580,14 @@ def _astype_nansafe(arr, dtype, copy=True):
if np.isnan(arr).any():
raise ValueError('Cannot convert NA to integer')
elif arr.dtype == np.object_ and np.issubdtype(dtype.type, np.integer):
has_nulls = isnull(arr).any() or not is_numeric_dtype(arr.dtype)
# work around NumPy brokenness, #1987
return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape)
return lib.astype_intsafe(arr.ravel(), dtype, has_nulls).reshape(arr.shape)
elif issubclass(dtype.type, compat.text_type):
# in Py3 that's str, in Py2 that's unicode
return lib.astype_unicode(arr.ravel()).reshape(arr.shape)
elif issubclass(dtype.type, compat.string_types):
return lib.astype_str(arr.ravel()).reshape(arr.shape)

if copy:
return arr.astype(dtype)
return arr.view(dtype)
Expand Down
5 changes: 4 additions & 1 deletion pandas/lib.pyx
Expand Up @@ -827,7 +827,7 @@ def vec_binop(ndarray[object] left, ndarray[object] right, object op):
return maybe_convert_bool(result)


def astype_intsafe(ndarray[object] arr, new_dtype):
def astype_intsafe(ndarray[object] arr, new_dtype, has_nulls):
cdef:
Py_ssize_t i, n = len(arr)
object v
Expand All @@ -837,6 +837,9 @@ def astype_intsafe(ndarray[object] arr, new_dtype):
# on 32-bit, 1.6.2 numpy M8[ns] is a subdtype of integer, which is weird
is_datelike = new_dtype in ['M8[ns]','m8[ns]']

if not is_datelike and not has_nulls:
return arr.astype(new_dtype)

result = np.empty(n, dtype=new_dtype)
for i in range(n):
v = arr[i]
Expand Down
39 changes: 39 additions & 0 deletions pandas/tests/test_frame.py
Expand Up @@ -2141,6 +2141,45 @@ def setUp(self):
self.simple = DataFrame(arr, columns=['one', 'two', 'three'],
index=['a', 'b', 'c'])

def test_hasnulls(self):
    """Check that ``DataFrame.astype`` agrees with a raw ndarray cast.

    Regression test for GitHub issue 8732.  For several frames that
    contain missing values, the result of ``df.astype(dtype)`` must equal
    a frame rebuilt from ``df.values.astype(dtype)`` with the same index
    and columns.
    """
    def assert_astype_matches_values(frame, dtype):
        # Casting through DataFrame.astype must give the same result as
        # casting the underlying ndarray and re-wrapping it.
        expected = DataFrame(frame.values.astype(dtype),
                             index=frame.index,
                             columns=frame.columns)
        assert_frame_equal(frame.astype(dtype), expected)

    # Force a NaN into column 'A'.  A single .loc call is used instead of
    # chained indexing (df['A'][label] = ...), which may assign into a
    # temporary copy and leave the frame unchanged.
    df = tm.makeMissingDataframe()
    df.loc[df.index[0], 'A'] = np.nan
    self.assertFalse(df['A'].notnull().all())
    assert_astype_matches_values(df, np.float64)
    # Builtin ``str`` is what the ``np.str`` alias referred to.
    assert_astype_matches_values(df, str)

    # Same check on a custom-labelled frame, nulling its first column.
    df = tm.makeMissingCustomDataframe(10, 5)
    col = df.columns[0]
    df.loc[df.index[0], col] = np.nan
    self.assertFalse(df[col].notnull().all())
    assert_astype_matches_values(df, str)

    # Mixed-dtype frame: 'A', 'B' and 'C' each end with NaN, 'D' has none.
    df = tm.makeMixedDataFrameWithNaN()
    self.assertFalse(df['A'].notnull().all())
    self.assertFalse(df['B'].notnull().all())
    self.assertFalse(df['C'].notnull().all())
    self.assertTrue(df['D'].notnull().all())
    # Builtin ``object`` is what the ``np.object`` alias referred to.
    assert_astype_matches_values(df, object)

def test_get_axis(self):
f = self.frame
self.assertEqual(f._get_axis_number(0), 0)
Expand Down
9 changes: 8 additions & 1 deletion pandas/tests/test_series.py
Expand Up @@ -560,11 +560,18 @@ def test_scalar_conversion(self):
def test_astype(self):
s = Series(np.random.randn(5),name='foo')

for dtype in ['float32','float64','int64','int32']:
for dtype in ['float32','float64','int64','int32', 'object']:
astyped = s.astype(dtype)
self.assertEqual(astyped.dtype, dtype)
self.assertEqual(astyped.name, s.name)

def test_astype_to(self):
    """Cast an integer Series to each target dtype and check the dtype."""
    values = np.random.randint(1, 10, size=100)
    series = Series(values)
    targets = ('float32', 'float64', 'int64', 'int32', 'object')
    for target in targets:
        converted = series.astype(target)
        self.assertEqual(converted.dtype, target)

def test_constructor(self):
# Recognize TimeSeries
self.assertTrue(self.ts.is_time_series)
Expand Down
14 changes: 14 additions & 0 deletions pandas/util/testing.py
Expand Up @@ -877,6 +877,20 @@ def getMixedTypeDict():

return index, data


def makeMixedDataFrameWithNaN():
    """Build a six-row mixed-dtype DataFrame with trailing NaNs.

    The frame is indexed by the labels 'a' through 'f'.  Columns 'A' and
    'B' hold floats and 'C' holds strings; the last entry of each of these
    three columns is NaN.  Column 'D' holds six business dates starting
    from 1/1/2009 and has no missing entry.
    """
    missing = np.nan
    labels = Index(list('abcdef'))

    ascending = [float(i) for i in range(5)]
    alternating = [float(i % 2) for i in range(5)]
    words = ['foo1', 'foo2', 'foo3', 'foo4', 'foo5']
    dates = bdate_range('1/1/2009', periods=6)

    columns = {
        'A': ascending + [missing],
        'B': alternating + [missing],
        'C': words + [missing],
        'D': dates,
    }
    return DataFrame(columns, index=labels)


def makeMixedDataFrame():
    """Return a DataFrame built from the data part of getMixedTypeDict()."""
    # getMixedTypeDict() returns an (index, data) pair; only data is used.
    _, data = getMixedTypeDict()
    return DataFrame(data)

Expand Down
15 changes: 15 additions & 0 deletions vb_suite/astype.py
@@ -0,0 +1,15 @@
from vbench.api import Benchmark

# Setup code run before the timed statement: builds ``s``, a Series of N
# random integers.  The array size is taken from N so that changing N
# actually changes the benchmark size (it was previously hard-coded
# alongside an unused N).
common_setup = """from pandas_vb_common import *
from datetime import timedelta
import pandas as pd
import numpy as np
N = 1000000
arr = np.random.randint(1,10,size=N)
s = pd.Series(arr)
"""

# Times the cast of the integer Series ``s`` to strings.
astype_test = Benchmark('s.astype(np.str)',
                        common_setup,
                        name='astype_test')
1 change: 1 addition & 0 deletions vb_suite/suite.py
Expand Up @@ -13,6 +13,7 @@
'indexing',
'io_bench',
'io_sql',
'astype',
'inference',
'hdfstore_bench',
'join_merge',
Expand Down

0 comments on commit bea542a

Please sign in to comment.