forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 3
/
test_to_from_scipy.py
185 lines (151 loc) · 6.56 KB
/
test_to_from_scipy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
from distutils.version import LooseVersion
import numpy as np
import pytest
from pandas.core.dtypes.common import is_bool_dtype
import pandas as pd
from pandas import SparseDataFrame, SparseSeries
from pandas.core.sparse.api import SparseDtype
from pandas.util import testing as tm
scipy = pytest.importorskip('scipy')
ignore_matrix_warning = pytest.mark.filterwarnings(
"ignore:the matrix subclass:PendingDeprecationWarning"
)
@pytest.mark.parametrize('index', [None, list('abc')]) # noqa: F811
@pytest.mark.parametrize('columns', [None, list('def')])
@pytest.mark.parametrize('fill_value', [None, 0, np.nan])
@pytest.mark.parametrize('dtype', [bool, int, float, np.uint16])
@ignore_matrix_warning
def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):
# GH 4343
# Make one ndarray and from it one sparse matrix, both to be used for
# constructing frames and comparing results
arr = np.eye(3, dtype=dtype)
# GH 16179
arr[0, 1] = dtype(2)
try:
spm = spmatrix(arr)
assert spm.dtype == arr.dtype
except (TypeError, AssertionError):
# If conversion to sparse fails for this spmatrix type and arr.dtype,
# then the combination is not currently supported in NumPy, so we
# can just skip testing it thoroughly
return
sdf = SparseDataFrame(spm, index=index, columns=columns,
default_fill_value=fill_value)
# Expected result construction is kind of tricky for all
# dtype-fill_value combinations; easiest to cast to something generic
# and except later on
rarr = arr.astype(object)
rarr[arr == 0] = np.nan
expected = SparseDataFrame(rarr, index=index, columns=columns).fillna(
fill_value if fill_value is not None else np.nan)
# Assert frame is as expected
sdf_obj = sdf.astype(object)
tm.assert_sp_frame_equal(sdf_obj, expected)
tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())
# Assert spmatrices equal
assert dict(sdf.to_coo().todok()) == dict(spm.todok())
# Ensure dtype is preserved if possible
# XXX: verify this
res_dtype = bool if is_bool_dtype(dtype) else dtype
tm.assert_contains_all(sdf.dtypes.apply(lambda dtype: dtype.subtype),
{np.dtype(res_dtype)})
assert sdf.to_coo().dtype == res_dtype
# However, adding a str column results in an upcast to object
sdf['strings'] = np.arange(len(sdf)).astype(str)
assert sdf.to_coo().dtype == np.object_
@pytest.mark.parametrize('fill_value', [None, 0, np.nan]) # noqa: F811
@ignore_matrix_warning
@pytest.mark.filterwarnings("ignore:object dtype is not supp:UserWarning")
def test_from_to_scipy_object(spmatrix, fill_value):
# GH 4343
dtype = object
columns = list('cd')
index = list('ab')
if (spmatrix is scipy.sparse.dok_matrix and LooseVersion(
scipy.__version__) >= LooseVersion('0.19.0')):
pytest.skip("dok_matrix from object does not work in SciPy >= 0.19")
# Make one ndarray and from it one sparse matrix, both to be used for
# constructing frames and comparing results
arr = np.eye(2, dtype=dtype)
try:
spm = spmatrix(arr)
assert spm.dtype == arr.dtype
except (TypeError, AssertionError):
# If conversion to sparse fails for this spmatrix type and arr.dtype,
# then the combination is not currently supported in NumPy, so we
# can just skip testing it thoroughly
return
sdf = SparseDataFrame(spm, index=index, columns=columns,
default_fill_value=fill_value)
# Expected result construction is kind of tricky for all
# dtype-fill_value combinations; easiest to cast to something generic
# and except later on
rarr = arr.astype(object)
rarr[arr == 0] = np.nan
expected = SparseDataFrame(rarr, index=index, columns=columns).fillna(
fill_value if fill_value is not None else np.nan)
# Assert frame is as expected
sdf_obj = sdf.astype(SparseDtype(object, fill_value))
tm.assert_sp_frame_equal(sdf_obj, expected)
tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())
# Assert spmatrices equal
assert dict(sdf.to_coo().todok()) == dict(spm.todok())
# Ensure dtype is preserved if possible
res_dtype = object
tm.assert_contains_all(sdf.dtypes.apply(lambda dtype: dtype.subtype),
{np.dtype(res_dtype)})
assert sdf.to_coo().dtype == res_dtype
@ignore_matrix_warning
def test_from_scipy_correct_ordering(spmatrix):
# GH 16179
arr = np.arange(1, 5).reshape(2, 2)
try:
spm = spmatrix(arr)
assert spm.dtype == arr.dtype
except (TypeError, AssertionError):
# If conversion to sparse fails for this spmatrix type and arr.dtype,
# then the combination is not currently supported in NumPy, so we
# can just skip testing it thoroughly
return
sdf = SparseDataFrame(spm)
expected = SparseDataFrame(arr)
tm.assert_sp_frame_equal(sdf, expected)
tm.assert_frame_equal(sdf.to_dense(), expected.to_dense())
@ignore_matrix_warning
def test_from_scipy_fillna(spmatrix):
# GH 16112
arr = np.eye(3)
arr[1:, 0] = np.nan
try:
spm = spmatrix(arr)
assert spm.dtype == arr.dtype
except (TypeError, AssertionError):
# If conversion to sparse fails for this spmatrix type and arr.dtype,
# then the combination is not currently supported in NumPy, so we
# can just skip testing it thoroughly
return
sdf = SparseDataFrame(spm).fillna(-1.0)
# Returning frame should fill all nan values with -1.0
expected = SparseDataFrame({
0: SparseSeries([1., -1, -1]),
1: SparseSeries([np.nan, 1, np.nan]),
2: SparseSeries([np.nan, np.nan, 1]),
}, default_fill_value=-1)
# fill_value is expected to be what .fillna() above was called with
# We don't use -1 as initial fill_value in expected SparseSeries
# construction because this way we obtain "compressed" SparseArrays,
# avoiding having to construct them ourselves
for col in expected:
expected[col].fill_value = -1
tm.assert_sp_frame_equal(sdf, expected)
def test_index_names_multiple_nones():
# https://github.com/pandas-dev/pandas/pull/24092
sparse = pytest.importorskip("scipy.sparse")
s = (pd.Series(1, index=pd.MultiIndex.from_product([['A', 'B'], [0, 1]]))
.to_sparse())
result, _, _ = s.to_coo()
assert isinstance(result, sparse.coo_matrix)
result = result.toarray()
expected = np.ones((2, 2), dtype="int64")
tm.assert_numpy_array_equal(result, expected)