forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 3
/
test_parsing.py
242 lines (202 loc) · 8.24 KB
/
test_parsing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
# -*- coding: utf-8 -*-
"""
Tests for Timestamp parsing, aimed at pandas/_libs/tslibs/parsing.pyx
"""
from datetime import datetime
import numpy as np
import pytest
from dateutil.parser import parse
import pandas as pd
import pandas.util._test_decorators as td
from pandas.conftest import is_dateutil_le_261, is_dateutil_gt_261
from pandas import compat
from pandas.util import testing as tm
from pandas._libs.tslibs import parsing
from pandas._libs.tslibs.parsing import parse_time_string
def test_to_datetime1():
    """Exercise ``pd.to_datetime`` on a datetime, a compact string, and junk.

    The datetime object and the ``'20080115'`` string must both compare
    equal to ``datetime(2008, 1, 15)``; the unparseable string must come
    back equal to itself when ``errors='ignore'`` is passed.
    """
    expected = datetime(2008, 1, 15)

    assert pd.to_datetime(datetime(2008, 1, 15)) == expected
    assert pd.to_datetime('20080115') == expected

    # unparseable
    unparseable = 'Month 1, 1999'
    assert pd.to_datetime(unparseable, errors='ignore') == unparseable
class TestParseQuarters(object):
    """Tests of ``parse_time_string`` on quarter-style strings."""

    def test_parse_time_string(self):
        """Upper- and lower-case ``Q`` must give equal parse results."""
        date_upper, parsed_upper, reso_upper = parse_time_string('4Q1984')
        date_lower, parsed_lower, reso_lower = parse_time_string('4q1984')

        assert date_upper == date_lower
        assert parsed_upper == parsed_lower
        assert reso_upper == reso_lower

    def test_parse_time_quarter_w_dash(self):
        """Dashed quarter strings must parse like their undashed forms."""
        # https://github.com/pandas-dev/pandas/issues/9688
        pairs = [('1988-Q2', '1988Q2'), ('2Q-1988', '2Q1988')]

        for with_dash, without_dash in pairs:
            date_dash, parsed_dash, reso_dash = parse_time_string(with_dash)
            date_plain, parsed_plain, reso_plain = parse_time_string(
                without_dash)

            assert date_dash == date_plain
            assert parsed_dash == parsed_plain
            assert reso_dash == reso_plain

        # Dashes in these positions must be rejected with DateParseError.
        for malformed in ("-2Q1992", "2-Q1992", "4-4Q1992"):
            pytest.raises(parsing.DateParseError, parse_time_string,
                          malformed)
class TestDatetimeParsingWrappers(object):
    """Tests of helper entry points exposed by the ``parsing`` module."""

    def test_does_not_convert_mixed_integer(self):
        """Check ``_does_string_look_like_datetime`` on both kinds of input."""
        looks_like_datetime = parsing._does_string_look_like_datetime

        # Strings that must NOT be reported as datetime-like.
        for rejected in ('-50000', '999', '123.1234', 'm', 'T'):
            assert not looks_like_datetime(rejected)

        # Strings that must be reported as datetime-like.
        accepted_strings = (
            '2012-01-01',
            '01/01/2012',
            'Mon Sep 16, 2013',
            '01012012',
            '0101',
            '1-1',
        )
        for accepted in accepted_strings:
            assert looks_like_datetime(accepted)

    def test_parsers_quarterly_with_freq(self):
        """Quarter strings: error messages and ``freq``-dependent results."""
        bad_quarter_msg = ('Incorrect quarterly string is given, quarter '
                           'must be between 1 and 4: 2013Q5')
        with tm.assert_raises_regex(parsing.DateParseError, bad_quarter_msg):
            parsing.parse_time_string('2013Q5')

        # GH 5418
        bad_freq_msg = ('Unable to retrieve month information from given '
                        'freq: INVLD-L-DEC-SAT')
        with tm.assert_raises_regex(parsing.DateParseError, bad_freq_msg):
            parsing.parse_time_string('2013Q1', freq='INVLD-L-DEC-SAT')

        # (input string, freq) -> expected first element of the result
        cases = {
            ('2013Q2', None): datetime(2013, 4, 1),
            ('2013Q2', 'A-APR'): datetime(2012, 8, 1),
            ('2013-Q2', 'A-DEC'): datetime(2013, 4, 1),
        }
        for (date_str, freq), expected in compat.iteritems(cases):
            parsed = parsing.parse_time_string(date_str, freq=freq)
            result, _, _ = parsed
            assert result == expected

    def test_parsers_quarter_invalid(self):
        """Each malformed quarter string must raise ``ValueError``."""
        malformed_quarters = ('2Q 2005', '2Q-200A', '2Q-200',
                              '22Q2005', '6Q-20', '2Q200.')
        for malformed in malformed_quarters:
            pytest.raises(ValueError, parsing.parse_time_string, malformed)

    def test_parsers_monthfreq(self):
        """Six-digit strings parsed with ``freq='M'`` give the month start."""
        cases = {
            '201101': datetime(2011, 1, 1, 0, 0),
            '200005': datetime(2000, 5, 1, 0, 0),
        }
        for date_str, expected in compat.iteritems(cases):
            parsed = parsing.parse_time_string(date_str, freq='M')
            result1, _, _ = parsed
            assert result1 == expected
class TestGuessDatetimeFormat(object):
    """Tests of ``parsing._guess_datetime_format``.

    Most scenarios come in pairs decorated with ``is_dateutil_le_261`` and
    ``is_dateutil_gt_261``: the first of each pair expects a concrete
    format string, the ``_gt_261`` twin expects ``None`` for the same
    inputs.
    """

    @td.skip_if_not_us_locale
    @is_dateutil_le_261
    @pytest.mark.parametrize(
        "string, format",
        [
            ('20111230', '%Y%m%d'),
            ('2011-12-30', '%Y-%m-%d'),
            ('30-12-2011', '%d-%m-%Y'),
            ('2011-12-30 00:00:00', '%Y-%m-%d %H:%M:%S'),
            ('2011-12-30T00:00:00', '%Y-%m-%dT%H:%M:%S'),
            ('2011-12-30 00:00:00.000000', '%Y-%m-%d %H:%M:%S.%f'),
        ])
    def test_guess_datetime_format_with_parseable_formats(
            self, string, format):
        """Each input string must yield the paired format string."""
        guessed = parsing._guess_datetime_format(string)
        assert guessed == format

    @td.skip_if_not_us_locale
    @is_dateutil_gt_261
    @pytest.mark.parametrize(
        "string",
        [
            '20111230',
            '2011-12-30',
            '30-12-2011',
            '2011-12-30 00:00:00',
            '2011-12-30T00:00:00',
            '2011-12-30 00:00:00.000000',
        ])
    def test_guess_datetime_format_with_parseable_formats_gt_261(
            self, string):
        """The same inputs must yield ``None`` under this decorator."""
        guessed = parsing._guess_datetime_format(string)
        assert guessed is None

    @is_dateutil_le_261
    @pytest.mark.parametrize(
        "dayfirst, expected",
        [
            (True, "%d/%m/%Y"),
            (False, "%m/%d/%Y"),
        ])
    def test_guess_datetime_format_with_dayfirst(self, dayfirst, expected):
        """``dayfirst`` selects the day/month order for '01/01/2011'."""
        guessed = parsing._guess_datetime_format(
            '01/01/2011', dayfirst=dayfirst)
        assert guessed == expected

    @is_dateutil_gt_261
    @pytest.mark.parametrize("dayfirst", [True, False])
    def test_guess_datetime_format_with_dayfirst_gt_261(self, dayfirst):
        """'01/01/2011' must yield ``None`` for either ``dayfirst`` value."""
        guessed = parsing._guess_datetime_format(
            '01/01/2011', dayfirst=dayfirst)
        assert guessed is None

    @td.skip_if_has_locale
    @is_dateutil_le_261
    @pytest.mark.parametrize(
        "string, format",
        [
            ('30/Dec/2011', '%d/%b/%Y'),
            ('30/December/2011', '%d/%B/%Y'),
            ('30/Dec/2011 00:00:00', '%d/%b/%Y %H:%M:%S'),
        ])
    def test_guess_datetime_format_with_locale_specific_formats(
            self, string, format):
        """Month-name inputs must yield the paired ``%b``/``%B`` format."""
        guessed = parsing._guess_datetime_format(string)
        assert guessed == format

    @td.skip_if_has_locale
    @is_dateutil_gt_261
    @pytest.mark.parametrize(
        "string",
        [
            '30/Dec/2011',
            '30/December/2011',
            '30/Dec/2011 00:00:00',
        ])
    def test_guess_datetime_format_with_locale_specific_formats_gt_261(
            self, string):
        """The same month-name inputs must yield ``None`` here."""
        guessed = parsing._guess_datetime_format(string)
        assert guessed is None

    def test_guess_datetime_format_invalid_inputs(self):
        """Unguessable inputs must all yield ``None``."""
        # A datetime string must include a year, month and a day for it
        # to be guessable, in addition to being a string that looks like
        # a datetime
        unguessable_inputs = [
            '2013',
            '01/2013',
            '12:00:00',
            '1/1/1/1',
            'this_is_not_a_datetime',
            '51a',
            9,
            datetime(2011, 1, 1),
        ]

        for candidate in unguessable_inputs:
            assert parsing._guess_datetime_format(candidate) is None

    @is_dateutil_le_261
    @pytest.mark.parametrize(
        "string, format",
        [
            ('2011-1-1', '%Y-%m-%d'),
            ('30-1-2011', '%d-%m-%Y'),
            ('1/1/2011', '%m/%d/%Y'),
            ('2011-1-1 00:00:00', '%Y-%m-%d %H:%M:%S'),
            ('2011-1-1 0:0:0', '%Y-%m-%d %H:%M:%S'),
            ('2011-1-3T00:00:0', '%Y-%m-%dT%H:%M:%S'),
        ])
    def test_guess_datetime_format_nopadding(self, string, format):
        """Inputs without zero padding must yield the paired format."""
        # GH 11142
        guessed = parsing._guess_datetime_format(string)
        assert guessed == format

    @is_dateutil_gt_261
    @pytest.mark.parametrize(
        "string",
        [
            '2011-1-1',
            '30-1-2011',
            '1/1/2011',
            '2011-1-1 00:00:00',
            '2011-1-1 0:0:0',
            '2011-1-3T00:00:0',
        ])
    def test_guess_datetime_format_nopadding_gt_261(self, string):
        """The same unpadded inputs must yield ``None`` here."""
        # GH 11142
        guessed = parsing._guess_datetime_format(string)
        assert guessed is None
class TestArrayToDatetime(object):
    """Tests of array-level date parsing in the ``parsing`` module."""

    def test_try_parse_dates(self):
        """``try_parse_dates`` must match element-wise dateutil parsing.

        Both sides are computed with ``dayfirst=True`` over the same
        object-dtype array of date strings.
        """
        date_strings = np.array(['5/1/2000', '6/1/2000', '7/1/2000'],
                                dtype=object)

        # Reference values: dateutil's ``parse`` applied to each element.
        expected = np.array(
            [parse(date_string, dayfirst=True)
             for date_string in date_strings])

        result = parsing.try_parse_dates(date_strings, dayfirst=True)
        tm.assert_numpy_array_equal(result, expected)