diff --git a/dateparser/parser.py b/dateparser/parser.py
index 67a586e9b..f123ab33a 100644
--- a/dateparser/parser.py
+++ b/dateparser/parser.py
@@ -598,16 +598,11 @@ def _correct_for_day(self, dateobj):
         return dateobj
 
     def _correct_for_month(self, dateobj):
-        relative_base = getattr(self.settings, "RELATIVE_BASE", None)
-        relative_base_month = (
-            relative_base.month if hasattr(relative_base, "month") else relative_base
-        )
-
         if getattr(self, "_token_month", None):
             return dateobj
 
         dateobj = set_correct_month_from_settings(
-            dateobj, self.settings, relative_base_month
+            dateobj, self.settings, current_month=self.now.month
         )
         return dateobj
 
diff --git a/dateparser/search/search.py b/dateparser/search/search.py
index 3d38d2792..6d39c7570 100644
--- a/dateparser/search/search.py
+++ b/dateparser/search/search.py
@@ -218,6 +218,18 @@ def __init__(self):
         self.available_language_map = self.loader.get_locale_map()
         self.search = _ExactLanguageSearch(self.loader)
 
+    def _get_candidate_languages(self, detected_language, languages):
+        """Return deduplicated languages to try, detected language first."""
+        candidates = []
+        if detected_language:
+            candidates.append(detected_language)
+
+        if isinstance(languages, (list, tuple, Set)) and len(languages) > 1:
+            candidates.extend(languages)
+
+        # dict.fromkeys keeps first-seen order while dropping repeats.
+        return list(dict.fromkeys(candidates))
+
     @apply_settings
     def detect_language(
         self, text, languages, settings=None, detect_languages_function=None
@@ -304,13 +316,23 @@ def search_dates(
             settings=settings,
             detect_languages_function=detect_languages_function,
         )
-        if not language_shortname:
+
+        candidate_languages = self._get_candidate_languages(
+            language_shortname, languages
+        )
+        if not candidate_languages:
             return {"Language": None, "Dates": None}
+
+        for candidate_language in candidate_languages:
+            dates = self.search.search_parse(
+                candidate_language, text, settings=settings
+            )
+            if dates:
+                return {"Language": candidate_language, "Dates": dates}
+
         return {
             "Language": language_shortname,
-            "Dates": self.search.search_parse(
-                language_shortname, text, settings=settings
-            ),
+            "Dates": [],
         }
 
     def preprocess_text(self, text, languages):
diff --git a/docs/supported_locales.rst b/docs/supported_locales.rst
index 9b48b5ed5..49b8503cf 100644
--- a/docs/supported_locales.rst
+++ b/docs/supported_locales.rst
@@ -3,6 +3,12 @@
 Supported languages and locales
 ===============================
 
+The ``region`` argument accepts the region subtag from one of the locale
+codes listed below. For example, ``languages=['en'], region='IN'`` uses the
+``en-IN`` locale, while ``languages=['fr'], region='CA'`` uses ``fr-CA``.
+If you already know the full locale code, pass it directly through
+``locales=['en-IN']`` instead.
+
 ============ ================================================================
 Language     Locales
 ============ ================================================================
diff --git a/tests/test_date_parser.py b/tests/test_date_parser.py
index 212e0220d..87a5a871c 100644
--- a/tests/test_date_parser.py
+++ b/tests/test_date_parser.py
@@ -1346,6 +1346,23 @@ def test_dates_with_no_day_or_month(
         self.then_date_was_parsed_by_date_parser()
         self.then_date_obj_exactly_is(expected)
 
+    def test_dates_with_no_day_or_month_use_same_current_date_for_month_and_day(self):
+        class ParserDateTime(datetime):
+            @classmethod
+            def now(cls, tz=None):
+                return datetime(2026, 5, 31, 12, 0, tzinfo=tz)
+
+        class UtilsDateTime(datetime):
+            @classmethod
+            def now(cls, tz=None):
+                return datetime(2026, 6, 1, 12, 0, tzinfo=tz)
+
+        with (
+            patch("dateparser.parser.datetime", ParserDateTime),
+            patch("dateparser.utils.datetime", UtilsDateTime),
+        ):
+            self.assertEqual(parse("2014"), datetime(2014, 5, 31))
+
     @parameterized.expand(
         [
             param(
diff --git a/tests/test_search.py b/tests/test_search.py
index 47e380cf7..eea9360f0 100644
--- a/tests/test_search.py
+++ b/tests/test_search.py
@@ -1020,6 +1020,12 @@ def test_detection(self, shortname, text):
                 settings={"STRICT_PARSING": True},
                 expected=None,
             ),
+            param(
+                text="Date de facture 23 juillet 2020 Condition Redevable livraison FR",
+                languages=["en", "fr", "es", "pt", "de", "it", "ar"],
+                settings={"STRICT_PARSING": True},
+                expected=[("23 juillet 2020", datetime.datetime(2020, 7, 23, 0, 0))],
+            ),
             param(text="a Americ", languages=None, settings=None, expected=None),
             # Date with comma and apostrophe
             param(