Skip to content

Commit

Permalink
Add DateFormatParser
Browse files Browse the repository at this point in the history
  • Loading branch information
thiemowmde committed Jul 20, 2015
1 parent 64be073 commit c0d14e9
Show file tree
Hide file tree
Showing 2 changed files with 378 additions and 0 deletions.
267 changes: 267 additions & 0 deletions src/ValueParsers/DateFormatParser.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
<?php

namespace ValueParsers;

use DataValues\TimeValue;

/**
 * This parser is in essence the inverse operation of Language::sprintfDate.
 *
 * The configured date format is translated, code by code, into a regular
 * expression with named groups (year, month, day, hour, minute, second). The
 * input string is matched against that expression and the captured components
 * are assembled into a TimeValue in the Gregorian calendar. A year component is
 * required; the precision of the result follows the components that are present.
 *
 * @see Language::sprintfDate
 *
 * @since 0.8.1
 *
 * @licence GNU GPL v2+
 * @author Thiemo Mättig
 */
class DateFormatParser extends StringValueParser {

	const FORMAT_NAME = 'datetime';

	/**
	 * Option: the date format string to invert. Defaults to 'j F Y'.
	 */
	const OPT_DATE_FORMAT = 'dateFormat';

	/**
	 * Option: array (or null) whose values, when found in the input, are replaced
	 * with their keys before numbers are read. Empty values are ignored.
	 */
	const OPT_DIGIT_TRANSFORM_TABLE = 'digitTransformTable';

	/**
	 * Option: array (or null) mapping a month number to a list of names accepted
	 * for that month.
	 */
	const OPT_MONTH_NAMES = 'monthNames';

	/**
	 * @param ParserOptions|null $options
	 */
	public function __construct( ParserOptions $options = null ) {
		parent::__construct( $options );

		$this->defaultOption( self::OPT_DATE_FORMAT, 'j F Y' );
		$this->defaultOption( self::OPT_DIGIT_TRANSFORM_TABLE, null );
		$this->defaultOption( self::OPT_MONTH_NAMES, null );
	}

	/**
	 * Parses a date string according to the configured date format.
	 *
	 * @see StringValueParser::stringParse
	 *
	 * @param string $value
	 *
	 * @throws ParseException if the value does not match the format or no year was found
	 * @return TimeValue
	 */
	protected function stringParse( $value ) {
		$format = $this->getDateFormat();
		$numberCharacters = $this->getNumberCharacters();
		$pattern = '';
		$formatLength = strlen( $format );

		for ( $p = 0; $p < $formatLength; $p++ ) {
			$code = $format[$p];

			// "x" is a prefix, the code continues with the next character.
			if ( $code === 'x' && $p < $formatLength - 1 ) {
				$code .= $format[++$p];
			}

			// Some "x" prefixes are themselves followed by one more character.
			if ( preg_match( '/^x[ijkmot]$/', $code ) && $p < $formatLength - 1 ) {
				$code .= $format[++$p];
			}

			switch ( $code ) {
				case 'Y':
					$pattern .= '(?P<year>[' . $numberCharacters . ']+)\p{Z}*';
					break;
				case 'F':
				case 'm':
				case 'M':
				case 'n':
				case 'xg':
					// A month is either a 1 or 2 digit number, or one of the configured
					// names. Each month gets its own named sub-group ("month9", ...) so
					// the matching month number can be read back from the match.
					$pattern .= '(?P<month>[' . $numberCharacters . ']{1,2}';
					foreach ( $this->getMonthNames() as $i => $monthNames ) {
						$pattern .= '|(?P<month' . $i . '>'
							. implode( '|', array_map(
								function( $monthName ) {
									return preg_quote( $monthName, '/' );
								}, $monthNames
							) )
							. ')';
					}
					$pattern .= ')\p{P}*\p{Z}*';
					break;
				case 'd':
				case 'j':
					$pattern .= '(?P<day>[' . $numberCharacters . ']{1,2})\p{P}*\p{Z}*';
					break;
				case 'G':
				case 'H':
					$pattern .= '(?P<hour>[' . $numberCharacters . ']{1,2})\p{Z}*';
					break;
				case 'i':
					$pattern .= '(?P<minute>[' . $numberCharacters . ']{1,2})\p{Z}*';
					break;
				case 's':
					$pattern .= '(?P<second>[' . $numberCharacters . ']{1,2})\p{Z}*';
					break;
				case '\\':
					if ( $p < $formatLength - 1 ) {
						// Backslash escapes the next format character, match it literally.
						$pattern .= preg_quote( $format[++$p], '/' );
					} else {
						// A trailing backslash stands for itself. It must be escaped in
						// the pattern, otherwise it would escape the "$" anchor that is
						// appended below and turn it into a literal dollar sign.
						$pattern .= '\\\\';
					}
					break;
				case '"':
					$endQuote = strpos( $format, '"', $p + 1 );
					if ( $endQuote !== false ) {
						// Everything between the quotes is matched literally.
						$pattern .= preg_quote( substr( $format, $p + 1, $endQuote - $p - 1 ), '/' );
						$p = $endQuote;
					} else {
						// Unterminated quote, match the quote character itself.
						$pattern .= '"';
					}
					break;
				case 'xn':
				case 'xN':
					// We can ignore raw and raw toggle when parsing
					break;
				default:
					// Punctuation and separators in the format are optional in the
					// input, everything else must appear literally.
					if ( preg_match( '/^\p{P}+$/u', $format[$p] ) ) {
						$pattern .= '\p{P}*';
					} elseif ( preg_match( '/^\p{Z}+$/u', $format[$p] ) ) {
						$pattern .= '\p{Z}*';
					} else {
						$pattern .= preg_quote( $format[$p], '/' );
					}
			}
		}

		$isMatch = preg_match( '/^\p{Z}*' . $pattern . '$/iu', $value, $matches );

		if ( $isMatch && isset( $matches['year'] ) ) {
			// Components are checked from coarse to fine, the last one present
			// determines the precision of the result.
			$precision = TimeValue::PRECISION_YEAR;
			$time = array( $this->normalizeNumber( $matches['year'] ), 0, 0, 0, 0, 0 );

			if ( isset( $matches['month'] ) ) {
				$precision = TimeValue::PRECISION_MONTH;
				$time[1] = $this->findMonthMatch( $matches );
			}

			if ( isset( $matches['day'] ) ) {
				$precision = TimeValue::PRECISION_DAY;
				$time[2] = $this->normalizeNumber( $matches['day'] );
			}

			if ( isset( $matches['hour'] ) ) {
				$precision = TimeValue::PRECISION_HOUR;
				$time[3] = $this->normalizeNumber( $matches['hour'] );
			}

			if ( isset( $matches['minute'] ) ) {
				$precision = TimeValue::PRECISION_MINUTE;
				$time[4] = $this->normalizeNumber( $matches['minute'] );
			}

			if ( isset( $matches['second'] ) ) {
				$precision = TimeValue::PRECISION_SECOND;
				$time[5] = $this->normalizeNumber( $matches['second'] );
			}

			// TODO: Check for Int32 overflows.
			$timestamp = vsprintf( '%+.0f-%02d-%02dT%02d:%02d:%02dZ', $time );
			return new TimeValue( $timestamp, 0, 0, 0, $precision, TimeValue::CALENDAR_GREGORIAN );
		}

		throw new ParseException(
			"Failed to parse $value (" . $this->parseFormattedNumber( $value ) . ')',
			$value
		);
	}

	/**
	 * Determines the month number from a successful match. The per-month named
	 * groups "month1" to "month12" are checked first; if none of them captured
	 * anything, the generic "month" group is interpreted instead.
	 *
	 * @param string[] $matches
	 *
	 * @return int
	 */
	private function findMonthMatch( $matches ) {
		for ( $i = 1; $i <= 12; $i++ ) {
			if ( !empty( $matches['month' . $i] ) ) {
				return $i;
			}
		}

		return $this->normalizeMonth( $matches['month'] );
	}

	/**
	 * Converts a month string to a number. The string is compared, ignoring case
	 * as well as surrounding separators and trailing punctuation, with the
	 * configured month names. If no name matches it is read as a number.
	 *
	 * @param string $month
	 *
	 * @return int
	 */
	private function normalizeMonth( $month ) {
		foreach ( $this->getMonthNames() as $i => $monthNames ) {
			$pattern = '/^\p{Z}*('
				. implode( '|', array_map(
					function( $monthName ) {
						// Trailing punctuation in a configured name is optional.
						return preg_quote( preg_replace( '/\p{P}+$/u', '', $monthName ), '/' );
					}, $monthNames
				) )
				. ')[\p{P}\p{Z}]*$/iu';

			if ( preg_match( $pattern, $month ) ) {
				return $i;
			}
		}

		return $this->normalizeNumber( $month );
	}

	/**
	 * Applies the digit transform table, strips every remaining non-digit
	 * character and converts the result to a floating point number.
	 *
	 * @param string $number
	 *
	 * @return double
	 */
	private function normalizeNumber( $number ) {
		$number = $this->parseFormattedNumber( $number );
		return doubleval( preg_replace( '/\D+/s', '', $number ) );
	}

	/**
	 * Replaces all values of the digit transform table that appear in the given
	 * string with their keys. Returns the string unchanged if no table is set.
	 *
	 * @param string $number
	 *
	 * @return string
	 */
	private function parseFormattedNumber( $number ) {
		$transformTable = $this->getDigitTransformTable();

		if ( is_array( $transformTable ) ) {
			// Eliminate empty array values (bug T66347).
			$transformTable = array_filter( $transformTable );
			$number = strtr( $number, array_flip( $transformTable ) );
		}

		return $number;
	}

	/**
	 * Returns the characters accepted as digits, escaped for use inside a
	 * character class of a regular expression delimited by "/".
	 *
	 * @return string
	 */
	private function getNumberCharacters() {
		// TODO: Should there be a relaxed mode with \p{N} instead of \d?
		$numberCharacters = '\d';

		$transformTable = $this->getDigitTransformTable();
		if ( is_array( $transformTable ) ) {
			$numberCharacters .= preg_quote( implode( '', $transformTable ), '/' );
		}

		return $numberCharacters;
	}

	/**
	 * @return string
	 */
	private function getDateFormat() {
		return $this->getOption( self::OPT_DATE_FORMAT );
	}

	/**
	 * @return string[]|null
	 */
	private function getDigitTransformTable() {
		return $this->getOption( self::OPT_DIGIT_TRANSFORM_TABLE );
	}

	/**
	 * Returns the configured month names, or an empty array if none are set.
	 *
	 * @return array[]
	 */
	private function getMonthNames() {
		return $this->getOption( self::OPT_MONTH_NAMES ) ?: array();
	}

}
111 changes: 111 additions & 0 deletions tests/ValueParsers/DateFormatParserTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
<?php

namespace ValueParsers\Test;

use DataValues\TimeValue;
use ValueParsers\DateFormatParser;
use ValueParsers\ParserOptions;

/**
* @covers ValueParsers\DateFormatParser
*
* @group ValueParsers
* @group WikibaseLib
* @group Wikibase
* @group TimeParsers
*
* @licence GNU GPL v2+
* @author Thiemo Mättig
*/
class DateFormatParserTest extends StringValueParserTest {

	/**
	 * @deprecated since 0.3, just use getInstance.
	 */
	protected function getParserClass() {
		throw new \LogicException( 'Should not be called, use getInstance' );
	}

	/**
	 * Creates a parser with default options.
	 *
	 * @see ValueParserTestBase::getInstance
	 *
	 * @return DateFormatParser
	 */
	protected function getInstance() {
		return new DateFormatParser();
	}

	/**
	 * Provides triples of input string, expected TimeValue and a parser
	 * configured for the individual case.
	 *
	 * @see ValueParserTestBase::validInputProvider
	 */
	public function validInputProvider() {
		$format = 'd. M Y';
		$expected = '+2014-09-01T00:00:00Z';
		$longName = array( 9 => array( 'September' ) );
		$shortName = array( 9 => array( 'Sep' ) );

		// Each row: date string, date format, digit transform table, month names,
		// expected timestamp, and optionally precision and calendar model.
		$rows = array(
			array( '1 9 2014', $format, null, null, $expected ),
			array( '1 September 2014', $format, null, $longName, $expected ),
			array( '1. Sep. 2014', $format, null, $shortName, $expected ),
			array( '1. September 2014', $format, null, $longName, $expected ),
			array( '1.September.2014', $format, null, $longName, $expected ),
		);

		return array_map(
			function( array $row ) {
				$precision = TimeValue::PRECISION_DAY;
				if ( isset( $row[5] ) ) {
					$precision = $row[5];
				}

				$calendarModel = TimeValue::CALENDAR_GREGORIAN;
				if ( isset( $row[6] ) ) {
					$calendarModel = $row[6];
				}

				$expectedValue = new TimeValue( $row[4], 0, 0, 0, $precision, $calendarModel );
				$parser = new DateFormatParser( new ParserOptions( array(
					DateFormatParser::OPT_DATE_FORMAT => $row[1],
					DateFormatParser::OPT_DIGIT_TRANSFORM_TABLE => $row[2],
					DateFormatParser::OPT_MONTH_NAMES => $row[3],
				) ) );

				return array( $row[0], $expectedValue, $parser );
			},
			$rows
		);
	}

	/**
	 * Provides the invalid inputs of the parent test, plus the parser specific
	 * ones listed here (currently none).
	 *
	 * @see StringValueParserTest::invalidInputProvider
	 */
	public function invalidInputProvider() {
		$cases = parent::invalidInputProvider();

		$additionalValues = array(
		);

		foreach ( $additionalValues as $additionalValue ) {
			$cases[] = array( $additionalValue );
		}

		return $cases;
	}

}

0 comments on commit c0d14e9

Please sign in to comment.