Skip to content

Commit

Permalink
Add human duration parser
Browse files Browse the repository at this point in the history
  • Loading branch information
xemlock committed May 6, 2019
1 parent 5906a5a commit 7f87930
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 31 deletions.
2 changes: 0 additions & 2 deletions library/HTMLPurifier/AttrDef/HTML5/Datetime.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
* be extracted from the input, or the extracted data is insufficient for
* the desired format (with the exception of DatetimeGlobal format, which
* uses server timezone offset if none is detected).
*
* @since 0.1.11
*/
class HTMLPurifier_AttrDef_HTML5_Datetime extends HTMLPurifier_AttrDef
{
Expand Down
87 changes: 64 additions & 23 deletions library/HTMLPurifier/AttrDef/HTML5/Duration.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
/**
* Validates HTML5 duration string according to spec
* https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#durations
*
* @since 0.1.11
*/
class HTMLPurifier_AttrDef_HTML5_Duration extends HTMLPurifier_AttrDef
{
Expand All @@ -20,9 +18,7 @@ class HTMLPurifier_AttrDef_HTML5_Duration extends HTMLPurifier_AttrDef
)?
$/xi';

const REGEX_HUMAN = '/^
(\s*\d+\s*[WDHMS]|\s*\d+\.\d+S)+
$/xi';
const REGEX_HUMAN = '/(\d+(\s*[WDHMS]|\.\d+\s*S))/i';

/**
* @param string $string
Expand Down Expand Up @@ -67,15 +63,15 @@ protected function validateISODuration($string)
's' => 0,
);

foreach ($parts as $part => $_) {
if (!isset($match[$part])) {
foreach ($parts as $unit => $_) {
if (!isset($match[$unit])) {
continue;
}

$value = substr($match[$part], 0, -1);
$value = $part === 's' ? (float) $value : (int) $value;
$value = substr($match[$unit], 0, -1);
$value = $unit === 's' ? (float) $value : (int) $value;

$parts[$part] = $value;
$parts[$unit] = $value;
}

// The spec contradicts itself in disallowing weeks in ISO-8601
Expand All @@ -88,24 +84,15 @@ protected function validateISODuration($string)
}

$duration = 'P';

foreach ($parts as $part => $value) {
if ($part === 'h') {
foreach ($parts as $unit => $value) {
if ($unit === 'h') {
$duration .= 'T';
}

if ($value > 0) {
$duration .= (int) $value;
if ($part === 's') {
$msec = round(($value - (int) $value) * 1000);
if ($msec > 0) {
$duration .= rtrim(sprintf('.%03d', $msec), '0');
}
}
$duration .= strtoupper($part);
$duration .= ($unit === 's' ? $this->formatSeconds($value) : $value) . strtoupper($unit);
}
}

$duration = rtrim($duration, 'T');

// At least one element must be present, thus "P" is not a valid
Expand All @@ -122,11 +109,65 @@ protected function validateISODuration($string)

/**
 * Validates a human readable HTML5 duration string, e.g. "1w 2d 3m 4.5s".
 *
 * Every component matched by self::REGEX_HUMAN is examined. For each unit
 * (w, d, h, m, s) the first component with a value greater than zero is
 * kept; later components with the same unit and zero-valued components
 * are ignored. The result lists the kept components in w, d, h, m, s order,
 * lowercased and separated by single spaces. If no component is left, the
 * empty duration "0s" is returned.
 *
 * @param string $string
 * @return boolean|string Normalized duration, or false if no component matched
 */
protected function validateHumanDuration($string)
{
    // NOTE: a leftover unconditional "return false;" used to sit here and
    // made the whole parser below unreachable.
    if (!preg_match_all(self::REGEX_HUMAN, $string, $matches)) {
        return false;
    }

    // One or more duration time components, each with a different duration
    // time component scale, in any order. False marks "unit not seen yet".
    $parts = array(
        'w' => false,
        'd' => false,
        'h' => false,
        'm' => false,
        's' => false,
    );

    foreach ($matches[0] as $match) {
        // The unit is always the last character of a matched component
        $unit = strtolower(substr($match, -1));

        // Drop the unit, then any whitespace that separated it from the number
        $value = rtrim(substr($match, 0, -1));
        // Only seconds may carry a fractional part
        $value = $unit === 's' ? (float) $value : (int) $value;

        // Keep only the first positive value seen for each unit
        if ($value > 0 && $parts[$unit] === false) {
            $parts[$unit] = $value;
        }
    }

    $duration = array();
    foreach ($parts as $unit => $value) {
        if ($value === false) {
            continue;
        }
        $duration[] = ($unit === 's' ? $this->formatSeconds($value) : $value) . $unit;
    }
    $duration = implode(' ', $duration);

    // Only zero-valued components were given; normalize to the empty duration
    if ($duration === '') {
        $duration = '0s';
    }

    return $duration;
}

/**
 * Formats seconds without leading zeros and with at most 3 decimals,
 * trailing zeros in the fractional part removed.
 *
 * The value is rounded to whole milliseconds before it is split into the
 * integer and fractional parts, so that a fraction rounding up to a full
 * second is carried over (4.9996 gives "5", not "4.1").
 *
 * @param float $sec Number of seconds, expected to be non-negative
 * @return string
 */
protected function formatSeconds($sec)
{
    // Work on the total number of milliseconds; rounding the fraction alone
    // could yield 1000, which does not fit into the 3-digit fractional part.
    $total = round($sec * 1000);
    $whole = floor($total / 1000);
    $msec = $total - $whole * 1000;

    if ($msec > 0) {
        // $msec is non-zero here, so rtrim can never eat into the integer part
        return rtrim(sprintf('%d.%03d', $whole, $msec), '0');
    }
    return sprintf('%d', $whole);
}
}
2 changes: 0 additions & 2 deletions library/HTMLPurifier/AttrDef/HTML5/Week.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
/**
* Validates HTML5 week string according to
* https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#weeks
*
* @since 0.1.11
*/
class HTMLPurifier_AttrDef_HTML5_Week extends HTMLPurifier_AttrDef
{
Expand Down
2 changes: 0 additions & 2 deletions library/HTMLPurifier/AttrDef/HTML5/YearlessDate.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
/**
* Validates HTML5 yearless date string according to the spec
* https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#yearless-dates
*
* @since 0.1.11
*/
class HTMLPurifier_AttrDef_HTML5_YearlessDate extends HTMLPurifier_AttrDef
{
Expand Down
1 change: 0 additions & 1 deletion library/HTMLPurifier/ChildDef/HTML5/Time.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
* is present if text content of the children is not a valid datetime string.
*
* @see https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-time-element
* @since 0.1.11
*/
class HTMLPurifier_ChildDef_HTML5_Time extends HTMLPurifier_ChildDef_HTML5
{
Expand Down
36 changes: 35 additions & 1 deletion tests/HTMLPurifier/AttrDef/HTML5/DurationTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ protected function setUp()
* @param string $input
* @param string $expected OPTIONAL
*/
public function testDuration($input, $expected = null)
public function testISODuration($input, $expected = null)
{
$this->assertValidate($input, $expected);
}
Expand All @@ -33,13 +33,47 @@ public function durationData()
array('PT4H18M3.010S', 'PT4H18M3.01S'),
array('PT4H18M3.100S', 'PT4H18M3.1S'),

array('1w 2d 3m 4s'),
array('1w 2d 3m 4.0s', '1w 2d 3m 4s'),
array('1w 2d 3m 4.010s', '1w 2d 3m 4.01s'),
array('1w 2d 3m 4.100s', '1w 2d 3m 4.1s'),

array('1w 4s'),
array('1w'),
array('2d'),
array('3m'),
array('4s'),

// empty duration
array('0d', '0s'),
array('0.000s', '0s'),

// any order
array('4s 3m 2d 1w', '1w 2d 3m 4s'),

// seconds
array('4.100s', '4.1s'),
array('4.010s', '4.01s'),
array('4.001s', '4.001s'),

// case sensitivity
array('1W 2D 3M 4S', '1w 2d 3m 4s'),
array('1W 2d 3M 4s', '1w 2d 3m 4s'),

// invalid but fixable
array('PT', 'PT0S'),
array('P1W', 'P7D'),
array('P1W1D', 'P8D'),
array('PT4H18M3.6666S', 'PT4H18M3.667S'),
array('pt4H18M3s', 'PT4H18M3S'),

// duplicated components, only first is relevant
array('1w 2w 3h 4h', '1w 3h'),

// round extra decimals
array('4.0001s', '4s'),
array('4.6666s', '4.667s'),

// invalid
array('', false),
array('2010-04-10', false),
Expand Down

0 comments on commit 7f87930

Please sign in to comment.