Skip to content

Commit

Permalink
used parser to determine the parameter names
Browse files Browse the repository at this point in the history
  • Loading branch information
Matthias Molitor committed Aug 8, 2016
1 parent fb0a4fb commit c945d77
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 109 deletions.
50 changes: 5 additions & 45 deletions Translator/Formatting/Analysis/MessageAnalyzer.php
Original file line number Diff line number Diff line change
Expand Up @@ -40,53 +40,13 @@ public function __construct($message)
*/
public function getParameters()
{
$bracePattern = '/(?P<braces>\{|\})[^\\\']/u';
$nonEscapedBraces = array();
preg_match_all($bracePattern, $this->message, $nonEscapedBraces, PREG_PATTERN_ORDER|PREG_OFFSET_CAPTURE);
$openBracesBefore = function ($offset) use ($nonEscapedBraces) {
$openBraces = 0;
// Braces must be sorted by offset!
foreach ($nonEscapedBraces['braces'] as $match) {
list($brace, $braceOffset) = $match;
if ($braceOffset >= $offset) {
break;
}
if ($brace === '{') {
$openBraces++;
} else {
$openBraces--;
}
}
return $openBraces;
};

$choicesPattern = '/\{([a-zA-Z0-9_]+), (plural|select|choice),/u';
$choices = array();
preg_match_all($choicesPattern, $this->message, $choices, PREG_PATTERN_ORDER|PREG_OFFSET_CAPTURE);
$openChoicesBefore = function ($offset) use($choices) {
$numberOfChoices = 0;
foreach ($choices[0] as $match) {
$matchOffset = $match[1];
if ($matchOffset < $offset) {
$numberOfChoices++;
}
}
return $numberOfChoices;
};

$parameterPattern = '/\{(?P<parameters>[a-zA-Z0-9_]+)(,|\})/u';
$possibleParameters = array();
preg_match_all($parameterPattern, $this->message, $possibleParameters, PREG_PATTERN_ORDER|PREG_OFFSET_CAPTURE);
$parameters = array();
foreach ($possibleParameters['parameters'] as $matchIndex => $match) {
list($name, $offset) = $match;
// Start offset counting at the brace.
$offset--;
if (($openBracesBefore($offset) - ($openChoicesBefore($offset) * 2)) < 0) {
// Probably not a parameter, but part of a choice branch.
continue;
$tokens = (new MessageParser(new MessageLexer()))->parse($this->message);
foreach ($tokens as $token) {
/* @var $token array */
if ($token[0] === MessageParser::TOKEN_PARAMETER_NAME) {
$parameters[] = $token[1];
}
$parameters[] = $name;
}
return array_values(array_unique($parameters));
}
Expand Down
2 changes: 2 additions & 0 deletions Translator/Formatting/Analysis/MessageLexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

/**
* Splits a translation message into tokens that can be analyzed by a parser.
*
* @internal
*/
class MessageLexer extends AbstractLexer
{
Expand Down
103 changes: 39 additions & 64 deletions Translator/Formatting/Analysis/MessageParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
use JMS\Parser\AbstractParser;

/**
* Parses a translation message and replaces named variables with indexes,
* which are the only supported variable types in PHP versions prior to 5.5.
* Parses a translation message and returns the tokens of that message.
*
* @internal
*/
class MessageParser extends AbstractParser
{
Expand Down Expand Up @@ -47,56 +48,59 @@ class MessageParser extends AbstractParser
const STATE_QUOTED_TEXT = 'quoted_text';

/**
* Stack whose top element holds the current parsing state.
*
* @var \SplStack|null
* Token for parameter names, for example the "name" in {name}.
*/
protected $state = null;
const TOKEN_PARAMETER_NAME = 'parameter_name';

/**
* Maps parameter names to indexes.
*
* @var array(string=>integer)
* Token for choice types, for example the "select" in {name, select, [...]}
*/
protected $parameters = null;
const TOKEN_CHOICE_TYPE = 'choice_type';

/**
* Parses the message and replaces parameter names by numerical indexes.
* Stack whose top element holds the current parsing state.
*
* Returns an object that contains the modified message as well as the
* parameter mapping.
* @var \SplStack|null
*/
protected $state = null;

/**
* Parses the message and returns the tokens.
*
* The result contains the following attributes:
* # message - The modified message.
* # mapping - Array that maps parameter names to indices.
* The result is an array of tokens.
* Each token is an array that consists of the token type as
* first value and the message part as second value.
* The token type is always one of the MessageLexer::TOKEN_* or
* MessageParser::TOKEN_* constants.
*
* @param string $message
* @param string|null $context
* @return \stdClass
* @return array<array<string>> The message tokens.
*/
public function parse($message, $context = null)
{
if (strpos($message, '{') === false) {
// Message does not contain any declarations, therefore, we can avoid
// the parsing process.
return $this->createResult($message, array());
return array(array(MessageLexer::TOKEN_TEXT, $message));
}
return parent::parse($message, $context);
}

/**
* Performs the parsing and creates the result object that is returned by parse().
* Performs the parsing and creates the result that is returned by parse().
*
* @return \stdClass
* @return array<array<string>> The message tokens.
*/
protected function parseInternal()
{
$this->state = new \SplStack();
$this->enterState(self::STATE_TEXT);
$this->parameters = array();
$message = '';
$tokens = array();
$this->lexer->moveNext();
while ($this->lexer->token !== null) {
$tokenType = $this->getTokenType();

if ($this->isState(self::STATE_TEXT)) {
if ($this->isToken(MessageLexer::TOKEN_SINGLE_QUOTE)) {
$this->enterState(self::STATE_QUOTED_TEXT);
Expand All @@ -110,8 +114,7 @@ protected function parseInternal()
} elseif ($this->isState(self::STATE_DECLARATION_START)) {
if ($this->isToken(MessageLexer::TOKEN_TEXT)) {
$this->swapState(self::STATE_DECLARATION_VARIABLE);
$name = $this->getTokenValue();
$this->setTokenValue($this->getParameterIndex($name));
$tokenType = self::TOKEN_PARAMETER_NAME;
}

} elseif ($this->isState(self::STATE_DECLARATION_VARIABLE)) {
Expand All @@ -125,6 +128,7 @@ protected function parseInternal()
if ($this->isToken(MessageLexer::TOKEN_TEXT)) {
if (in_array($this->getTokenValue(), array('select', 'choice', 'plural'))) {
$this->swapState(self::STATE_DECLARATION_EXPRESSION);
$tokenType = self::TOKEN_CHOICE_TYPE;
}
} elseif ($this->isToken(MessageLexer::TOKEN_COMMA)) {
$this->swapState(self::STATE_DECLARATION_ARGUMENT);
Expand All @@ -147,41 +151,12 @@ protected function parseInternal()
}
}

$message .= $this->getTokenValue();
$tokens[] = array($tokenType, $this->getTokenValue());

$this->lexer->moveNext();
}

return $this->createResult($message, $this->parameters);
}

/**
* Creates a result object that contains the provided message and parameter mapping.
*
* @param string $message
* @param array(string=>integer) $parameterMapping
* @return \stdClass
*/
protected function createResult($message, $parameterMapping)
{
$result = new \stdClass();
$result->message = $message;
$result->mapping = $parameterMapping;
return $result;
}

/**
* Returns the index of the provided parameter.
*
* @param string $name
* @return integer
*/
protected function getParameterIndex($name)
{
if (!isset($this->parameters[$name])) {
$this->parameters[$name] = count($this->parameters);
}
return $this->parameters[$name];
return $tokens;
}

/**
Expand All @@ -192,27 +167,27 @@ protected function getParameterIndex($name)
*/
protected function isToken($type)
{
return $this->lexer->token[2] === $type;
return $this->getTokenType() === $type;
}

/**
* Returns the value of the current token.
* Returns the type of the current token.
*
* @return string
* @return integer One of the MessageLexer::TOKEN_* constants.
*/
protected function getTokenValue()
protected function getTokenType()
{
return $this->lexer->token[0];
return $this->lexer->token[2];
}

/**
* Sets the value of the current token.
* Returns the value of the current token.
*
* @param string $newValue
* @return string
*/
protected function setTokenValue($newValue)
protected function getTokenValue()
{
$this->lexer->token[0] = $newValue;
return $this->lexer->token[0];
}

/**
Expand Down

0 comments on commit c945d77

Please sign in to comment.