Skip to content

Commit

Permalink
- Fixed #ZETACOMP-15 : Template string functions are now multibyte safe
Browse files Browse the repository at this point in the history
# Thanks Andreas Schamberger
  • Loading branch information
Jerome Renard committed Aug 21, 2011
1 parent b474ce3 commit c61560e
Show file tree
Hide file tree
Showing 11 changed files with 3,142 additions and 174 deletions.
376 changes: 371 additions & 5 deletions src/functions/string_code.php

Large diffs are not rendered by default.

343 changes: 179 additions & 164 deletions src/functions/string_functions.php

Large diffs are not rendered by default.

1,082 changes: 1,082 additions & 0 deletions src/structs/lower_to_upper.php

Large diffs are not rendered by default.

1,073 changes: 1,073 additions & 0 deletions src/structs/upper_to_lower.php

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions src/template_autoload.php
Original file line number Diff line number Diff line change
Expand Up @@ -278,8 +278,10 @@
'ezcTemplateSourceToTstErrorMessages' => 'Template/error_messages.php',
'ezcTemplateString' => 'Template/functions/string_code.php',
'ezcTemplateStringFunctions' => 'Template/functions/string_functions.php',
'ezcTemplateStringLowerToUpperUnicodeMap' => 'Template/structs/lower_to_upper.php',
'ezcTemplateStringSourceToTstParser' => 'Template/parsers/source_to_tst/implementations/string.php',
'ezcTemplateStringTool' => 'Template/string_tool.php',
'ezcTemplateStringUpperToLowerUnicodeMap' => 'Template/structs/upper_to_lower.php',
'ezcTemplateSubtractionAssignmentOperatorAstNode' => 'Template/syntax_trees/ast/nodes/operators/subtraction_assignment_operator.php',
'ezcTemplateSubtractionOperatorAstNode' => 'Template/syntax_trees/ast/nodes/operators/subtraction_operator.php',
'ezcTemplateSwitchAstNode' => 'Template/syntax_trees/ast/nodes/control/switch.php',
Expand Down
152 changes: 152 additions & 0 deletions src/unicode/generate_unicode_tables.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
<?php
/*
 * Generator for the two Unicode case-mapping struct files.
 *
 * The script downloads UnicodeData.txt, reads the simple uppercase mapping
 * (column 12) and the simple lowercase mapping (column 13) of every record
 * and writes one PHP class per direction:
 *  - Template/src/structs/lower_to_upper.php
 *  - Template/src/structs/upper_to_lower.php
 *
 * Both output paths are relative to the current working directory.
 * Keys and values of the generated arrays are the "\xNN" escape sequences
 * produced by getHexStringFromCodepoint().
 */
$licenseHeader = <<<LICENSE_HEADER
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
* THIS FILE IS MACHINE GENERATED. USE THE FOLLOWING SCRIPT TO REBUILD IT:
* - Template/src/unicode/generate_unicode_tables.php
*
LICENSE_HEADER;

// A heredoc does not contain the line break in front of its closing marker,
// so every join between two fragments gets an explicit "\n". Without it the
// docblock lines of the generated files run into each other.
$lowerToUpper = <<<END
<?php
/**
* File containing a mapping from unicode lowercase to uppercase letters.
END
. "\n" . $licenseHeader . "\n";

$lowerToUpper .= <<<END
* @package Template
* @version //autogentag//
* @license http://www.apache.org/licenses/LICENSE-2.0 Apache License, Version 2.0
* @access private
*/
class ezcTemplateStringLowerToUpperUnicodeMap extends ezcBaseStruct
{
public \$unicodeTable = array(
END
. "\n";

$upperToLower = <<<END
<?php
/**
* File containing a mapping from unicode uppercase to lowercase letters.
END
. "\n" . $licenseHeader . "\n";

$upperToLower .= <<<END
* @package Template
* @version //autogentag//
* @license http://www.apache.org/licenses/LICENSE-2.0 Apache License, Version 2.0
* @access private
*/
class ezcTemplateStringUpperToLowerUnicodeMap extends ezcBaseStruct
{
public \$unicodeTable = array(
END
. "\n";


$unicodeDataUrl = 'http://www.unicode.org/Public/UNIDATA/UnicodeData.txt';
$fp = fopen( $unicodeDataUrl, 'r' );

if ( $fp === false )
{
    // Fail loudly: without the source data no table can be generated.
    file_put_contents( 'php://stderr', 'Unable to open ' . $unicodeDataUrl . PHP_EOL );
    exit( 1 );
}

while ( ( $line = fgets( $fp ) ) !== false )
{
    $columns = explode( ';', $line );
    if ( count( $columns ) < 14 )
    {
        // Blank or truncated record: columns 12 and 13 do not exist.
        continue;
    }

    $upperMapping = $columns[12];
    $lowerMapping = $columns[13];
    if ( $upperMapping === '' && $lowerMapping === '' )
    {
        // No case mapping at all, nothing to emit for this code point.
        continue;
    }

    $source = getHexStringFromCodepoint( $columns[0] );

    // The character name ends up inside a one-line comment of generated PHP
    // source and the data arrives over plain HTTP. Keep only characters
    // that cannot terminate the comment (no question mark, no control
    // characters), so a tampered name cannot inject code.
    $name = preg_replace( '/[^A-Za-z0-9 ,<>()_-]/', '', $columns[1] );

    if ( $upperMapping !== '' )
    {
        $lowerToUpper .= ' "' . $source . '" => "' . getHexStringFromCodepoint( $upperMapping ) . '", // ' . $name . PHP_EOL;
    }
    if ( $lowerMapping !== '' )
    {
        $upperToLower .= ' "' . $source . '" => "' . getHexStringFromCodepoint( $lowerMapping ) . '", // ' . $name . PHP_EOL;
    }
}
fclose( $fp );

$lowerToUpper .= ');' . PHP_EOL . '}';
$upperToLower .= ');' . PHP_EOL . '}';

$targets = array(
    'Template/src/structs/lower_to_upper.php' => $lowerToUpper,
    'Template/src/structs/upper_to_lower.php' => $upperToLower,
);

// Try to write both files even if one fails, then report the failure
// through the exit status.
$writeFailed = false;
foreach ( $targets as $path => $contents )
{
    if ( file_put_contents( $path, $contents ) === false )
    {
        file_put_contents( 'php://stderr', 'Unable to write ' . $path . PHP_EOL );
        $writeFailed = true;
    }
}
if ( $writeFailed )
{
    exit( 1 );
}

/**
 * Build the PHP "\xNN" escape sequence for the UTF-8 encoding of a code point.
 *
 * The code point is passed in hexadecimal notation (for example "00E9") and
 * converted with hexdec(). The result contains one literal backslash-x
 * escape per UTF-8 byte, e.g. \xc3\xa9 for "00E9"; it is source text for a
 * double quoted PHP string, not the encoded bytes themselves.
 *
 * Every byte is written with exactly two hex digits, so an escape can never
 * swallow a hex digit that happens to follow it.
 *
 * Background on the bit layout:
 * http://scripts.sil.org/cms/scripts/page.php?site_id=nrsi&item_id=IWS-AppendixA
 * http://developers.sun.com/dev/gadc/technicalpublications/articles/utf8.html
 *
 * @param string $codepoint Code point in hexadecimal notation, e.g. "0041".
 * @return string Escape sequence, or an empty string for values of 0x110000
 *                and above.
 */
function getHexStringFromCodepoint( $codepoint )
{
    $codepoint = hexdec( $codepoint );

    if ( $codepoint < 0x80 )
    {
        // One byte: the code point itself.
        $bytes = array( $codepoint );
    }
    elseif ( $codepoint < 0x800 )
    {
        // Two bytes: 110xxxxx 10xxxxxx
        $bytes = array(
            $codepoint >> 6 | 0xc0,
            $codepoint & 0x3f | 0x80,
        );
    }
    elseif ( $codepoint < 0x10000 )
    {
        // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
        $bytes = array(
            $codepoint >> 12 | 0xe0,
            $codepoint >> 6 & 0x3f | 0x80,
            $codepoint & 0x3f | 0x80,
        );
    }
    elseif ( $codepoint < 0x110000 )
    {
        // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        $bytes = array(
            $codepoint >> 18 | 0xf0,
            $codepoint >> 12 & 0x3f | 0x80,
            $codepoint >> 6 & 0x3f | 0x80,
            $codepoint & 0x3f | 0x80,
        );
    }
    else
    {
        // Outside the Unicode range: nothing to encode.
        return '';
    }

    $result = '';
    foreach ( $bytes as $byte )
    {
        // %02x pads bytes below 0x10, which dechex() would print as a
        // single digit.
        $result .= sprintf( '\\x%02x', $byte );
    }
    return $result;
}
2 changes: 1 addition & 1 deletion tests/regression_test.php
Original file line number Diff line number Diff line change
Expand Up @@ -606,4 +606,4 @@ public function testRunRegression( $directory )



?>
?>
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,4 @@ two paragraphs")}
39. {str_ord( "A" )}
40. {str_char_count( "hello" )}
41. {str_index_of( "Hello", "l")}
42. {str_chr( 65 )}
42. {str_chr( 65 )}
4 changes: 2 additions & 2 deletions tests/regression_tests/functions/correct/string_functions.out
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
32. Hello world!
33. dlrow olleH
34. 2
35. 2
35. 3
36. abcd
efgh
ijkl
Expand All @@ -48,4 +48,4 @@ asdf
39. 65
40. 5
41. 2
42. A
42. A
Loading

0 comments on commit c61560e

Please sign in to comment.