Skip to content

Commit

Permalink
[+]: "ASCII::normalize_whitespace()" -> support for "VERTICAL TAB" v2.1
Browse files Browse the repository at this point in the history
  • Loading branch information
voku committed Nov 12, 2020
1 parent 56318c1 commit 8095367
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 5 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

### 1.5.6 (2020-11-12)

- "ASCII::normalize_whitespace()" -> can now also remove "control characters" if needed (+ VERTICAL TAB)
- "ASCII::normalize_whitespace()" -> can now also remove "control characters" if needed v2

### 1.5.5 (2020-11-12)

Expand Down
26 changes: 22 additions & 4 deletions src/voku/helper/ASCII.php
Original file line number Diff line number Diff line change
Expand Up @@ -640,7 +640,25 @@ public static function normalize_whitespace(
$cacheKey = (int) $keepNonBreakingSpace;

if ($normalize_control_characters) {
$str = \str_replace(["\xe2\x80\xa8", "\xe2\x80\xa9", "\x0B"], ["\n", "\n", "\t"], $str);
$str = \str_replace(
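[
"\x0d\x0c", // 'END OF LINE' -- NOTE(review): these bytes are CR + FORM FEED; a CRLF line ending would be "\x0d\x0a" -- confirm intent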
"\xe2\x80\xa8", // 'LINE SEPARATOR'
"\xe2\x80\xa9", // 'PARAGRAPH SEPARATOR'
"\x0c", // 'FORM FEED'
"\x0d", // 'CARRIAGE RETURN'
"\x0b", // 'VERTICAL TAB'
],
[
"\n",
"\n",
"\n",
"\n",
"\n",
"\t",
],
$str
);
}

if (!isset($WHITESPACE_CACHE[$cacheKey])) {
Expand Down Expand Up @@ -681,7 +699,7 @@ public static function normalize_whitespace(
* @param string $str
* @param bool $url_encoded
* @param string $replacement
* @param bool $keep_control_characters
* @param bool $keep_basic_control_characters
*
* @psalm-pure
*
Expand All @@ -691,7 +709,7 @@ public static function remove_invisible_characters(
string $str,
bool $url_encoded = false,
string $replacement = '',
bool $keep_control_characters = true
bool $keep_basic_control_characters = true
): string {
// init
$non_displayables = [];
Expand All @@ -705,7 +723,7 @@ public static function remove_invisible_characters(
$non_displayables[] = '/%1[0-9a-fA-F]/'; // url encoded 16-31
}

if ($keep_control_characters) {
if ($keep_basic_control_characters) {
$non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127
} else {
$str = self::normalize_whitespace($str, false, false, true);
Expand Down

0 comments on commit 8095367

Please sign in to comment.