[+]: added tests & fixed "UTF8::substr_compare()"

voku · Feb 22, 2015 · 0701da7 · 0701da7
1 parent 39425f0
commit 0701da7
Show file tree

Hide file tree

Showing 2 changed files with 119 additions and 95 deletions.
diff --git a/src/voku/helper/UTF8.php b/src/voku/helper/UTF8.php
@@ -1792,27 +1792,6 @@ public static function filter_var($var, $filter = FILTER_DEFAULT, $option = null
     return self::filter($var);
   }
 
-  /**
-   * filter input
-   *
-   * @param      $type
-   * @param      $var
-   * @param int  $filter
-   * @param null $option
-   *
-   * @return mixed|string
-   */
-  public static function filter_input($type, $var, $filter = FILTER_DEFAULT, $option = null)
-  {
-    if (4 > func_num_args()) {
-      $var = filter_input($type, $var, $filter);
-    } else {
-      $var = filter_input($type, $var, $filter, $option);
-    }
-
-    return self::filter($var);
-  }
-
   /**
    * normalizes to UTF-8 NFC, converting from CP-1252 when needed
    *
@@ -1977,6 +1956,27 @@ public static function utf8_decode($text)
     return $o;
   }
 
+  /**
+   * Fetches an external variable through PHP's filter_input() and passes the result to self::filter().
+   *
+   * @param      $type   forwarded unchanged as the first argument of filter_input()
+   * @param      $var    forwarded unchanged as the second argument of filter_input()
+   * @param int  $filter forwarded as the third argument of filter_input(); FILTER_DEFAULT when omitted
+   * @param null $option forwarded as the fourth argument, but only when the caller passed four arguments
+   *
+   * @return mixed|string the return value of self::filter() for the value filter_input() produced
+   */
+  public static function filter_input($type, $var, $filter = FILTER_DEFAULT, $option = null)
+  {
+    if (4 > func_num_args()) { // fewer than four arguments given: use the three-argument call form
+      $var = filter_input($type, $var, $filter);
+    } else {
+      $var = filter_input($type, $var, $filter, $option);
+    }
+
+    return self::filter($var);
+  }
+
   /**
    * utf8_encode
    *
@@ -4216,18 +4216,22 @@ public static function strtr($s, $from, $to = INF)
   /**
    * Binary safe comparison of two strings from an offset, up to length characters
    *
-   * @param     $a
-   * @param     $b
-   * @param     $offset
-   * @param int $len
-   * @param int $i
+   * @param string $main_str            The main string being compared.
+   * @param string $str                 The secondary string being compared.
+   * @param int    $offset              The start position for the comparison. If negative, it starts counting from the
+   *                                    end of the string.
+   * @param int    $length              The length of the comparison. The default value is the largest of the length of
+   *                                    the str compared to the length of main_str less the offset.
+   * @param bool   $case_insensitivity  If truthy, the comparison is case insensitive (default 0: case sensitive).
    *
    * @return int
    */
-  public static function substr_compare($a, $b, $offset, $len = 2147483647, $i = 0)
+  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = 0)
   {
-    $a = self::substr($a, $offset, $len);
-    return $i ? self::strcasecmp($a, $b) : self::strcmp($a, $b);
+    // Cap both operands at $length; capping $str at the length of the cut would make "bc" compare equal to "bcd".
+    $main_str = self::substr($main_str, $offset, $length);
+    $str = self::substr($str, 0, $length);
+    return $case_insensitivity ? self::strcasecmp($main_str, $str) : self::strcmp($main_str, $str);
   }
 
   /**
@@ -4473,6 +4477,72 @@ public static function utf8_fix_win1252_chars($text)
     return str_replace(array_keys(self::$brokenUtf8ToUtf8), array_values(self::$brokenUtf8ToUtf8), $text);
   }
 
+  /**
+   * Returns the whitespace lookup table used by this class; each value is the UTF-8 byte sequence of its key.
+   *
+   * @return   array An array with numeric code point as key and White Space Character as value
+   */
+  public static function ws()
+  {
+    static $white = array(
+      // Numeric Code Point => UTF-8 Character; the name of each entry is the comment on the line BELOW it.
+      // NOTE(review): NUL (0) is listed while form feed (12) and NEXT LINE (133) are not - confirm this is intended.
+
+      0     => "\x0",
+      //NUL Byte
+      9     => "\x9",
+      //Tab
+      10    => "\xa",
+      //New Line
+      11    => "\xb",
+      //Vertical Tab
+      13    => "\xd",
+      //Carriage Return
+      32    => "\x20",
+      //Ordinary Space
+      160   => "\xc2\xa0",
+      //NO-BREAK SPACE
+      5760  => "\xe1\x9a\x80",
+      //OGHAM SPACE MARK
+      6158  => "\xe1\xa0\x8e",
+      //MONGOLIAN VOWEL SEPARATOR
+      8192  => "\xe2\x80\x80",
+      //EN QUAD
+      8193  => "\xe2\x80\x81",
+      //EM QUAD
+      8194  => "\xe2\x80\x82",
+      //EN SPACE
+      8195  => "\xe2\x80\x83",
+      //EM SPACE
+      8196  => "\xe2\x80\x84",
+      //THREE-PER-EM SPACE
+      8197  => "\xe2\x80\x85",
+      //FOUR-PER-EM SPACE
+      8198  => "\xe2\x80\x86",
+      //SIX-PER-EM SPACE
+      8199  => "\xe2\x80\x87",
+      //FIGURE SPACE
+      8200  => "\xe2\x80\x88",
+      //PUNCTUATION SPACE
+      8201  => "\xe2\x80\x89",
+      //THIN SPACE
+      8202  => "\xe2\x80\x8a",
+      //HAIR SPACE
+      8232  => "\xe2\x80\xa8",
+      //LINE SEPARATOR
+      8233  => "\xe2\x80\xa9",
+      //PARAGRAPH SEPARATOR
+      8239  => "\xe2\x80\xaf",
+      //NARROW NO-BREAK SPACE
+      8287  => "\xe2\x81\x9f",
+      //MEDIUM MATHEMATICAL SPACE
+      12288 => "\xe3\x80\x80"
+      //IDEOGRAPHIC SPACE
+    );
+
+    return $white;
+  }
+
   /**
    * return an array with "urlencoded"-win1252 -> UTF-8
    *
@@ -4710,70 +4780,4 @@ protected static function urldecode_fix_win1252_chars()
     return $array;
   }
 
-  /**
-   * returns an array of Unicode White Space characters
-   *
-   * @return   array An array with numeric code point as key and White Space Character as value
-   */
-  public static function ws()
-  {
-    static $white = array(
-
-      //    Numeric Code Point    => UTF-8 Character
-
-      0     => "\x0",
-      //NUL Byte
-      9     => "\x9",
-      //Tab
-      10    => "\xa",
-      //New Line
-      11    => "\xb",
-      //Vertical Tab
-      13    => "\xd",
-      //Carriage Return
-      32    => "\x20",
-      //Ordinary Space
-      160   => "\xc2\xa0",
-      //NO-BREAK SPACE
-      5760  => "\xe1\x9a\x80",
-      //OGHAM SPACE MARK
-      6158  => "\xe1\xa0\x8e",
-      //MONGOLIAN VOWEL SEPARATOR
-      8192  => "\xe2\x80\x80",
-      //EN QUAD
-      8193  => "\xe2\x80\x81",
-      //EM QUAD
-      8194  => "\xe2\x80\x82",
-      //EN SPACE
-      8195  => "\xe2\x80\x83",
-      //EM SPACE
-      8196  => "\xe2\x80\x84",
-      //THREE-PER-EM SPACE
-      8197  => "\xe2\x80\x85",
-      //FOUR-PER-EM SPACE
-      8198  => "\xe2\x80\x86",
-      //SIX-PER-EM SPACE
-      8199  => "\xe2\x80\x87",
-      //FIGURE SPACE
-      8200  => "\xe2\x80\x88",
-      //PUNCTUATION SPACE
-      8201  => "\xe2\x80\x89",
-      //THIN SPACE
-      8202  => "\xe2\x80\x8a",
-      //HAIR SPACE
-      8232  => "\xe2\x80\xa8",
-      //LINE SEPARATOR
-      8233  => "\xe2\x80\xa9",
-      //PARAGRAPH SEPARATOR
-      8239  => "\xe2\x80\xaf",
-      //NARROW NO-BREAK SPACE
-      8287  => "\xe2\x81\x9f",
-      //MEDIUM MATHEMATICAL SPACE
-      12288 => "\xe3\x80\x80"
-      //IDEOGRAPHIC SPACE
-    );
-
-    return $white;
-  }
-
 }
diff --git a/tests/UTF8Test.php b/tests/UTF8Test.php
@@ -570,6 +570,26 @@ public function testNumberFormat()
     $this->assertEquals('1,3', UTF8::number_format('1.298765', 1, ',', ''));
   }
 
+  public function testSubstrCompare()
+  {
+    $this->assertEquals(0, substr_compare("abcde", "bc", 1, 2)); // native function first: reference results
+    $this->assertEquals(0, substr_compare("abcde", "de", -2, 2));
+    $this->assertEquals(0, substr_compare("abcde", "bcg", 1, 2));
+    $this->assertEquals(0, substr_compare("abcde", "BC", 1, 2, true));
+    $this->assertEquals(1, substr_compare("abcde", "bc", 1, 3));
+    $this->assertEquals(-1, substr_compare("abcde", "cd", 1, 2));
+
+    $this->assertEquals(0, UTF8::substr_compare("abcde", "bc", 1, 2)); // same arguments, same expected values
+    $this->assertEquals(0, UTF8::substr_compare("abcde", "de", -2, 2));
+    $this->assertEquals(0, UTF8::substr_compare("abcde", "bcg", 1, 2));
+    $this->assertEquals(0, UTF8::substr_compare("abcde", "BC", 1, 2, true));
+    $this->assertEquals(1, UTF8::substr_compare("abcde", "bc", 1, 3));
+    $this->assertEquals(-1, UTF8::substr_compare("abcde", "cd", 1, 2));
+
+    // UTF-8: the expected 0 requires offset 1 / length 2 to select the characters "●◎", i.e. to count characters
+    $this->assertEquals(0, UTF8::substr_compare("○●◎\r", "●◎", 1, 2));
+  }
+
   public function testFilterInput()
   {
     $options = array(