Permalink
Browse files

Rewrite the routine that checks for URI-unsafe characters

  • Loading branch information...
gfx committed Sep 6, 2010
1 parent 28b2513 commit 25c72e3dfc0770b9177630827c85ec9065ef9867
Showing with 106 additions and 296 deletions.
  1. +0 −34 author/char_trait.pl
  2. +30 −0 tool/uri_unsafe.PL
  3. +74 −0 uri_unsafe.h
  4. +2 −2 xs/Text-Xslate.xs
  5. +0 −260 xslate_char_trait.h
View
@@ -1,34 +0,0 @@
-#!perl -w
-use strict;
-
-my $uri_unsafe = qr/[^A-Za-z0-9\-\._~]/; # any character outside the RFC 3986 unreserved set (letters, digits, - . _ ~)
-my $html_meta = qr/[<>'"&]/; # the five characters flagged as HTML meta characters: < > ' " &
-
-print "/* This file is automatically generated by $0 */\n";
-
-print <<'C'; # flag enum and table opener, printed verbatim (single-quoted heredoc, no interpolation)
-enum { TXct_NORMAL, TXct_HTML_META = 0x01, TXct_URI_UNSAFE = 0x02 };
-static const U8 char_trait[] = {
-C
-
-for(my $i = 0; $i < 256; $i++) { # one table entry per possible byte value
- my $c = chr $i;
- my @s; # names of the flags that apply to this byte
- if($c =~ $uri_unsafe) {
- push @s, 'TXct_URI_UNSAFE';
- }
- if($c =~ $html_meta) {
- push @s, 'TXct_HTML_META';
- }
- if(!@s) { # no flag matched: emit the zero-valued TXct_NORMAL
- push @s, 'TXct_NORMAL';
- }
- printf " %-30s /* \\x%02X%s */,\n", # entry, followed by a C comment giving the byte in hex
- join('|', @s), # several flags are OR-ed together in the emitted C expression
- $i, ($c =~ /[[:print:]]/ ? " $c" : ''); # the literal character is appended only when printable
-}
-
-print <<'C'; # close the array initializer
-}; /* char_trait */
-C
-
View
@@ -0,0 +1,30 @@
+#!perl
+use strict;
+use warnings;
+
+my $uri_unsafe = qr/[^A-Za-z0-9\-\._~]/; # any character outside the RFC 3986 unreserved set (letters, digits, - . _ ~)
+
+print "/* Check whether a character is unsafe by the definition of RFC 3986 */\n";
+print "/* This file is automatically generated by $0 */\n"; # $0 records the script that produced the output
+
+print <<'C'; # function prologue, printed verbatim (single-quoted heredoc, no interpolation)
+static int
+is_uri_unsafe(char const c) {
+ switch((unsigned char)c) {
+C
+
+for(my $i = 0; $i < 256; $i++) { # visit every possible byte value
+ my $c = chr $i;
+
+ if($c !~ $uri_unsafe) { # only safe characters get an explicit case label
+ printf " case 0x%02X: return 0;%s\n",
+ $i,
+ ($c =~ /[[:print:]]/ ? " /* $c */" : ''); # annotate the case with the character itself when printable
+ }
+}
+
+print <<'C'; # every byte value without a case label falls to the default and is reported unsafe
+ default: return 1;
+ }
+} /* is_uri_unsafe */
+C
View
@@ -0,0 +1,74 @@
+/* Check whether a character is unsafe by the definition of RFC 3986 */
+/* This file is automatically generated by tool/uri_unsafe.PL */
+static int
+is_uri_unsafe(char const c) { /* returns 0 for the bytes listed below, 1 for every other byte */
+ switch((unsigned char)c) { /* compare as an unsigned byte value */
+ case 0x2D: return 0; /* - */
+ case 0x2E: return 0; /* . */
+ case 0x30: return 0; /* 0 */
+ case 0x31: return 0; /* 1 */
+ case 0x32: return 0; /* 2 */
+ case 0x33: return 0; /* 3 */
+ case 0x34: return 0; /* 4 */
+ case 0x35: return 0; /* 5 */
+ case 0x36: return 0; /* 6 */
+ case 0x37: return 0; /* 7 */
+ case 0x38: return 0; /* 8 */
+ case 0x39: return 0; /* 9 */
+ case 0x41: return 0; /* A */
+ case 0x42: return 0; /* B */
+ case 0x43: return 0; /* C */
+ case 0x44: return 0; /* D */
+ case 0x45: return 0; /* E */
+ case 0x46: return 0; /* F */
+ case 0x47: return 0; /* G */
+ case 0x48: return 0; /* H */
+ case 0x49: return 0; /* I */
+ case 0x4A: return 0; /* J */
+ case 0x4B: return 0; /* K */
+ case 0x4C: return 0; /* L */
+ case 0x4D: return 0; /* M */
+ case 0x4E: return 0; /* N */
+ case 0x4F: return 0; /* O */
+ case 0x50: return 0; /* P */
+ case 0x51: return 0; /* Q */
+ case 0x52: return 0; /* R */
+ case 0x53: return 0; /* S */
+ case 0x54: return 0; /* T */
+ case 0x55: return 0; /* U */
+ case 0x56: return 0; /* V */
+ case 0x57: return 0; /* W */
+ case 0x58: return 0; /* X */
+ case 0x59: return 0; /* Y */
+ case 0x5A: return 0; /* Z */
+ case 0x5F: return 0; /* _ */
+ case 0x61: return 0; /* a */
+ case 0x62: return 0; /* b */
+ case 0x63: return 0; /* c */
+ case 0x64: return 0; /* d */
+ case 0x65: return 0; /* e */
+ case 0x66: return 0; /* f */
+ case 0x67: return 0; /* g */
+ case 0x68: return 0; /* h */
+ case 0x69: return 0; /* i */
+ case 0x6A: return 0; /* j */
+ case 0x6B: return 0; /* k */
+ case 0x6C: return 0; /* l */
+ case 0x6D: return 0; /* m */
+ case 0x6E: return 0; /* n */
+ case 0x6F: return 0; /* o */
+ case 0x70: return 0; /* p */
+ case 0x71: return 0; /* q */
+ case 0x72: return 0; /* r */
+ case 0x73: return 0; /* s */
+ case 0x74: return 0; /* t */
+ case 0x75: return 0; /* u */
+ case 0x76: return 0; /* v */
+ case 0x77: return 0; /* w */
+ case 0x78: return 0; /* x */
+ case 0x79: return 0; /* y */
+ case 0x7A: return 0; /* z */
+ case 0x7E: return 0; /* ~ */
+ default: return 1; /* every other byte value is unsafe */
+ }
+} /* is_uri_unsafe */
View
@@ -3,7 +3,7 @@
#define NEED_newSV_type
#include "xslate.h"
-#include "xslate_char_trait.h"
+#include "uri_unsafe.h"
/* aliases */
#define TXCODE_literal_i TXCODE_literal
@@ -481,7 +481,7 @@ tx_uri_escape(pTHX_ SV* const src) {
SvPOK_on(dest);
while(pv != end) {
- if(char_trait[(U8)*pv] & TXct_URI_UNSAFE) {
+ if(is_uri_unsafe(*pv)) {
/* identical to PL_hexdigit + 16 */
static const char hexdigit[] = "0123456789ABCDEF";
char p[3];
Oops, something went wrong.

0 comments on commit 25c72e3

Please sign in to comment.