Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 686 lines (656 sloc) 46.58 kb
46cb2c3 Added HTML filtering
drlippman authored
1 <?php
2
3 /*
4 htmLawed 1.0.8, 27 May 2008
5 Copyright Santosh Patnaik
6 GPL v3 license
7 A PHP Labware internal utility - http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed
8
9 See htmLawed_README.txt/.htm
10
11 Stripslashes() GET/POST if magic_quotes is on
12 */
13
/**
 * Filter $in as HTML according to the configuration $cf and the attribute spec $spec.
 *
 * Steps, in order: finalize $cf (filling defaults for every option read below),
 * publish $cf and $spec through $GLOBALS for the hl_* callbacks, strip control
 * characters, optionally translate MS-specific characters, process comments/CDATA
 * sections and entities, run the optional hook, filter tags with hl_tag(), optionally
 * balance with hl_bal(), and finally restore any pre-existing $GLOBALS['cf'] /
 * $GLOBALS['spec'].
 *
 * Local modification: when $GLOBALS['teacherid'] is not set, the result is also
 * passed through remove_instr_only() (defined outside this file section).
 *
 * @param string       $in   Text to filter.
 * @param array|mixed  $cf   Configuration array; any non-array value means "all defaults".
 * @param array|string $spec Attribute spec; a non-array value is parsed with hl_spec().
 * @return string Filtered text.
 */
function htmLawed($in, $cf = 1, $spec = array()){
    $cf = is_array($cf) ? $cf : array();
    // config: valid_xhtml
    if(!empty($cf['valid_xhtml'])){
        $cf['elements'] = !empty($cf['elements']) ? $cf['elements'] : '*-center-dir-font-isindex-menu-s-strike-u';
        $cf['make_tag_strict'] = isset($cf['make_tag_strict']) ? $cf['make_tag_strict'] : 2;
        $cf['xml:lang'] = isset($cf['xml:lang']) ? $cf['xml:lang'] : 2;
    }
    // config: elements
    $ec = array('a'=>1, 'abbr'=>1, 'acronym'=>1, 'address'=>1, 'applet'=>1, 'area'=>1, 'b'=>1, 'bdo'=>1, 'big'=>1, 'blockquote'=>1, 'br'=>1, 'button'=>1, 'caption'=>1, 'center'=>1, 'cite'=>1, 'code'=>1, 'col'=>1, 'colgroup'=>1, 'dd'=>1, 'del'=>1, 'dfn'=>1, 'dir'=>1, 'div'=>1, 'dl'=>1, 'dt'=>1, 'em'=>1, 'embed'=>1, 'fieldset'=>1, 'font'=>1, 'form'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'i'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'ins'=>1, 'isindex'=>1, 'kbd'=>1, 'label'=>1, 'legend'=>1, 'li'=>1, 'map'=>1, 'menu'=>1, 'noscript'=>1, 'object'=>1, 'ol'=>1, 'optgroup'=>1, 'option'=>1, 'p'=>1, 'param'=>1, 'pre'=>1, 'q'=>1, 'rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1, 'ruby'=>1, 's'=>1, 'samp'=>1, 'script'=>1, 'select'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'sup'=>1, 'table'=>1, 'tbody'=>1, 'td'=>1, 'textarea'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1, 'tt'=>1, 'u'=>1, 'ul'=>1, 'var'=>1); // 86 incl. deprecated, embed, ruby set
    if(!empty($cf['safe'])){
        unset($ec['applet'], $ec['embed'], $ec['iframe'], $ec['object'], $ec['script']);
    }
    // 'elements' is either '-*' (none), a plain comma list (exactly these), or '*' followed by +name/-name adjustments
    $tmp = !empty($cf['elements']) ? str_replace(array("\n", "\r", "\t", ' '), '', $cf['elements']) : '*';
    if($tmp == '-*'){$ec = array();}
    elseif(strpos($tmp, '*') === false){$ec = array_flip(explode(',', $tmp));}
    else{
        if(isset($tmp[1])){
            preg_match_all('`(?:^|-|\+)[^\-+]+?(?=-|\+|$)`', $tmp, $m, PREG_SET_ORDER);
            for($i=count($m); --$i>=0;){$m[$i] = $m[$i][0];}
            foreach($m as $v){
                if($v[0] == '+'){$ec[substr($v, 1)] = 1;}
                // an explicit '+name' anywhere in the list wins over '-name'
                if($v[0] == '-' && isset($ec[($v = substr($v, 1))]) && !in_array('+'. $v, $m)){unset($ec[$v]);}
            }
        }
    }
    $cf['elements'] =& $ec;
    // config: denied attributes
    $cf['deny_attribute'] = !empty($cf['deny_attribute']) ? array_flip(explode(',', str_replace(array("\n", "\r", "\t", ' '), '', $cf['deny_attribute']. (!empty($cf['safe']) ? ',on*' : '')))) : (!empty($cf['safe']) ? array('on*'=>1) : array());
    if(isset($cf['deny_attribute']['on*'])){
        unset($cf['deny_attribute']['on*']);
        $cf['deny_attribute'] += array('onblur'=>1, 'onchange'=>1, 'onclick'=>1, 'ondblclick'=>1, 'onfocus'=>1, 'onkeydown'=>1, 'onkeypress'=>1, 'onkeyup'=>1, 'onmousedown'=>1, 'onmousemove'=>1, 'onmouseout'=>1, 'onmouseover'=>1, 'onmouseup'=>1, 'onreset'=>1, 'onselect'=>1, 'onsubmit'=>1);
    }
    // config: URL schemes
    $tmp = (isset($cf['schemes'][2]) && strpos($cf['schemes'], ':')) ? strtolower($cf['schemes']) : 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; *:file, http, https';
    $cf['schemes'] = array();
    foreach(explode(';', str_replace(array(' ', "\t", "\r", "\n"), '', $tmp)) as $v){
        // An entry without ':' (e.g. produced by a trailing ';') has no scheme list;
        // array_pad() keeps the destructuring from reading an undefined offset.
        list($tmp, $tmp2) = array_pad(explode(':', $v, 2), 2, null);
        if($tmp2){$cf['schemes'][$tmp] = array_flip(explode(',', $tmp2));}
    }
    if(!isset($cf['schemes']['*'])){$cf['schemes']['*'] = array('file'=>1, 'http'=>1, 'https'=>1,);}
    if(!empty($cf['safe']) && empty($cf['schemes']['style'])){$cf['schemes']['style'] = array('nil'=>1);}
    // config: abs/rel URL
    $cf['abs_url'] = isset($cf['abs_url']) ? $cf['abs_url'] : 0;
    if(!isset($cf['base_url']) or !preg_match('`^[a-zA-Z\d.+\-]+://[^/]+/(.+?/)?$`', $cf['base_url'])){
        $cf['base_url'] = $cf['abs_url'] = 0;
    }
    // config: rest
    $cf['and_mark'] = !empty($cf['and_mark']) ? 1 : 0;
    $cf['anti_link_spam'] = (isset($cf['anti_link_spam']) && is_array($cf['anti_link_spam']) && count($cf['anti_link_spam']) == 2 && (empty($cf['anti_link_spam'][0]) or hl_regex($cf['anti_link_spam'][0])) && (empty($cf['anti_link_spam'][1]) or hl_regex($cf['anti_link_spam'][1]))) ? $cf['anti_link_spam'] : 0;
    $cf['anti_mail_spam'] = isset($cf['anti_mail_spam']) ? $cf['anti_mail_spam'] : 0;
    $cf['balance'] = isset($cf['balance']) ? (bool)$cf['balance'] : 1;
    $cf['cdata'] = isset($cf['cdata']) ? $cf['cdata'] : (empty($cf['safe']) ? 3 : 0);
    $cf['clean_ms_char'] = isset($cf['clean_ms_char']) ? $cf['clean_ms_char'] : 0;
    $cf['comment'] = isset($cf['comment']) ? $cf['comment'] : (empty($cf['safe']) ? 3 : 0);
    $cf['css_expression'] = isset($cf['css_expression']) ? (bool)$cf['css_expression'] : 0;
    $cf['hexdec_entity'] = isset($cf['hexdec_entity']) ? $cf['hexdec_entity'] : 1;
    $cf['hook'] = (!empty($cf['hook']) && function_exists($cf['hook'])) ? $cf['hook'] : 0;
    $cf['keep_bad'] = isset($cf['keep_bad']) ? $cf['keep_bad'] : 6;
    $cf['lc_std_val'] = isset($cf['lc_std_val']) ? (bool)$cf['lc_std_val'] : 1;
    $cf['make_tag_strict'] = isset($cf['make_tag_strict']) ? $cf['make_tag_strict'] : 1;
    $cf['named_entity'] = isset($cf['named_entity']) ? (bool)$cf['named_entity'] : 1;
    $cf['no_deprecated_attr'] = isset($cf['no_deprecated_attr']) ? $cf['no_deprecated_attr'] : 1;
    $cf['parent'] = isset($cf['parent'][0]) ? strtolower($cf['parent']) : 'body';
    $cf['show_setting'] = isset($cf['show_setting'][0]) ? $cf['show_setting'] : 0;
    $cf['unique_ids'] = isset($cf['unique_ids']) ? $cf['unique_ids'] : 1;
    $cf['xml:lang'] = isset($cf['xml:lang']) ? $cf['xml:lang'] : 0;
    // the hl_* callbacks read the config through the global; remember any previous value to restore it at the end
    if(isset($GLOBALS['cf'])){$resetCf = $GLOBALS['cf'];}
    $GLOBALS['cf'] = $cf;
    // $spec
    $spec = is_array($spec) ? $spec : hl_spec($spec);
    if(isset($GLOBALS['spec'])){$resetSpec = $GLOBALS['spec'];}
    $GLOBALS['spec'] = $spec;
    // chars
    $in = preg_replace('`[\x00-\x08\x0b-\x0c\x0e-\x1f]`', '', $in);
    if($cf['clean_ms_char']){
        $el = array("\x7f"=>'', "\x80"=>'&#8364;', "\x81"=>'', "\x83"=>'&#402;', "\x85"=>'&#8230;', "\x86"=>'&#8224;', "\x87"=>'&#8225;', "\x88"=>'&#710;', "\x89"=>'&#8240;', "\x8a"=>'&#352;', "\x8b"=>'&#8249;', "\x8c"=>'&#338;', "\x8d"=>'', "\x8e"=>'&#381;', "\x8f"=>'', "\x90"=>'', "\x95"=>'&#8226;', "\x96"=>'&#8211;', "\x97"=>'&#8212;', "\x98"=>'&#732;', "\x99"=>'&#8482;', "\x9a"=>'&#353;', "\x9b"=>'&#8250;', "\x9c"=>'&#339;', "\x9d"=>'', "\x9e"=>'&#382;', "\x9f"=>'&#376;');
        $el = $el + ($cf['clean_ms_char'] == 1 ? array("\x82"=>'&#8218;', "\x84"=>'&#8222;', "\x91"=>'&#8216;', "\x92"=>'&#8217;', "\x93"=>'&#8220;', "\x94"=>'&#8221;') : array("\x82"=>'\'', "\x84"=>'"', "\x91"=>'\'', "\x92"=>'\'', "\x93"=>'"', "\x94"=>'"'));
        $in = strtr($in, $el);
    }
    // comments/CDATA secs
    if($cf['cdata'] or $cf['comment']){$in = preg_replace_callback('`<!(?:(?:--.*?--)|(?:\[CDATA\[.*?\]\]))>`sm', 'hl_cmtcd', $in);}
    // entities: escape every '&' first, then let hl_ent() restore the references it accepts
    $in = preg_replace_callback('`&amp;([A-Za-z][A-Za-z0-9]{1,30}|#(?:[0-9]{1,8}|[Xx][0-9A-Fa-f]{1,7}));`', 'hl_ent', str_replace('&', '&amp;', $in));
    // for unique-ID check; global for multiple calls
    if($cf['unique_ids'] && !isset($GLOBALS['hl_Ids'])){$GLOBALS['hl_Ids'] = array();}
    // custom hook
    if($cf['hook']){$in = $cf['hook']($in, $cf, $spec);}
    // show finalized cf & spec
    if($cf['show_setting']){
        $GLOBALS[$cf['show_setting']] = array('config'=>$cf, 'spec'=>$spec, 'time'=>microtime());
    }
    // main work
    $in = preg_replace_callback('`<(?:(?:\s|$)|(?:[^>]*(?:>|$)))|>`m', 'hl_tag', $in);
    $in = ($cf['balance'] ? hl_bal($in, $cf['keep_bad'], $cf['parent']) : $in);
    // turn the placeholder bytes written by hl_cmtcd() back into markup characters
    $in = (($cf['cdata'] or $cf['comment']) && strpos($in, "\x01") !== false) ? str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05"), array('', '', '&', '<', '>'), $in) : $in;
    // end
    unset($cf, $ec);
    if(isset($resetCf)){$GLOBALS['cf'] = $resetCf;}
    if(isset($resetSpec)){$GLOBALS['spec'] = $resetSpec;}
    // add custom removal of instructor-specific stuff
    if (!isset($GLOBALS['teacherid'])) {
        $in = remove_instr_only($in);
    }
    return $in;
    // eof
}
121
/**
 * Check an attribute value against the rules a user spec gives for it.
 *
 * Recognised rule names: maxlen, minlen (byte length), maxval, minval (value cast
 * to float), match, nomatch (PCRE patterns), oneof, noneof ('|'-separated lists).
 * Any other key (including 'default') is ignored while checking. Checking stops
 * at the first rule that fails.
 *
 * @param string $v Attribute value.
 * @param array  $p Rule name => rule parameter.
 * @return mixed $v when every rule passes; otherwise $p['default'] if set, else 0.
 */
function hl_attrval($v, $p){
    $o = 1;
    $l = strlen($v);
    foreach($p as $pn=>$pv){
        switch($pn){
            case 'maxlen':
                if($l > $pv){$o = 0;}
                break;
            case 'minlen':
                if($l < $pv){$o = 0;}
                break;
            case 'maxval':
                if((float)($v) > $pv){$o = 0;}
                break;
            case 'minval':
                if((float)($v) < $pv){$o = 0;}
                break;
            case 'match':
                if(!preg_match($pv, $v)){$o = 0;}
                break;
            case 'nomatch':
                if(preg_match($pv, $v)){$o = 0;}
                break;
            case 'oneof':
                // Strict comparison: with '==' PHP compares numeric-looking strings
                // by value, so '1e1' would be accepted for a list containing '10'.
                $o = in_array((string)$v, explode('|', $pv), true) ? 1 : 0;
                break;
            case 'noneof':
                $o = in_array((string)$v, explode('|', $pv), true) ? 0 : 1;
                break;
            default:
                break;
        }
        if(!$o){break;}
    }
    return ($o ? $v : (isset($p['default']) ? $p['default'] : 0));
    // eof
}
149
/**
 * Balance the tags in $tx and close elements left open.
 *
 * The text is split on '<'; each piece is either plain text or "tag>text". Open
 * non-empty elements are tracked on the stack $q, and output is collected through
 * an output buffer.
 *
 * $do selects what happens to bad tags/content: 0, remove; 1, neutralize tags and
 * content; 2, remove tags but neutralize content; 3 & 4, like 1 & 2 but remove if
 * text is invalid in the parent element; 5 & 6, like 3 & 4 but linebreaks, tabs
 * and spaces are left.
 *
 * $in names the element holding $tx and so affects which elements are permitted
 * at the top level; a name not found in the element tables falls back to 'div'.
 * A div is inserted inside block-only ($cB) elements where required. There is no
 * correction optimization: a typo as in '<tabl>...</table>' renders the whole
 * block invalid.
 *
 * Local modification (DLMOD): the check that dropped a child not allowed in its
 * parent is disabled, so improper nesting such as <p><table> is passed through.
 *
 * @param string $tx Markup to balance.
 * @param int    $do Bad-tag handling mode (see above).
 * @param string $in Name of the parent element.
 * @return string Balanced markup.
 */
function hl_bal($tx, $do = 1, $in = 'div'){
    // eles by Content
    $cB = array('blockquote'=>1, 'form'=>1, 'map'=>1, 'noscript'=>1); // Block
    $cE = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1); // Empty
    $cF = array('button'=>1, 'del'=>1, 'div'=>1, 'dd'=>1, 'fieldset'=>1, 'iframe'=>1, 'ins'=>1, 'li'=>1, 'noscript'=>1, 'object'=>1, 'td'=>1, 'th'=>1); // Flow; later context-wise dynamic move of ins & del to $cI
    $cI = array('a'=>1, 'abbr'=>1, 'acronym'=>1, 'address'=>1, 'b'=>1, 'bdo'=>1, 'big'=>1, 'caption'=>1, 'cite'=>1, 'code'=>1, 'dfn'=>1, 'dt'=>1, 'em'=>1, 'font'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'i'=>1, 'kbd'=>1, 'label'=>1, 'legend'=>1, 'p'=>1, 'pre'=>1, 'q'=>1, 'rb'=>1, 'rt'=>1, 's'=>1, 'samp'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'sup'=>1, 'tt'=>1, 'u'=>1, 'var'=>1); // Inline
    $cN = array('a'=>array('a'=>1), 'button'=>array('a'=>1, 'button'=>1, 'fieldset'=>1, 'form'=>1, 'iframe'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'fieldset'=>array('fieldset'=>1), 'form'=>array('form'=>1), 'label'=>array('label'=>1), 'noscript'=>array('script'=>1), 'pre'=>array('big'=>1, 'font'=>1, 'img'=>1, 'object'=>1, 'script'=>1, 'small'=>1, 'sub'=>1, 'sup'=>1), 'rb'=>array('ruby'=>1), 'rt'=>array('ruby'=>1)); // Illegal
    $cN2 = array_keys($cN);
    $cR = array('blockquote'=>1, 'dir'=>1, 'dl'=>1, 'form'=>1, 'map'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'optgroup'=>1, 'rbc'=>1, 'rtc'=>1, 'ruby'=>1, 'select'=>1, 'table'=>1, 'tbody'=>1, 'tfoot'=>1, 'thead'=>1, 'tr'=>1, 'ul'=>1);
    // Specific (immediate parent-child). 'dir' is keyed by element name like every
    // other entry; it was previously array('li'), i.e. array(0=>'li'), which no
    // isset($...['li']) lookup could ever match.
    $cS = array('colgroup'=>array('col'=>1), 'dir'=>array('li'=>1), 'dl'=>array('dd'=>1, 'dt'=>1), 'menu'=>array('li'=>1), 'ol'=>array('li'=>1), 'optgroup'=>array('option'=>1), 'option'=>array('#pcdata'=>1), 'rbc'=>array('rb'=>1), 'rp'=>array('#pcdata'=>1), 'rtc'=>array('rt'=>1), 'ruby'=>array('rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1), 'select'=>array('optgroup'=>1, 'option'=>1), 'script'=>array('#pcdata'=>1), 'table'=>array('caption'=>1, 'col'=>1, 'colgroup'=>1, 'tfoot'=>1, 'tbody'=>1, 'tr'=>1, 'thead'=>1), 'tbody'=>array('tr'=>1), 'tfoot'=>array('tr'=>1), 'textarea'=>array('#pcdata'=>1), 'thead'=>array('tr'=>1), 'tr'=>array('td'=>1, 'th'=>1), 'ul'=>array('li'=>1));
    $cO = array('address'=>array('p'=>1), 'applet'=>array('param'=>1), 'blockquote'=>array('script'=>1), 'fieldset'=>array('legend'=>1, '#pcdata'=>1), 'form'=>array('script'=>1), 'map'=>array('area'=>1), 'object'=>array('param'=>1, 'embed'=>1)); // Other

    // eles by block/inline type; ins & del both type; #pcdata: plain text
    $eB = array('address'=>1, 'blockquote'=>1, 'center'=>1, 'del'=>1, 'dir'=>1, 'dl'=>1, 'div'=>1, 'fieldset'=>1, 'form'=>1, 'ins'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'isindex'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'p'=>1, 'pre'=>1, 'table'=>1, 'ul'=>1);
    $eI = array('#pcdata'=>1, 'a'=>1, 'abbr'=>1, 'acronym'=>1, 'applet'=>1, 'b'=>1, 'bdo'=>1, 'big'=>1, 'br'=>1, 'button'=>1, 'cite'=>1, 'code'=>1, 'del'=>1, 'dfn'=>1, 'em'=>1, 'embed'=>1, 'font'=>1, 'i'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'ins'=>1, 'kbd'=>1, 'label'=>1, 'map'=>1, 'object'=>1, 'param'=>1, 'q'=>1, 'ruby'=>1, 's'=>1, 'samp'=>1, 'select'=>1, 'script'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'sup'=>1, 'textarea'=>1, 'tt'=>1, 'u'=>1, 'var'=>1);
    $eN = array('a'=>1, 'big'=>1, 'button'=>1, 'fieldset'=>1, 'font'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'label'=>1, 'object'=>1, 'ruby'=>1, 'script'=>1, 'select'=>1, 'small'=>1, 'sub'=>1, 'sup'=>1, 'textarea'=>1); // Exclude from specific ele; $cN values
    $eO = array('area'=>1, 'caption'=>1, 'col'=>1, 'colgroup'=>1, 'dd'=>1, 'dt'=>1, 'legend'=>1, 'li'=>1, 'optgroup'=>1, 'option'=>1, 'rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1, 'script'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'thead'=>1, 'th'=>1, 'tr'=>1); // Missing in $eB & $eI
    $eF = $eB + $eI;

    // $in sets allowed children
    $in = ((isset($eF[$in]) && $in != '#pcdata') or isset($eO[$in])) ? $in : 'div';
    if(isset($cE[$in])){
        // an empty element cannot hold content: drop it or neutralize it wholesale
        return (!$do ? '' : str_replace(array('<', '>'), array('&lt;', '&gt;'), $tx));
    }
    if(isset($cS[$in])){$inOk = $cS[$in];}
    elseif(isset($cI[$in])){$inOk = $eI; $cI['del'] = 1; $cI['ins'] = 1;}
    elseif(isset($cF[$in])){$inOk = $eF; unset($cI['del'], $cI['ins']);}
    elseif(isset($cB[$in])){$inOk = $eB; unset($cI['del'], $cI['ins']);}
    if(isset($cO[$in])){$inOk = $inOk + $cO[$in];}
    if(isset($cN[$in])){$inOk = array_diff_assoc($inOk, $cN[$in]);}

    $tx = explode('<', $tx);
    $ok = $q = array(); // $q seq list of open non-empty ele
    ob_start();

    for($i=-1, $ci=count($tx); ++$i<$ci;){
        // parent ele $p & allowed child $ok
        if($ql = count($q)){
            $p = array_pop($q);
            $q[] = $p;
            if(isset($cS[$p])){$ok = $cS[$p];}
            elseif(isset($cI[$p])){$ok = $eI; $cI['del'] = 1; $cI['ins'] = 1;}
            elseif(isset($cF[$p])){$ok = $eF; unset($cI['del'], $cI['ins']);}
            elseif(isset($cB[$p])){$ok = $eB; unset($cI['del'], $cI['ins']);}
            if(isset($cO[$p])){$ok = $ok + $cO[$p];}
            if(isset($cN[$p])){$ok = array_diff_assoc($ok, $cN[$p]);}
        }else{$ok = $inOk; unset($cI['del'], $cI['ins']);}
        // bad tags, & ele content; $e is still set here only when the previous piece's tag was rejected
        if(isset($e) && ($do == 1 or (isset($ok['#pcdata']) && ($do == 3 or $do == 5)))){
            echo '&lt;', $s, $e, $a, '&gt;';
        }
        if(isset($x[0])){
            if($do < 3 or isset($ok['#pcdata'])){echo $x;}
            elseif(strpos($x, "\x02\x04")){
                foreach(preg_split('`(\x01\x02[^\x01\x02]+\x02\x01)`', $x, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $v){
                    echo (substr($v, 0, 2) == "\x01\x02" ? $v : ($do > 4 ? preg_replace('`\S`', '', $v) : ''));
                }
            }elseif($do > 4){echo preg_replace('`\S`', '', $x);}
        }
        // get markup
        if(!preg_match('`^(/?)([a-zA-Z1-6]+)([^>]*)>(.*)`sm', $tx[$i], $r)){$x = $tx[$i]; continue;}
        $s = null; $e = null; $a = null; $x = null; list($all, $s, $e, $a, $x) = $r;
        // close tag
        if($s){
            if(isset($cE[$e]) or !in_array($e, $q)){continue;} // Empty/unopen
            if($p == $e){array_pop($q); echo '</', $e, '>'; unset($e); continue;} // Last open
            $add = ''; // Nesting - close open tags that need to be
            for($j=-1, $cj=count($q); ++$j<$cj;){
                if(($d = array_pop($q)) == $e){break;}
                else{$add .= "</{$d}>";}
            }
            echo $add, '</', $e, '>'; unset($e); continue;
        }
        // open tag
        // $cB ele needs $eB ele as child - <form><INPUT>
        if(isset($cB[$e]) && strlen(trim($x))){
            $tx[$i] = "{$e}{$a}>";
            array_splice($tx, $i+1, 0, 'div>'. $x); unset($e, $x); ++$ci; --$i; continue;
        }
        if((($ql && isset($cB[$p])) or (isset($cB[$in]) && !$ql)) && !isset($eB[$e]) && !isset($ok[$e])){
            array_splice($tx, $i, 0, 'div>'); unset($e, $x); ++$ci; --$i; continue;
        }
        // if no open ele, $in is parent; except for certain cases, immediate parent-child relation should hold
        if(!$ql or (!isset($eN[$e]) or !array_intersect($q, $cN2))){
            //if(!isset($ok[$e])){continue;} //DLMOD: Want to preserve improperly nested like <p><table> even though bad HTML, so don't remove bad children
            if(!isset($cE[$e])){$q[] = $e;}
            echo '<', $e, $a, '>'; unset($e); continue;
        }
        // specific parent-child
        if(isset($cS[$p][$e])){
            if(!isset($cE[$e])){$q[] = $e;}
            echo '<', $e, $a, '>'; unset($e); continue;
        }
        // nesting
        $add = '';
        $q2 = array();
        for($k=-1, $kc=count($q); ++$k<$kc;){
            $d = $q[$k];
            $ok2 = array();
            if(isset($cS[$d])){$q2[] = $d; continue;}
            $ok2 = isset($cI[$d]) ? $eI : $eF;
            if(isset($cO[$d])){$ok2 = $ok2 + $cO[$d];}
            if(isset($cN[$d])){$ok2 = array_diff_assoc($ok2, $cN[$d]);}
            if(!isset($ok2[$e])){
                if(!$k && !isset($inOk[$e])){continue 2;}
                $add = "</{$d}>";
                for(;++$k<$kc;){$add = "</{$q[$k]}>{$add}";}
                break;
            }
            else{$q2[] = $d;}
        }
        $q = $q2;
        if(!isset($cE[$e])){$q[] = $e;}
        echo $add, '<', $e, $a, '>'; unset($e); continue;
    }

    // end: flush what the last piece left pending, then close everything still open
    if($ql = count($q)){
        $p = array_pop($q);
        $q[] = $p;
        if(isset($cS[$p])){$ok = $cS[$p];}
        elseif(isset($cI[$p])){$ok = $eI; $cI['del'] = 1; $cI['ins'] = 1;}
        elseif(isset($cF[$p])){$ok = $eF; unset($cI['del'], $cI['ins']);}
        elseif(isset($cB[$p])){$ok = $eB; unset($cI['del'], $cI['ins']);}
        if(isset($cO[$p])){$ok = $ok + $cO[$p];}
        if(isset($cN[$p])){$ok = array_diff_assoc($ok, $cN[$p]);}
    }else{$ok = $inOk; unset($cI['del'], $cI['ins']);}
    if(isset($e) && ($do == 1 or (isset($ok['#pcdata']) && ($do == 3 or $do == 5)))){
        echo '&lt;', $s, $e, $a, '&gt;';
    }
    if(isset($x[0])){
        if(strlen(trim($x)) && (($ql && isset($cB[$p])) or (isset($cB[$in]) && !$ql))){
            echo '<div>', $x, '</div>';
        }
        elseif($do < 3 or isset($ok['#pcdata'])){echo $x;}
        elseif(strpos($x, "\x02\x04")){
            foreach(preg_split('`(\x01\x02[^\x01\x02]+\x02\x01)`', $x, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $v){
                echo (substr($v, 0, 2) == "\x01\x02" ? $v : ($do > 4 ? preg_replace('`\S`', '', $v) : ''));
            }
        }elseif($do > 4){echo preg_replace('`\S`', '', $x);}
    }
    while(!empty($q) && ($e = array_pop($q))){echo '</', $e, '>';}
    $o = ob_get_contents();
    ob_end_clean();
    return $o;
    // eof
}
299
/**
 * preg_replace_callback handler for HTML comments and CDATA sections.
 *
 * The governing setting is $cf['comment'] for comments and $cf['cdata'] for CDATA
 * sections: 0 returns the match untouched, 1 removes it, 2 additionally
 * entity-escapes &, < and > in the content, and any other value keeps the content
 * as is. Kept sections are wrapped in \x01/\x02 marker bytes, and their &, < and >
 * are replaced by \x03, \x04 and \x05.
 *
 * @param array $in Match array; $in[0] is the whole comment or CDATA section.
 * @return string Replacement text.
 */
function hl_cmtcd($in){
    global $cf;
    $raw = $in[0];
    $entities = array('&'=>'&amp;', '<'=>'&lt;', '>'=>'&gt;');
    // the 4th character is '-' for '<!--' and '[' for '<![CDATA['
    $isComment = ($raw[3] == '-');
    $mode = $isComment ? $cf['comment'] : $cf['cdata'];
    if(!$mode){
        return $raw;
    }
    if($mode == 1){
        return '';
    }
    if($isComment){
        // content between '<!--' and '-->', made to end with a space
        $body = substr($raw, 4, -3);
        if(substr($body, -1) != ' '){
            $body .= ' ';
        }
        // no '--' inside a comment: collapse every run of dashes to one
        $body = preg_replace('`-{2,}`', '-', $body);
        if($mode == 2){
            $body = strtr($body, $entities);
        }
        $wrapped = "\x01\x02\x04!--{$body}--\x05\x02\x01";
    }else{
        // drop only the outer '<' and '>'; '![CDATA[' and ']]' stay in the content
        $body = substr($raw, 1, -1);
        if($mode == 2){
            $body = strtr($body, $entities);
        }
        $wrapped = "\x01\x01\x04{$body}\x05\x01\x01";
    }
    return strtr($wrapped, array('&'=>"\x03", '<'=>"\x04", '>'=>"\x05"));
    // eof
}
321
/**
 * preg_replace_callback handler for entity-like text ('&amp;name;' / '&amp;#n;').
 *
 * The caller has already turned every '&' into '&amp;'; this handler returns the
 * real reference for names and code points it accepts and leaves the escaped
 * form ('&amp;...;') for everything else. Depending on $cf, named entities may be
 * rewritten numerically ($cf['named_entity'] off) and numeric ones switched
 * between decimal and hexadecimal ($cf['hexdec_entity']). With $cf['and_mark'] on,
 * the leading '&' of the result is emitted as the marker byte \x06.
 *
 * Accepted code points follow the XML 1.0 Char production (tab, LF, CR,
 * U+0020-U+D7FF, U+E000-U+FFFD, U+10000-U+10FFFF), minus DEL, the C1 controls
 * other than U+0085, and U+FDD0-U+FDEF. Earlier this rejected everything above
 * U+FFFF (e.g. '&#128512;') while accepting surrogates and U+FFFE/U+FFFF.
 *
 * @param array $in Match array; $in[1] is the entity body without '&amp;' and ';'.
 * @return string Replacement text.
 */
function hl_ent($in){
    global $cf;
    $in = $in[1];
    // univ named ents
    static $U = array('quot'=>1,'amp'=>1,'lt'=>1,'gt'=>1);
    // HTML named ents
    static $N = array('fnof'=>'402', 'Alpha'=>'913', 'Beta'=>'914', 'Gamma'=>'915', 'Delta'=>'916', 'Epsilon'=>'917', 'Zeta'=>'918', 'Eta'=>'919', 'Theta'=>'920', 'Iota'=>'921', 'Kappa'=>'922', 'Lambda'=>'923', 'Mu'=>'924', 'Nu'=>'925', 'Xi'=>'926', 'Omicron'=>'927', 'Pi'=>'928', 'Rho'=>'929', 'Sigma'=>'931', 'Tau'=>'932', 'Upsilon'=>'933', 'Phi'=>'934', 'Chi'=>'935', 'Psi'=>'936', 'Omega'=>'937', 'alpha'=>'945', 'beta'=>'946', 'gamma'=>'947', 'delta'=>'948', 'epsilon'=>'949', 'zeta'=>'950', 'eta'=>'951', 'theta'=>'952', 'iota'=>'953', 'kappa'=>'954', 'lambda'=>'955', 'mu'=>'956', 'nu'=>'957', 'xi'=>'958', 'omicron'=>'959', 'pi'=>'960', 'rho'=>'961', 'sigmaf'=>'962', 'sigma'=>'963', 'tau'=>'964', 'upsilon'=>'965', 'phi'=>'966', 'chi'=>'967', 'psi'=>'968', 'omega'=>'969', 'thetasym'=>'977', 'upsih'=>'978', 'piv'=>'982', 'bull'=>'8226', 'hellip'=>'8230', 'prime'=>'8242', 'Prime'=>'8243', 'oline'=>'8254', 'frasl'=>'8260', 'weierp'=>'8472', 'image'=>'8465', 'real'=>'8476', 'trade'=>'8482', 'alefsym'=>'8501', 'larr'=>'8592', 'uarr'=>'8593', 'rarr'=>'8594', 'darr'=>'8595', 'harr'=>'8596', 'crarr'=>'8629', 'lArr'=>'8656', 'uArr'=>'8657', 'rArr'=>'8658', 'dArr'=>'8659', 'hArr'=>'8660', 'forall'=>'8704', 'part'=>'8706', 'exist'=>'8707', 'empty'=>'8709', 'nabla'=>'8711', 'isin'=>'8712', 'notin'=>'8713', 'ni'=>'8715', 'prod'=>'8719', 'sum'=>'8721', 'minus'=>'8722', 'lowast'=>'8727', 'radic'=>'8730', 'prop'=>'8733', 'infin'=>'8734', 'ang'=>'8736', 'and'=>'8743', 'or'=>'8744', 'cap'=>'8745', 'cup'=>'8746', 'int'=>'8747', 'there4'=>'8756', 'sim'=>'8764', 'cong'=>'8773', 'asymp'=>'8776', 'ne'=>'8800', 'equiv'=>'8801', 'le'=>'8804', 'ge'=>'8805', 'sub'=>'8834', 'sup'=>'8835', 'nsub'=>'8836', 'sube'=>'8838', 'supe'=>'8839', 'oplus'=>'8853', 'otimes'=>'8855', 'perp'=>'8869', 'sdot'=>'8901', 'lceil'=>'8968', 'rceil'=>'8969', 'lfloor'=>'8970', 'rfloor'=>'8971', 'lang'=>'9001', 'rang'=>'9002', 'loz'=>'9674', 'spades'=>'9824', 'clubs'=>'9827', 'hearts'=>'9829', 'diams'=>'9830',
        'apos'=>'39', 'OElig'=>'338', 'oelig'=>'339', 'Scaron'=>'352', 'scaron'=>'353', 'Yuml'=>'376', 'circ'=>'710', 'tilde'=>'732', 'ensp'=>'8194', 'emsp'=>'8195', 'thinsp'=>'8201', 'zwnj'=>'8204', 'zwj'=>'8205', 'lrm'=>'8206', 'rlm'=>'8207', 'ndash'=>'8211', 'mdash'=>'8212', 'lsquo'=>'8216', 'rsquo'=>'8217', 'sbquo'=>'8218', 'ldquo'=>'8220', 'rdquo'=>'8221', 'bdquo'=>'8222', 'dagger'=>'8224', 'Dagger'=>'8225', 'permil'=>'8240', 'lsaquo'=>'8249', 'rsaquo'=>'8250', 'euro'=>'8364', 'nbsp'=>'160', 'iexcl'=>'161', 'cent'=>'162', 'pound'=>'163', 'curren'=>'164', 'yen'=>'165', 'brvbar'=>'166', 'sect'=>'167', 'uml'=>'168', 'copy'=>'169', 'ordf'=>'170', 'laquo'=>'171', 'not'=>'172', 'shy'=>'173', 'reg'=>'174', 'macr'=>'175', 'deg'=>'176', 'plusmn'=>'177', 'sup2'=>'178', 'sup3'=>'179', 'acute'=>'180', 'micro'=>'181', 'para'=>'182', 'middot'=>'183', 'cedil'=>'184', 'sup1'=>'185', 'ordm'=>'186', 'raquo'=>'187', 'frac14'=>'188', 'frac12'=>'189', 'frac34'=>'190', 'iquest'=>'191', 'Agrave'=>'192', 'Aacute'=>'193', 'Acirc'=>'194', 'Atilde'=>'195', 'Auml'=>'196', 'Aring'=>'197', 'AElig'=>'198', 'Ccedil'=>'199', 'Egrave'=>'200', 'Eacute'=>'201', 'Ecirc'=>'202', 'Euml'=>'203', 'Igrave'=>'204', 'Iacute'=>'205', 'Icirc'=>'206', 'Iuml'=>'207', 'ETH'=>'208', 'Ntilde'=>'209', 'Ograve'=>'210', 'Oacute'=>'211', 'Ocirc'=>'212', 'Otilde'=>'213', 'Ouml'=>'214', 'times'=>'215', 'Oslash'=>'216', 'Ugrave'=>'217', 'Uacute'=>'218', 'Ucirc'=>'219', 'Uuml'=>'220', 'Yacute'=>'221', 'THORN'=>'222', 'szlig'=>'223', 'agrave'=>'224', 'aacute'=>'225', 'acirc'=>'226', 'atilde'=>'227', 'auml'=>'228', 'aring'=>'229', 'aelig'=>'230', 'ccedil'=>'231', 'egrave'=>'232', 'eacute'=>'233', 'ecirc'=>'234', 'euml'=>'235', 'igrave'=>'236', 'iacute'=>'237', 'icirc'=>'238', 'iuml'=>'239', 'eth'=>'240', 'ntilde'=>'241', 'ograve'=>'242', 'oacute'=>'243', 'ocirc'=>'244', 'otilde'=>'245', 'ouml'=>'246', 'divide'=>'247', 'oslash'=>'248', 'ugrave'=>'249', 'uacute'=>'250', 'ucirc'=>'251', 'uuml'=>'252', 'yacute'=>'253',
        'thorn'=>'254', 'yuml'=>'255');
    $amp = $cf['and_mark'] ? "\x06" : '&';
    if($in[0] != '#'){
        // named entity
        if(isset($U[$in])){return $amp. $in. ';';}
        if(!isset($N[$in])){return $amp. 'amp;'. $in. ';';} // unknown name: keep the ampersand escaped
        if($cf['named_entity']){return $amp. $in. ';';}
        return $amp. '#'. ($cf['hexdec_entity'] > 1 ? 'x'. dechex((int)$N[$in]) : $N[$in]). ';';
    }
    // numeric entity: $in becomes the digits, or 'x'/'X' followed by hex digits
    $in = substr($in, 1);
    $n = ctype_digit($in) ? intval($in) : hexdec(substr($in, 1));
    if($n < 9 or ($n > 13 && $n < 32) or $n == 11 or $n == 12 or ($n > 126 && $n < 160 && $n != 133) or ($n > 55295 && ($n < 57344 or ($n > 64975 && $n < 64992) or $n == 65534 or $n == 65535 or $n > 1114111))){
        return $amp. "amp;#{$in};";
    }
    return $amp. '#'. (((ctype_digit($in) && $cf['hexdec_entity'] < 2) or !$cf['hexdec_entity']) ? $n : 'x'. dechex($n)). ';';
    // eof
}
339
/**
 * Check the scheme (protocol) of a URL against the configured allow-lists and
 * optionally convert the URL between relative and absolute form.
 *
 * Called with a URL string and the name of the attribute it came from, or, when
 * $c is omitted, as a preg_replace_callback handler: $p is then a match array
 * whose [2] is the URL and whose [1] / [3] are text re-attached before / after
 * it, and the 'style' scheme list is used.
 *
 * The allowed schemes are $cf['schemes'][$c], or $cf['schemes']['*'] when there
 * is no list for $c. A URL with a disallowed scheme is prefixed with 'denied:'.
 *
 * @param string|array $p URL, or match array in callback mode.
 * @param string|null  $c Attribute name.
 * @return string The (possibly rewritten) URL, including any surrounding text.
 */
function hl_prot($p, $c=null){
    global $cf;
    $pre = '';
    $post = '';
    if($c == null){
        // callback mode
        $c = 'style';
        $pre = $p[1];
        $post = $p[3];
        $p = trim($p[2]);
    }
    $allowed = isset($cf['schemes'][$c]) ? $cf['schemes'][$c] : $cf['schemes']['*'];

    // All ok, frag, query, param
    if(isset($allowed['*']) or !strcspn($p, '#?;')){
        return "{$pre}{$p}{$post}";
    }

    // Denied prot; the ':' may also be written as an entity, %3a or a CSS escape
    $hasScheme = preg_match('`^([a-z\d\-+.&#; ]+?)(:|&#(58|x3a);|%3a|\\\\0{0,4}3a).`i', $p, $m);
    if($hasScheme && !isset($allowed[strtolower($m[1])])){
        return "{$pre}denied:{$p}{$post}";
    }

    if(!$cf['abs_url']){
        return "{$pre}{$p}{$post}";
    }

    $base = $cf['base_url'];
    if($cf['abs_url'] == -1 && strpos($p, $base) === 0){
        // Make url rel
        $p = substr($p, strlen($base));
    }elseif(empty($m[1])){
        // Make rel URL abs; re rfc 1808; not inherit param/query/frag
        if(substr($p, 0, 2) == '//'){
            // scheme-relative: borrow the base URL's scheme
            $p = substr($base, 0, strpos($base, ':')+1). $p;
        }elseif($p[0] == '/'){
            // host-relative: borrow scheme and host
            $p = preg_replace('`(^.+?://[^/]+)(.*)`', '$1', $base). $p;
        }elseif(strcspn($p, './')){
            // plain relative path
            $p = $base. $p;
        }else{
            // starts with '.' or '/': resolve './' and '../' against the base path
            preg_match('`^([a-zA-Z\d\-+.]+://[^/]+)(.*)`', $base, $m);
            $path = preg_replace('`(?<=/)\./`', '', $m[2]. $p);
            $dotdot = '`(?<=/)([^/]{3,}|[^/.]+?|\.[^/.]|[^/.]\.)/\.\./`';
            while(preg_match($dotdot, $path)){
                $path = preg_replace($dotdot, '', $path);
            }
            $p = $m[1]. $path;
        }
    }
    return "{$pre}{$p}{$post}";
    // eof
}
370
/**
 * Tell whether $p is a usable PCRE pattern (delimiters and modifiers included).
 *
 * The pattern is test-run against an empty subject while a temporary error
 * handler is installed; it counts as invalid if PHP raises any error during the
 * call or if preg_match() returns false. The previous implementation depended on
 * the 'track_errors' ini setting and $php_errormsg, which no longer exist in
 * PHP 8, so it reported every pattern as valid there.
 *
 * @param mixed $p Candidate pattern.
 * @return int 1 if the pattern compiles, 0 otherwise (also for empty or non-string input).
 */
function hl_regex($p){
    if(empty($p) or !is_string($p)){return 0;}
    $clean = 1;
    // Swallow the compilation warning instead of letting it be displayed or logged.
    set_error_handler(function() use (&$clean){$clean = 0; return true;});
    $r = preg_match($p, '');
    restore_error_handler();
    return ($clean && $r !== false) ? 1 : 0;
    // eof
}
386
/**
 * Parse a $spec string into the array form used for attribute checks.
 *
 * Format: rules separated by ';', each 'ele1,ele2=attr1,attr2(param=value/param=value),-attr3'.
 * '-attr' denies an attribute and '-*' denies all; these are collected under the
 * key 'n'. Parameter values may be double-quoted; inside quotes the characters
 * ; | ~ space , / ( ) are protected from the parser and `" stands for a literal ".
 * Parameter names shorter than 5 characters are not accepted. 'match' / 'nomatch'
 * patterns that hl_regex() rejects are dropped.
 *
 * Rules are processed from last to first, so for an element named in several
 * rules the earliest rule's attribute list is the one kept.
 *
 * The quoted-value step used preg_replace() with the /e modifier, which PHP 7+
 * no longer supports (the call returns NULL, so every spec came out empty); it
 * now uses preg_replace_callback().
 *
 * @param string $t Spec string.
 * @return array element => (attribute => (param => value), optionally 'n' => denied attributes).
 */
function hl_spec($t){
    $s = array();
    // Inside double quotes, swap parser-significant characters for placeholder bytes and strip the quotes.
    $t = preg_replace_callback(
        '/"(?>(`.|[^"])*)"/sm',
        function($q){
            return substr(str_replace(array(";", "|", "~", " ", ",", "/", "(", ")", '`"'), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", "\x08", "\""), $q[0]), 1, -1);
        },
        trim((string)$t)
    );
    $t = str_replace(array("\t", "\r", "\n", ' '), '', (string)$t);
    for($i = count(($t = explode(';', $t))); --$i>=0;){
        $w = $t[$i];
        if(empty($w) or ($e = strpos($w, '=')) === false or !strlen(($a = substr($w, $e+1)))){continue;}
        $y = $n = array(); // $y: attributes with rules; $n: denied attributes
        foreach(explode(',', $a) as $v){
            if(!preg_match('`^([a-z:\-\*]+)(?:\((.*?)\))?`i', $v, $m)){continue;}
            if(($an = strtolower($m[1])) == '-*'){$n['*'] = 1; continue;}
            if($an[0] == '-'){$n[substr($an, 1)] = 1; continue;}
            if(!isset($m[2])){$y[$an] = 1; continue;}
            foreach(explode('/', $m[2]) as $prm){
                if(empty($prm) or ($pm = strpos($prm, '=')) == 0 or $pm < 5){$y[$an] = 1; continue;}
                // Once an attribute has been marked rule-less (scalar 1), further
                // parameters cannot be stored on it; skipping avoids the
                // "scalar value as an array" error PHP 8 would throw.
                if(isset($y[$an]) && !is_array($y[$an])){continue;}
                $y[$an][strtolower(substr($prm, 0, $pm))] = str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", "\x08"), array(";", "|", "~", " ", ",", "/", "(", ")"), substr($prm, $pm+1));
            }
            if(isset($y[$an]['match']) && !hl_regex($y[$an]['match'])){unset($y[$an]['match']);}
            if(isset($y[$an]['nomatch']) && !hl_regex($y[$an]['nomatch'])){unset($y[$an]['nomatch']);}
        }
        if(!count($y) && !count($n)){continue;}
        if(!isset($n['*'])){
            // unless everything is denied, attributes without any rule carry no information
            foreach($y as $k=>$v){
                if(!is_array($v) or !count($v)){unset($y[$k]);}
            }
        }
        foreach(explode(',', substr($w, 0, $e)) as $v){
            if(!strlen(($v = strtolower($v)))){continue;}
            if(count($y)){$s[$v] = $y;}
            if(count($n)){$s[$v]['n'] = $n;}
        }
    }
    return $s;
    // eof
}
422
423 function hl_tag($in){
424 // tag/attribute handler
425 global $cf;
426 $in = $in[0];
427 // entitify invalid < >
428 if($in == '< '){return '&lt; ';}
429 if($in == '>'){return '&gt;';}
430 if(!preg_match('`^<(/?)([a-zA-Z][a-zA-Z1-6]*)([^>]*?)\s?>$`m', $in, $m)){
431 return str_replace(array('<', '>'), array('&lt;', '&gt;'), $in);
432 }elseif(!isset($cf['elements'][($e = strtolower($m[2]))])){
433 return (($cf['keep_bad']%2) ? str_replace(array('<', '>'), array('&lt;', '&gt;'), $in) : '');
434 }
435 // attr string
436 $a = str_replace(array("\xad", "\n", "\r", "\t"), ' ', trim($m[3]));
437 if(strpos($a, '&') !== false){
438 str_replace(array('&#xad;', '&#173;', '&shy;'), ' ', $a);
439 }
440 // tag transform
441 static $eD = array('applet'=>1, 'center'=>1, 'dir'=>1, 'embed'=>1, 'font'=>1, 'isindex'=>1, 'menu'=>1, 's'=>1, 'strike'=>1, 'u'=>1); // Deprecated ele
442 if($cf['make_tag_strict'] && isset($eD[$e])){
443 $trt = hl_tag2($e, $a, $cf['make_tag_strict']);
444 if(!$e){return (($cf['keep_bad']%2) ? str_replace(array('<', '>'), array('&lt;', '&gt;'), $in) : '');}
445 }
446 // close tag
447 static $eE = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1); // Empty ele
448 if(!empty($m[1])){
449 return (!isset($eE[$e]) ? "</$e>" : (($cf['keep_bad'])%2 ? str_replace(array('<', '>'), array('&lt;', '&gt;'), $in) : ''));
450 }
451
452 // open tag & attr
453 static $aN = array('script'=>array('embed'=>1, 'img'=>1), 'sscr'=>array('embed'=>1, 'img'=>1), 'abbr'=>array('td'=>1, 'th'=>1), 'accept-charset'=>array('form'=>1), 'accept'=>array('form'=>1, 'input'=>1), 'accesskey'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'legend'=>1, 'textarea'=>1), 'action'=>array('form'=>1), 'align'=>array('caption'=>1, 'embed'=>1, 'applet'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'object'=>1, 'legend'=>1, 'table'=>1, 'hr'=>1, 'div'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'p'=>1, 'col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'alt'=>array('applet'=>1, 'area'=>1, 'img'=>1, 'input'=>1), 'archive'=>array('applet'=>1, 'object'=>1), 'axis'=>array('td'=>1, 'th'=>1), 'bgcolor'=>array('embed'=>1, 'table'=>1, 'tr'=>1, 'td'=>1, 'th'=>1), 'border'=>array('table'=>1, 'img'=>1, 'object'=>1), 'bordercolor'=>array('table'=>1, 'td'=>1, 'tr'=>1), 'cellpadding'=>array('table'=>1), 'cellspacing'=>array('table'=>1), 'char'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'charoff'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'charset'=>array('a'=>1, 'script'=>1), 'checked'=>array('input'=>1), 'cite'=>array('blockquote'=>1, 'q'=>1, 'del'=>1, 'ins'=>1), 'classid'=>array('object'=>1), 'clear'=>array('br'=>1), 'code'=>array('applet'=>1), 'codebase'=>array('object'=>1, 'applet'=>1), 'codetype'=>array('object'=>1), 'color'=>array('font'=>1), 'cols'=>array('textarea'=>1), 'colspan'=>array('td'=>1, 'th'=>1), 'compact'=>array('dir'=>1, 'dl'=>1, 'menu'=>1, 'ol'=>1, 'ul'=>1), 'coords'=>array('area'=>1, 'a'=>1), 'data'=>array('object'=>1), 'datetime'=>array('del'=>1, 'ins'=>1), 'declare'=>array('object'=>1), 'defer'=>array('script'=>1), 'dir'=>array('bdo'=>1), 'disabled'=>array('button'=>1, 'input'=>1, 'optgroup'=>1, 'option'=>1, 'select'=>1, 'textarea'=>1), 'enctype'=>array('form'=>1), 
'face'=>array('font'=>1), 'for'=>array('label'=>1), 'frame'=>array('table'=>1), 'frameborder'=>array('iframe'=>1), 'headers'=>array('td'=>1, 'th'=>1), 'height'=>array('embed'=>1, 'iframe'=>1, 'td'=>1, 'th'=>1, 'img'=>1, 'object'=>1, 'applet'=>1), 'href'=>array('a'=>1, 'area'=>1), 'hreflang'=>array('a'=>1), 'hspace'=>array('applet'=>1, 'img'=>1, 'object'=>1), 'ismap'=>array('img'=>1, 'input'=>1), 'label'=>array('option'=>1, 'optgroup'=>1), 'language'=>array('script'=>1), 'longdesc'=>array('img'=>1, 'iframe'=>1), 'marginheight'=>array('iframe'=>1), 'marginwidth'=>array('iframe'=>1), 'maxlength'=>array('input'=>1), 'method'=>array('form'=>1), 'model'=>array('embed'=>1), 'multiple'=>array('select'=>1), 'name'=>array('button'=>1, 'embed'=>1, 'textarea'=>1, 'applet'=>1, 'select'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'a'=>1, 'input'=>1, 'object'=>1, 'map'=>1, 'param'=>1), 'nohref'=>array('area'=>1), 'noshade'=>array('hr'=>1), 'nowrap'=>array('td'=>1, 'th'=>1), 'object'=>array('applet'=>1), 'onblur'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'onchange'=>array('input'=>1, 'select'=>1, 'textarea'=>1), 'onfocus'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'onreset'=>array('form'=>1), 'onselect'=>array('input'=>1, 'textarea'=>1), 'onsubmit'=>array('form'=>1), 'pluginspage'=>array('embed'=>1), 'pluginurl'=>array('embed'=>1), 'prompt'=>array('isindex'=>1), 'readonly'=>array('textarea'=>1, 'input'=>1), 'rel'=>array('a'=>1), 'rev'=>array('a'=>1), 'rows'=>array('textarea'=>1), 'rowspan'=>array('td'=>1, 'th'=>1), 'rules'=>array('table'=>1), 'scope'=>array('td'=>1, 'th'=>1), 'scrolling'=>array('iframe'=>1), 'selected'=>array('option'=>1), 'shape'=>array('area'=>1, 'a'=>1), 'size'=>array('hr'=>1, 'font'=>1, 'input'=>1, 'select'=>1), 'span'=>array('col'=>1, 'colgroup'=>1), 'src'=>array('embed'=>1, 'script'=>1, 'input'=>1, 'iframe'=>1, 'img'=>1), 'standby'=>array('object'=>1), 
'start'=>array('ol'=>1), 'summary'=>array('table'=>1), 'tabindex'=>array('a'=>1, 'area'=>1, 'button'=>1, 'input'=>1, 'object'=>1, 'select'=>1, 'textarea'=>1), 'target'=>array('a'=>1, 'area'=>1, 'form'=>1), 'type'=>array('a'=>1, 'embed'=>1, 'object'=>1, 'param'=>1, 'script'=>1, 'input'=>1, 'li'=>1, 'ol'=>1, 'ul'=>1, 'button'=>1), 'usemap'=>array('img'=>1, 'input'=>1, 'object'=>1), 'valign'=>array('col'=>1, 'colgroup'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1), 'value'=>array('input'=>1, 'option'=>1, 'param'=>1, 'button'=>1, 'li'=>1), 'valuetype'=>array('param'=>1), 'vspace'=>array('applet'=>1, 'img'=>1, 'object'=>1), 'width'=>array('embed'=>1, 'hr'=>1, 'iframe'=>1, 'img'=>1, 'object'=>1, 'table'=>1, 'td'=>1, 'th'=>1, 'applet'=>1, 'col'=>1, 'colgroup'=>1, 'pre'=>1), 'wmode'=>array('embed'=>1), 'xml:space'=>array('pre'=>1, 'script'=>1, 'style'=>1)); // Specific attrs
454 static $aNE = array('checked'=>1, 'compact'=>1, 'declare'=>1, 'defer'=>1, 'disabled'=>1, 'ismap'=>1, 'multiple'=>1, 'nohref'=>1, 'noresize'=>1, 'noshade'=>1, 'nowrap'=>1, 'readonly'=>1, 'selected'=>1); // 'Empty' attrs
455 static $aNP = array('action'=>1, 'cite'=>1, 'classid'=>1, 'codebase'=>1, 'data'=>1, 'href'=>1, 'longdesc'=>1, 'model'=>1, 'pluginspage'=>1, 'pluginurl'=>1, 'usemap'=>1); // Attrs needing URL protocol check; for attrs like onmouseover & src, using: '$n[0] != 'o' && strpos($n, 'src') === false'; 'style' separately handled
456 static $aNU = array('class'=>array('param'=>1, 'script'=>1), 'dir'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'iframe'=>1, 'param'=>1, 'script'=>1), 'id'=>array('script'=>1), 'lang'=>array('applet'=>1, 'br'=>1, 'iframe'=>1, 'param'=>1, 'script'=>1), 'xml:lang'=>array('applet'=>1, 'br'=>1, 'iframe'=>1, 'param'=>1, 'script'=>1), 'onclick'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'ondblclick'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onkeydown'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onkeypress'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onkeyup'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmousedown'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmousemove'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmouseout'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmouseover'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'onmouseup'=>array('applet'=>1, 'bdo'=>1, 'br'=>1, 'font'=>1, 'iframe'=>1, 'isindex'=>1, 'param'=>1, 'script'=>1), 'style'=>array('param'=>1, 'script'=>1), 'title'=>array('param'=>1, 'script'=>1)); // Univ attrs & exceptions
457
458 if($cf['lc_std_val']){
459 // predef attr vals like radio for $eAL & $aNE ele
460 static $aNL = array('all'=>1, 'baseline'=>1, 'bottom'=>1, 'button'=>1, 'center'=>1, 'char'=>1, 'checkbox'=>1, 'circle'=>1, 'col'=>1, 'colgroup'=>1, 'cols'=>1, 'data'=>1, 'default'=>1, 'file'=>1, 'get'=>1, 'groups'=>1, 'hidden'=>1, 'image'=>1, 'justify'=>1, 'left'=>1, 'ltr'=>1, 'middle'=>1, 'none'=>1, 'object'=>1, 'password'=>1, 'poly'=>1, 'post'=>1, 'preserve'=>1, 'radio'=>1, 'rect'=>1, 'ref'=>1, 'reset'=>1, 'right'=>1, 'row'=>1, 'rowgroup'=>1, 'rows'=>1, 'rtl'=>1, 'submit'=>1, 'text'=>1, 'top'=>1);
461 static $eAL = array('a'=>1, 'area'=>1, 'bdo'=>1, 'button'=>1, 'col'=>1, 'form'=>1, 'img'=>1, 'input'=>1, 'object'=>1, 'optgroup'=>1, 'option'=>1, 'param'=>1, 'script'=>1, 'select'=>1, 'table'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1, 'xml:space'=>1);
462 $lcase = isset($eAL[$e]) ? 1 : 0;
463 }
464
465 $depTr = 0;
466 if($cf['no_deprecated_attr']){
467 // dep attr:applicable ele
468 static $aND = array('align'=>array('caption'=>1, 'div'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'legend'=>1, 'object'=>1, 'p'=>1, 'table'=>1), 'bgcolor'=>array('table'=>1, 'td'=>1, 'th'=>1, 'tr'=>1), 'border'=>array('img'=>1, 'object'=>1), 'bordercolor'=>array('table'=>1, 'td'=>1, 'tr'=>1), 'clear'=>array('br'=>1), 'compact'=>array('dl'=>1, 'ol'=>1, 'ul'=>1), 'height'=>array('td'=>1, 'th'=>1), 'hspace'=>array('img'=>1, 'object'=>1), 'language'=>array('script'=>1), 'name'=>array('a'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'map'=>1), 'noshade'=>array('hr'=>1), 'nowrap'=>array('td'=>1, 'th'=>1), 'size'=>array('hr'=>1), 'start'=>array('ol'=>1), 'type'=>array('li'=>1, 'ol'=>1, 'ul'=>1), 'value'=>array('li'=>1), 'vspace'=>array('img'=>1, 'object'=>1), 'width'=>array('hr'=>1, 'pre'=>1, 'td'=>1, 'th'=>1));
469 static $eAD = array('a'=>1, 'br'=>1, 'caption'=>1, 'div'=>1, 'dl'=>1, 'form'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'legend'=>1, 'li'=>1, 'map'=>1, 'object'=>1, 'ol'=>1, 'p'=>1, 'pre'=>1, 'script'=>1, 'table'=>1, 'td'=>1, 'th'=>1, 'tr'=>1, 'ul'=>1);
470 $depTr = isset($eAD[$e]) ? 1 : 0;
471 }
472
473 // attr name-vals
474 if(strpos($a, "\x01") !== false){$a = preg_replace('`\x01[^\x01]*\x01`', '', $a);} // No comments/CDATA secs
475 $mode = 0; $a = trim($a, ' /'); $aA = array();
476 while(strlen($a)){
477 $w = 0;
478 switch($mode){
479 case 0: // Attr name - min 2 chars; : in xml:lang
480 if(preg_match('`^[a-zA-Z][\-a-zA-Z:]+`', $a, $m)){
481 $nm = strtolower($m[0]);
482 $w = $mode = 1; $a = ltrim(substr_replace($a, '', 0, strlen($m[0])));
483 }
484 break; case 1:
485 if($a[0] == '='){ // =
486 $w = 1; $mode = 2; $a = ltrim($a, '= ');
487 }else{ // No val
488 $w = 1; $mode = 0; $a = ltrim($a);
489 $aA[$nm] = '';
490 }
491 break; case 2: // Attr val
492 if(preg_match('`^"[^"]*"`', $a, $m) or preg_match("`^'[^']*'`", $a, $m) or preg_match("`^\s*[^\s\"']+`", $a, $m)){
493 $m = $m[0]; $w = 1; $mode = 0; $a = ltrim(substr_replace($a, '', 0, strlen($m)));
494 $aA[$nm] = trim(($m[0] == '"' or $m[0] == '\'') ? substr($m, 1, -1) : $m);
495 }
496 break;
497 }
498 if($w == 0){ // Parse errs, deal with space, " & '
499 $a = preg_replace('`^(?:"[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*`', '', $a);
500 $mode = 0;
501 }
502 }
503 if($mode == 1){$aA[$nm] = '';}
504
505 // clean attrs - remove invalids, escape ", values for 'empty' attr, lowercase predefined values, remove ones with unfit values, anti-spam, check scheme & expressions in style props, check scheme in other attr
506 global $spec;
507 $rl = isset($spec[$e]) ? $spec[$e] : array();
508 $a = array(); $nfr = 0;
509 foreach($aA as $k=>$v){
510 if(!isset($cf['deny_attribute'][$k]) && ((!isset($rl['n'][$k]) && !isset($rl['n']['*'])) or isset($rl[$k])) && (isset($aN[$k][$e]) or (isset($aNU[$k]) && !isset($aNU[$k][$e])))){
511 if(isset($aNE[$k])){$v = $k;}
512 elseif(!empty($lcase) && (($e != 'button' or $e != 'input') or $k == 'type')){ // Rather loose but ?not cause issues
513 $v = (isset($aNL[($v2 = strtolower($v))])) ? $v2 : $v;
514 }
515 if($k == 'style'){
516 $v = preg_replace_callback('`((?:u|&#(?:x75|117);)(?:r|&#(?:x72|114);)(?:l|&#(?:x6c|108);)(?:\(|&#(?:x28|40);)(?: |&#(?:x20|32);)*(?:\'|"|&(?:quot|apos|34|39|x22|x27);)?)(.+)((?:\'|"|&(?:quot|apos|34|39|x22|x27);)?(?: |&#(?:x20|32);)*(?:\)|&#(?:x29|41);))`iS', 'hl_prot', $v);
517 if(!$cf['css_expression']){
518 $v = preg_replace('`(?::|&#(?:x3a|58);)(?: |&#(?:x20|32);)*e.+?n(?: |&#(?:x20|32);)*(?:\(|&#(?:x28|40);).*(?:\)|&#(?:x29|41);)`iS', '', $v);
519 }
520 }elseif(isset($aNP[$k]) or strpos($k, 'src') !== false or $k[0] == 'o'){
521 $v = hl_prot($v, $k);
522 if($k == 'href'){ // Anti-spam
523 if($cf['anti_mail_spam'] && strpos($v, 'mailto:') === 0){
524 $v = str_replace('@', htmlspecialchars($cf['anti_mail_spam']), $v);
525 }elseif($cf['anti_link_spam']){
526 $r1 = $cf['anti_link_spam'][1];
527 if(!empty($r1) && preg_match($r1, $v)){continue;}
528 $r0 = $cf['anti_link_spam'][0];
529 if(!empty($r0) && preg_match($r0, $v)){
530 if(isset($a['rel'])){
531 if(!preg_match('`\bnofollow\b`i', $a['rel'])){$a['rel'] .= ' nofollow';}
532 }elseif(isset($aA['rel'])){
533 if(!preg_match('`\bnofollow\b`i', $aA['rel'])){$nfr = 1;}
534 }else{$a['rel'] = 'nofollow';}
535 }
536 }
537 }
538 }
539 if(isset($rl[$k]) && is_array($rl[$k]) && ($v = hl_attrval($v, $rl[$k])) === 0){continue;}
540 $a[$k] = str_replace('"', '&quot;', $v);
541 }
542 }
543 if($nfr){$a['rel'] = isset($a['rel']) ? $a['rel']. ' nofollow' : 'nofollow';}
544
545 // rqd attr
546 static $eAR = array('area'=>array('alt'=>'area'), 'bdo'=>array('dir'=>'ltr'), 'form'=>array('action'=>''), 'img'=>array('src'=>'', 'alt'=>'image'), 'map'=>array('name'=>''), 'optgroup'=>array('label'=>''), 'param'=>array('name'=>''), 'script'=>array('type'=>'text/javascript'), 'textarea'=>array('rows'=>'10', 'cols'=>'50'));
547 if(isset($eAR[$e])){
548 foreach($eAR[$e] as $k=>$v){
549 if(!isset($a[$k])){$a[$k] = isset($v[0]) ? $v : $k;}
550 }
551 }
552
553 // depr attrs
554 if($depTr){
555 $c = array();
556 foreach($a as $k=>$v){
557 if($k == 'style' or !isset($aND[$k][$e])){continue;}
558 if($k == 'align'){
559 unset($a['align']);
560 if($e == 'img' && ($v == 'left' or $v == 'right')){$c[] = 'float: '. $v;}
561 elseif(($e == 'div' or $e == 'table') && $v == 'center'){$c[] = 'margin: auto';}
562 else{$c[] = 'text-align: '. $v;}
563 }elseif($k == 'bgcolor'){
564 unset($a['bgcolor']);
565 $c[] = 'background-color: '. $v;
566 }elseif($k == 'border'){
567 unset($a['border']); $c[] = "border: {$v}px";
568 }elseif($k == 'bordercolor'){
569 unset($a['bordercolor']); $c[] = 'border-color: '. $v;
570 }elseif($k == 'clear'){
571 unset($a['clear']); $c[] = 'clear: '. ($v != 'all' ? $v : 'both');
572 }elseif($k == 'compact'){
573 unset($a['compact']); $c[] = 'font-size: 85%';
574 }elseif($k == 'height' or $k == 'width'){
575 unset($a[$k]); $c[] = $k. ': '. ($v[0] != '*' ? $v. (ctype_digit($v) ? 'px' : '') : 'auto');
576 }elseif($k == 'hspace'){
577 unset($a['hspace']); $c[] = "margin-left: {$v}px; margin-right: {$v}px";
578 }elseif($k == 'language' && !isset($a['type'])){
579 unset($a['language']);
580 $a['type'] = 'text/'. strtolower($v);
581 }elseif($k == 'name'){
582 if($cf['no_deprecated_attr'] == 2 or ($e != 'a' && $e != 'map')){unset($a['name']);}
583 if(!isset($a['id']) && preg_match('`[a-zA-Z][a-zA-Z\d.:_\-]*`', $v)){$a['id'] = $v;}
584 }elseif($k == 'noshade'){
585 unset($a['noshade']); $c[] = 'border-style: none; border: 0; background-color: gray; color: gray';
586 }elseif($k == 'nowrap'){
587 unset($a['nowrap']); $c[] = 'white-space: nowrap';
588 }elseif($k == 'size'){
589 unset($a['size']); $c[] = 'size: '. $v. 'px';
590 }elseif($k == 'start' or $k == 'value'){
591 unset($a[$k]);
592 }elseif($k == 'type'){
593 unset($a['type']);
594 static $ol_type = array('i'=>'lower-roman', 'I'=>'upper-roman', 'a'=>'lower-latin', 'A'=>'upper-latin', '1'=>'decimal');
595 $c[] = 'list-style-type: '. (isset($ol_type[$v]) ? $ol_type[$v] : 'decimal');
596 }elseif($k == 'vspace'){
597 unset($a['vspace']); $c[] = "margin-top: {$v}px; margin-bottom: {$v}px";
598 }
599 }
600 if(count($c)){
601 $c = implode('; ', $c);
602 $a['style'] = isset($a['style']) ? rtrim($a['style'], ' ;'). '; '. $c. ';': $c. ';';
603 }
604 }
605 // unique IDs
606 if($cf['unique_ids'] && isset($a['id'])){ // XHTML spec
607 if(!preg_match('`^[A-Za-z][A-Za-z0-9_\-.:]*$`', ($id = $a['id'])) or (isset($GLOBALS['hl_Ids'][$id]) && $cf['unique_ids'] == 1)){unset($a['id']);
608 }else{
609 while(isset($GLOBALS['hl_Ids'][$id])){$id = $cf['unique_ids']. $id;}
610 $GLOBALS['hl_Ids'][($a['id'] = $id)] = 1;
611 }
612 }
613 // xml:lang
614 if($cf['xml:lang'] && isset($a['lang'])){
615 $a['xml:lang'] = isset($a['xml:lang']) ? $a['xml:lang'] : $a['lang'];
616 if($cf['xml:lang'] == 2){unset($a['lang']);}
617 }
618 // for transformed tag
619 if(!empty($trt)){
620 $a['style'] = isset($a['style']) ? rtrim($a['style'], ' ;'). '; '. $trt : $trt;
621 }
622 // final attr
623 $aA = '';
624 foreach($a as $k=>$v){$aA .= " {$k}=\"{$v}\"";}
625 // return with empty ele's slash
626 return "<{$e}{$aA}". (isset($eE[$e]) ? ' /' : ''). '>';
627 // eof
628 }
629
/**
 * Transform a deprecated presentational element into a strict equivalent.
 *
 * Rewrites $e in place (center -> div; dir/menu -> ul; s/strike/u/font -> span)
 * and returns the CSS declarations that reproduce the element's presentation.
 *
 * NOTE(review): the returned string is assumed to be what hl_tag appends to the
 * tag's style attribute; hl_tag writes attribute values inside double quotes
 * without re-escaping them, so nothing returned here may contain '"'.
 *
 * @param string $e Element name; overwritten with the replacement name, or with
 *                  0 when $t is 2 and the element has no transformation.
 * @param string $a Raw attribute string of the tag; only read, and only for 'font'.
 * @param int    $t When 2, an element without a transformation is rejected.
 * @return string|int CSS declarations (possibly ''), or 0 when rejected.
 */
function hl_tag2(&$e, &$a, $t=1){
// transform tag
if($e == 'center'){$e = 'div'; return 'text-align: center;';}
if($e == 'dir' or $e == 'menu'){$e = 'ul'; return '';}
if($e == 's' or $e == 'strike'){$e = 'span'; return 'text-decoration: line-through;';}
if($e == 'u'){$e = 'span'; return 'text-decoration: underline;';}
// <font size> value => CSS font-size
static $fs = array('0'=>'xx-small', '1'=>'xx-small', '2'=>'small', '3'=>'medium', '4'=>'large', '5'=>'x-large', '6'=>'xx-large', '7'=>'300%', '-1'=>'smaller', '-2'=>'60%', '+1'=>'larger', '+2'=>'150%', '+3'=>'200%', '+4'=>'300%');
if($e == 'font'){
 // <font> attribute => CSS property, in the order the declarations are emitted
 static $fp = array('face'=>'font-family', 'color'=>'color', 'size'=>'font-size');
 $a2 = '';
 foreach($fp as $n=>$p){
  // Prefer a properly closed quoted value so that embedded whitespace survives
  // (face="Times New Roman"); [^=] keeps the match from running into the next
  // attribute. Otherwise fall back to the loose match, which also accepts
  // unquoted and unterminated values but stops at the first whitespace.
  if(!preg_match('`'. $n. '\s*=\s*(\'|")([^=]+?)\\1`i', $a, $m) && !preg_match('`'. $n. '\s*=\s*(\'|")?(.+?)(\\1|\s|$)`i', $a, $m)){continue;}
  $v = trim($m[2]);
  if($n == 'size'){
   // Only the sizes listed in $fs have a CSS equivalent
   if(!isset($fs[$v])){continue;}
   $v = $fs[$v];
  }
  // A '"' in the value would terminate the enclosing style="..." attribute
  $a2 .= ' '. $p. ': '. str_replace('"', '\'', $v). ';';
 }
 $e = 'span'; return ltrim($a2);
}
if($t == 2){$e = 0; return 0;}
return '';
// eof
}
654
/**
 * Report the htmLawed release implemented by this file.
 *
 * @return string Version number.
 */
function hl_version(){
 $version = '1.0.8';
 return $version;
}
660
/**
 * kses-compatible entry point: builds an htmLawed configuration from
 * kses-style arguments and runs htmLawed with it.
 *
 * @param string $in       Text to filter.
 * @param array  $ok_htm   Allowed elements, keyed by element name; also passed
 *                         on to htmLawed as its $spec argument.
 * @param array  $ok_prots Allowed URL schemes.
 * @return mixed Whatever htmLawed() returns for the input.
 */
function kses($in, $ok_htm, $ok_prots = array('http', 'https', 'ftp', 'news', 'nntp', 'telnet', 'gopher', 'mailto')){
 // Add the '*' wildcard under 'n' to every listed element's spec
 foreach($ok_htm as $tag=>$unused){
  $ok_htm[$tag]['n']['*'] = 1;
 }

 // Allow only the listed elements; with none listed, allow no element at all
 if(count($ok_htm)){
  $elements = strtolower(implode(',', array_keys($ok_htm)));
 }else{
  $elements = '-*';
 }

 $cf = array(
  'cdata'=>0,
  'comment'=>0,
  'lc_std_val'=>0,
  'make_tag_strict'=>0,
  'no_deprecated_attr'=>0,
  'unique_ids'=>0,
  'css_expression'=>1,
  'keep_bad'=>1,
  'elements'=>$elements,
  'hook'=>'kses_hook',
  'schemes'=>'*:'. implode(',', $ok_prots),
 );

 return htmLawed($in, $cf, $ok_htm);
}
674
/**
 * Hook named in the configuration that kses() builds; it hands the text back
 * unchanged and leaves both by-reference arguments untouched.
 *
 * @param string $string Text being filtered.
 * @param array  $cf     Configuration (by reference; not modified here).
 * @param array  $spec   Element spec (by reference; not modified here).
 * @return string The input text, unmodified.
 */
function kses_hook($string, &$cf, &$spec){
 // Pass-through: nothing to adjust for kses compatibility
 return $string;
}
680
/**
 * Strip every "[LTI:...]" marker from a string.
 *
 * A marker runs from "[LTI:" to the next "]"; an opening marker with no
 * closing bracket is left in place.
 *
 * @param string $str Text that may contain markers.
 * @return string The text with all complete markers removed.
 */
function remove_instr_only($str) {
	// Skip the regex entirely when no marker prefix is present
	if (strpos($str,'[LTI:')===false) {
		return $str;
	}
	return preg_replace('/\[LTI:[^\]]*\]/', '', $str);
}
Something went wrong with that request. Please try again.