This repository has been archived by the owner on Jan 30, 2020. It is now read-only.
/
StripTags.php
292 lines (257 loc) · 9.52 KB
/
StripTags.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
<?php
/**
* Zend Framework (http://framework.zend.com/)
*
* @link http://github.com/zendframework/zf2 for the canonical source repository
* @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
namespace Zend\Filter;
use Traversable;
use Zend\Stdlib\ArrayUtils;
class StripTags extends AbstractFilter
{
/**
 * Unique ID prefix used for allowing comments
 *
 * NOTE(review): no method of this class reads this constant; presumably it
 * is kept only so external references keep resolving — confirm before
 * removing it.
 */
const UNIQUE_ID_PREFIX = '__Zend_Filter_StripTags__';
/**
 * Array of allowed tags and allowed attributes for each allowed tag
 *
 * Keys are lower-cased tag names. Each value is itself an array whose keys
 * are the lower-cased attribute names permitted on that specific tag; the
 * values of that inner array are always null, only the keys carry meaning.
 * An empty inner array means the tag is allowed without any tag-specific
 * attributes. Populated by setTagsAllowed().
 *
 * @var array
 */
protected $tagsAllowed = [];
/**
 * Array of allowed attributes for all allowed tags
 *
 * Keys are lower-cased attribute names that are permitted on every allowed
 * tag; the values are always null, only the keys carry meaning. Populated by
 * setAttributesAllowed().
 *
 * @var array
 */
protected $attributesAllowed = [];
/**
 * Sets the filter options
 *
 * Options may be supplied in one of two ways:
 *  - a single array (or Traversable) using the named keys listed below, or
 *  - positionally, as ($allowTags, $allowAttribs, $allowComments).
 *
 * Recognized keys:
 *     'allowTags'     => Tags which are allowed
 *     'allowAttribs'  => Attributes which are allowed
 *     'allowComments' => Accepted so that existing callers keep working, but
 *                        this constructor never acts on it
 *
 * An array/Traversable that contains none of the keys above is treated as
 * the first positional argument, i.e. as the list of allowed tags.
 *
 * @param string|array|Traversable $options
 */
public function __construct($options = null)
{
    // Snapshot the positional arguments BEFORE $options is reassigned.
    // func_get_args() reports the originally passed value on PHP 5 but the
    // current (reassigned) value on PHP 7+, so reading it after the
    // Traversable conversion gives version-dependent results.
    $arguments = func_get_args();

    if ($options instanceof Traversable) {
        $options = ArrayUtils::iteratorToArray($options);
        // Keep the positional view in sync so a Traversable list of tags is
        // honoured on every PHP version instead of being dropped on PHP 5.
        $arguments[0] = $options;
    }

    $hasNamedOptions = is_array($options) && (
        array_key_exists('allowTags', $options)
        || array_key_exists('allowAttribs', $options)
        || array_key_exists('allowComments', $options)
    );

    if (! $hasNamedOptions) {
        // Positional call style: map the arguments onto the named keys.
        // array_shift() yields null when no argument was passed at all.
        $named = ['allowTags' => array_shift($arguments)];
        if (! empty($arguments)) {
            $named['allowAttribs'] = array_shift($arguments);
        }
        // A third positional argument (allowComments) is deliberately not
        // mapped: nothing below consumes it.
        $options = $named;
    }

    if (array_key_exists('allowTags', $options)) {
        $this->setTagsAllowed($options['allowTags']);
    }
    if (array_key_exists('allowAttribs', $options)) {
        $this->setAttributesAllowed($options['allowAttribs']);
    }
}
/**
 * Retrieves the tags that survive filtering
 *
 * @return array Lower-cased tag names as keys; each value is an array keyed
 *               by the attribute names allowed on that particular tag
 */
public function getTagsAllowed()
{
    return $this->tagsAllowed;
}
/**
 * Registers one or more tags as allowed
 *
 * Accepted shapes for each entry of $tagsAllowed:
 *  - integer key, string value:  the value is a tag name allowed with no
 *    tag-specific attributes;
 *  - string key, string or array value: the key is the tag name and the
 *    value is the attribute name (or list of attribute names) allowed on it.
 * Entries in any other shape, and non-string attribute names, are skipped.
 * Tag and attribute names are lower-cased before being stored. Registering
 * a tag again replaces the attribute list stored for it earlier.
 *
 * @param  array|string $tagsAllowed
 * @return self Provides a fluent interface
 */
public function setTagsAllowed($tagsAllowed)
{
    $entries = is_array($tagsAllowed) ? $tagsAllowed : [$tagsAllowed];

    foreach ($entries as $key => $spec) {
        // Numeric key: the value itself is the tag name, without attributes
        if (is_int($key)) {
            if (is_string($spec)) {
                $this->tagsAllowed[strtolower($spec)] = [];
            }
            continue;
        }

        // String key: the key is the tag name, the value lists its attributes
        if (is_string($spec)) {
            $spec = [$spec];
        } elseif (! is_array($spec)) {
            continue;
        }

        $name = strtolower($key);
        $this->tagsAllowed[$name] = [];

        foreach ($spec as $attribute) {
            if (! is_string($attribute)) {
                continue;
            }
            // Only the key matters; the value is a null placeholder
            $this->tagsAllowed[$name][strtolower($attribute)] = null;
        }
    }

    return $this;
}
/**
 * Retrieves the attributes that are allowed on every allowed tag
 *
 * @return array Lower-cased attribute names as keys
 */
public function getAttributesAllowed()
{
    return $this->attributesAllowed;
}
/**
 * Registers one or more attributes as allowed on every allowed tag
 *
 * A single value is treated as a one-element list. Attribute names are
 * lower-cased before being stored; entries that are not strings are skipped.
 *
 * @param  array|string $attributesAllowed
 * @return self Provides a fluent interface
 */
public function setAttributesAllowed($attributesAllowed)
{
    $candidates = is_array($attributesAllowed) ? $attributesAllowed : [$attributesAllowed];

    // Drop everything that is not a string, then record each remaining name
    foreach (array_filter($candidates, 'is_string') as $name) {
        // Only the key matters; the value is a null placeholder
        $this->attributesAllowed[strtolower($name)] = null;
    }

    return $this;
}
/**
 * Defined by Zend\Filter\FilterInterface
 *
 * Removes HTML comments, then every tag that is not in the allowed list,
 * and from the allowed tags every attribute that is not allowed. Any ">"
 * found in the text between tags is removed as well.
 *
 * If the value provided is non-scalar, the value will remain unfiltered
 *
 * @param  string $value
 * @return string|mixed The filtered string, or $value itself when it is not scalar
 */
public function filter($value)
{
    if (! is_scalar($value)) {
        return $value;
    }

    $text = (string) $value;

    // Pass 1: remove HTML comments. The search restarts from the beginning
    // after each removal, so a comment opener formed by joining the text
    // around a removed comment is caught too. An unterminated comment
    // swallows everything up to the end of the input.
    $commentOpen  = '<!--';
    $commentClose = '-->';
    for (;;) {
        $from = strpos($text, $commentOpen);
        if ($from === false) {
            break;
        }

        $to = strpos($text, $commentClose, $from + strlen($commentOpen));
        $text = ($to === false)
            ? substr($text, 0, $from)
            : substr($text, 0, $from) . substr($text, $to + strlen($commentClose));
    }

    // Pass 2: split the input into consecutive (plain text, tag) pairs;
    // either half of a pair may be an empty string
    preg_match_all('/([^<]*)(<?[^>]*>?)/', (string) $text, $pieces);

    $filtered = '';
    foreach ($pieces[1] as $position => $leadingText) {
        $rawTag = $pieces[2][$position];

        // Stray ">" characters are dropped from the text; a tag, when there
        // is one, is passed through the per-tag filter
        $filtered .= str_replace('>', '', $leadingText)
            . ($rawTag === '' ? '' : $this->_filterTag($rawTag));
    }

    return $filtered;
}
/**
 * Filters a single tag against the current option settings
 *
 * Returns an empty string when the tag cannot be parsed or its name is not
 * an allowed tag. Otherwise the tag is rebuilt from its lower-cased name and
 * those of its quoted attributes that are allowed either for this tag or for
 * all tags; attribute names are lower-cased, values and quote characters are
 * kept as found. Attributes without a quoted value are dropped.
 *
 * @param  string $tag
 * @return string
 */
// @codingStandardsIgnoreStart
protected function _filterTag($tag)
{
    // @codingStandardsIgnoreEnd
    // Split the tag into: [1] opening delimiter ("<" or "</"), [2] tag name,
    // [3] raw attribute string, [5] closing delimiter (">" or "/>")
    if (! preg_match('~(</?)(\w*)((/(?!>)|[^/>])*)(/?>)~', $tag, $parts)) {
        // Not parseable as a tag: strip it entirely
        return '';
    }

    $opener = $parts[1];
    $name   = strtolower($parts[2]);
    $closer = $parts[5];

    // Tags that are not in the allowed list are removed entirely
    if (! isset($this->tagsAllowed[$name])) {
        return '';
    }

    $rawAttributes  = trim($parts[3]);
    $keptAttributes = '';

    if ($rawAttributes !== '') {
        // Collect every name="value" / name='value' pair. Per match:
        // [2]/[3] hold quote and value for the double-quoted form,
        // [4]/[5] hold them for the single-quoted form.
        preg_match_all('/([\w-]+)\s*=\s*(?:(")(.*?)"|(\')(.*?)\')/s', $rawAttributes, $found);

        foreach ($found[1] as $i => $attributeName) {
            $attributeName = strtolower($attributeName);

            $isAllowed = array_key_exists($attributeName, $this->tagsAllowed[$name])
                || array_key_exists($attributeName, $this->attributesAllowed);
            if (! $isAllowed) {
                continue;
            }

            $quote = ($found[2][$i] !== '') ? $found[2][$i] : $found[4][$i];
            $value = ($found[3][$i] !== '') ? $found[3][$i] : $found[5][$i];

            $keptAttributes .= ' ' . $attributeName . '=' . $quote . $value . $quote;
        }
    }

    // Self-closing tags get a space before "/>" for backwards-compatible XHTML
    if (strpos($closer, '/') !== false) {
        $closer = ' ' . $closer;
    }

    return $opener . $name . $keptAttributes . $closer;
}
}