Skip to content

Commit aa7ae9f

Browse files
committed
make other Nodes utils that act on whitespace know XML's idea of WS
see #39
1 parent 473c2dc commit aa7ae9f

File tree

2 files changed

+226
-15
lines changed

2 files changed

+226
-15
lines changed

src/main/net-core/Util/Nodes.cs

Lines changed: 135 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,13 @@ public static IDictionary<XmlQualifiedName, string>
8181
/// empty text or CDATA nodes and where all textual content
8282
/// including attribute values or comments are trimmed.
8383
/// </summary>
84+
/// <remarks>
85+
/// <para>
86+
/// Unlike <see cref="StripXmlWhitespace"/> this uses Unicode's idea
87+
/// of whitespace rather than the more restricted subset considered
88+
/// whitespace by XML.
89+
/// </para>
90+
/// </remarks>
8491
public static XmlNode StripWhitespace(XmlNode original) {
8592
XmlNode cloned = original.CloneNode(true);
8693
cloned.Normalize();
@@ -96,9 +103,16 @@ public static XmlNode StripWhitespace(XmlNode original) {
96103
/// characters XML considers whitespace according to
97104
/// <see href="https://www.w3.org/TR/xml11/#NT-S"/>.
98105
/// </summary>
106+
/// <remarks>
107+
/// <para>
108+
/// Unlike <see cref="StripWhitespace"/> this uses XML's idea
109+
/// of whitespace rather than the more extensive set considered
110+
/// whitespace by Unicode.
111+
/// </para>
99112
/// <para>
100113
/// since XMLUnit 2.10.0
101114
/// </para>
115+
/// </remarks>
102116
public static XmlNode StripXmlWhitespace(XmlNode original) {
103117
XmlNode cloned = original.CloneNode(true);
104118
cloned.Normalize();
@@ -118,6 +132,15 @@ public static XmlNode StripXmlWhitespace(XmlNode original) {
118132
/// characters are replaced by space characters and
119133
/// consecutive whitespace characters are collapsed.
120134
/// </para>
135+
/// <para>
136+
/// This method is similar to <see cref="StripWhitespace"/>
137+
/// but in addition "normalizes" whitespace.
138+
/// </para>
139+
/// <para>
140+
/// Unlike <see cref="NormalizeXmlWhitespace"/> this uses Unicode's idea
141+
/// of whitespace rather than the more restricted subset considered
142+
/// whitespace by XML.
143+
/// </para>
121144
/// </remarks>
122145
public static XmlNode NormalizeWhitespace(XmlNode original) {
123146
XmlNode cloned = original.CloneNode(true);
@@ -126,6 +149,38 @@ public static XmlNode NormalizeWhitespace(XmlNode original) {
126149
return cloned;
127150
}
128151

152+
/// <summary>
153+
/// Creates a new Node (of the same type as the original node)
154+
/// that is similar to the original but doesn't contain any
155+
/// empty text or CDATA nodes and where all textual content
156+
/// including attribute values or comments are normalized.
157+
/// </summary>
158+
/// <remarks>
159+
/// <para>
160+
/// "normalized" in this context means all XML whitespace
161+
/// characters are replaced by space characters and
162+
/// consecutive XML whitespace characters are collapsed.
163+
/// </para>
164+
/// <para>
165+
/// This method is similar to <see cref="StripXmlWhitespace"/>
166+
/// but in addition "normalizes" XML whitespace.
167+
/// </para>
168+
/// <para>
169+
/// Unlike <see cref="NormalizeWhitespace"/> this uses XML's idea
170+
/// of whitespace rather than the more extensive set considered
171+
/// whitespace by Unicode.
172+
/// </para>
173+
/// <para>
174+
/// since XMLUnit 2.10.0
175+
/// </para>
176+
/// </remarks>
177+
public static XmlNode NormalizeXmlWhitespace(XmlNode original) {
178+
XmlNode cloned = original.CloneNode(true);
179+
cloned.Normalize();
180+
HandleWsRec(cloned, XmlTrimAndNormalizeValue);
181+
return cloned;
182+
}
183+
129184
/// <summary>
130185
/// Creates a new Node (of the same type as the original node)
131186
/// that is similar to the original but doesn't contain any
@@ -136,27 +191,58 @@ public static XmlNode NormalizeWhitespace(XmlNode original) {
136191
/// This doesn't have any effect if applied to a text or CDATA
137192
/// node itself.
138193
/// </para>
194+
/// <para>
195+
/// Unlike <see cref="StripXmlElementContentWhitespace"/> this uses Unicode's idea
196+
/// of whitespace rather than the more restricted subset considered
197+
/// whitespace by XML.
198+
/// </para>
139199
/// <para>
140200
/// since XMLUnit 2.6.0
141201
/// </para>
142202
/// </remarks>
143203
public static XmlNode StripElementContentWhitespace(XmlNode original) {
144204
XmlNode cloned = original.CloneNode(true);
145205
cloned.Normalize();
146-
StripECW(cloned);
206+
StripECW(cloned, TrimValue);
147207
return cloned;
148208
}
149209

150210
/// <summary>
151-
/// Returns the nodes' value trimmed of all whitespace.
211+
/// Creates a new Node (of the same type as the original node)
212+
/// that is similar to the original but doesn't contain any
213+
/// text or CDATA nodes that only consist of XML whitespace.
214+
/// </summary>
215+
/// <remarks>
216+
/// <para>
217+
/// This doesn't have any effect if applied to a text or CDATA
218+
/// node itself.
219+
/// </para>
220+
/// <para>
221+
/// Unlike <see cref="StripElementContentWhitespace"/> this uses XML's idea
222+
/// of whitespace rather than the more extensive set considered
223+
/// whitespace by Unicode.
224+
/// </para>
225+
/// <para>
226+
/// since XMLUnit 2.10.0
227+
/// </para>
228+
/// </remarks>
229+
public static XmlNode StripXmlElementContentWhitespace(XmlNode original) {
230+
XmlNode cloned = original.CloneNode(true);
231+
cloned.Normalize();
232+
StripECW(cloned, XmlTrimValue);
233+
return cloned;
234+
}
235+
152236
/// <summary>
237+
/// Returns the nodes' value trimmed of all whitespace.
238+
/// </summary>
153239
private static String TrimValue(XmlNode n) {
154240
return n.Value.Trim();
155241
}
156242

157243
/// <summary>
158244
/// Returns the nodes' value trimmed of all whitespace and Normalized
159-
/// <summary>
245+
/// </summary>
160246
private static String TrimAndNormalizeValue(XmlNode n) {
161247
return Normalize(TrimValue(n));
162248
}
@@ -167,11 +253,18 @@ private static String TrimAndNormalizeValue(XmlNode n) {
167253

168254
/// <summary>
169255
/// Returns the nodes' value trimmed of all characters XML considers whitespace.
170-
/// <summary>
256+
/// </summary>
171257
private static String XmlTrimValue(XmlNode n) {
172258
return n.Value.Trim(XML_WHITESPACE_CHARS);
173259
}
174260

261+
/// <summary>
262+
/// Returns the node's value trimmed of all characters XML considers whitespace and normalized with regard to XML whitespace.
263+
/// </summary>
264+
private static String XmlTrimAndNormalizeValue(XmlNode n) {
265+
return XmlNormalize(XmlTrimValue(n));
266+
}
267+
175268
/// <summary>
176269
/// Trims textual content of this node, removes empty text and
177270
/// CDATA children, recurses into its child nodes.
@@ -206,16 +299,49 @@ private static void HandleWsRec(XmlNode n, Func<XmlNode, String> handleWs) {
206299
/// Normalize a string.
207300
/// </summary>
208301
/// <remarks>
302+
/// <para>
209303
/// "normalized" in this context means all whitespace
210304
/// characters are replaced by space characters and
211-
/// consecutive whitespace characaters are collapsed.
305+
/// consecutive whitespace characters are collapsed.
306+
/// </para>
307+
/// <para>
308+
/// Unlike <see cref="XmlNormalize"/> this uses Unicode's idea
309+
/// of whitespace rather than the more restricted subset considered
310+
/// whitespace by XML.
311+
/// </para>
212312
/// </remarks>
213313
internal static string Normalize(string s) {
314+
return Normalize(s, c => char.IsWhiteSpace(c));
315+
}
316+
317+
/// <summary>
318+
/// Normalize a string with regard to XML whitespace.
319+
/// </summary>
320+
/// <remarks>
321+
/// <para>
322+
/// "normalized" in this context means all XML whitespace
323+
/// characters are replaced by space characters and
324+
/// consecutive XML whitespace characters are collapsed.
325+
/// </para>
326+
/// <para>
327+
/// Unlike <see cref="Normalize"/> this uses XML's idea
328+
/// of whitespace rather than the more extensive set considered
329+
/// whitespace by Unicode.
330+
/// </para>
331+
/// <para>
332+
/// since XMLUnit 2.10.0
333+
/// </para>
334+
/// </remarks>
335+
internal static string XmlNormalize(string s) {
336+
return Normalize(s, c => XML_WHITESPACE_CHARS.Contains(c));
337+
}
338+
339+
private static string Normalize(string s, Predicate<char> isWhiteSpace) {
214340
StringBuilder sb = new StringBuilder();
215341
bool changed = false;
216342
bool lastCharWasWS = false;
217343
foreach (char c in s) {
218-
if (char.IsWhiteSpace(c)) {
344+
if (isWhiteSpace(c)) {
219345
if (!lastCharWasWS) {
220346
sb.Append(SPACE);
221347
changed |= (c != SPACE);
@@ -231,13 +357,13 @@ internal static string Normalize(string s) {
231357
return changed ? sb.ToString() : s;
232358
}
233359

234-
private static void StripECW(XmlNode n) {
360+
private static void StripECW(XmlNode n, Func<XmlNode, String> trimmer) {
235361
LinkedList<XmlNode> toRemove = new LinkedList<XmlNode>();
236362
foreach (XmlNode child in n.ChildNodes) {
237-
StripECW(child);
363+
StripECW(child, trimmer);
238364
if (!(n is XmlAttribute)
239365
&& IsTextualContentNode(child)
240-
&& child.Value.Trim().Length == 0) {
366+
&& trimmer(child).Length == 0) {
241367
toRemove.AddLast(child);
242368
}
243369
}

0 commit comments

Comments
 (0)