@@ -81,6 +81,13 @@ public static IDictionary<XmlQualifiedName, string>
8181 /// empty text or CDATA nodes and where all textual content
8282 /// including attribute values or comments are trimmed.
8383 /// </summary>
84+ /// <remarks>
85+ /// <para>
86+ /// Unlike <see cref="StripXmlWhitespace"/> this uses Unicode's idea
87+ /// of whitespace rather than the more restricted subset considered
88+ /// whitespace by XML.
89+ /// </para>
90+ /// </remarks>
8491 public static XmlNode StripWhitespace ( XmlNode original ) {
8592 XmlNode cloned = original . CloneNode ( true ) ;
8693 cloned . Normalize ( ) ;
@@ -96,9 +103,16 @@ public static XmlNode StripWhitespace(XmlNode original) {
96103 /// characters XML considers whitespace according to
97104 /// <see href="https://www.w3.org/TR/xml11/#NT-S"/>.
98105 /// </summary>
106+ /// <remarks>
107+ /// <para>
108+ /// Unlike <see cref="StripWhitespace"/> this uses XML's idea
109+ /// of whitespace rather than the more extensive set considered
110+ /// whitespace by Unicode.
111+ /// </para>
99112 /// <para>
100113 /// since XMLUnit 2.10.0
101114 /// </para>
115+ /// </remarks>
102116 public static XmlNode StripXmlWhitespace ( XmlNode original ) {
103117 XmlNode cloned = original . CloneNode ( true ) ;
104118 cloned . Normalize ( ) ;
@@ -118,6 +132,15 @@ public static XmlNode StripXmlWhitespace(XmlNode original) {
118132 /// characters are replaced by space characters and
119133 /// consecutive whitespace characters are collapsed.
120134 /// </para>
135+ /// <para>
136+ /// This method is similar to <see cref="StripWhitespace"/>
137+ /// but in addition "normalizes" whitespace.
138+ /// </para>
139+ /// <para>
140+ /// Unlike <see cref="NormalizeXmlWhitespace"/> this uses Unicode's idea
141+ /// of whitespace rather than the more restricted subset considered
142+ /// whitespace by XML.
143+ /// </para>
121144 /// </remarks>
122145 public static XmlNode NormalizeWhitespace ( XmlNode original ) {
123146 XmlNode cloned = original . CloneNode ( true ) ;
@@ -126,6 +149,38 @@ public static XmlNode NormalizeWhitespace(XmlNode original) {
126149 return cloned ;
127150 }
128151
152+ /// <summary>
153+ /// Creates a new Node (of the same type as the original node)
154+ /// that is similar to the original but doesn't contain any
155+ /// empty text or CDATA nodes and where all textual content
156+ /// including attribute values or comments is normalized.
157+ /// </summary>
158+ /// <remarks>
159+ /// <para>
160+ /// "normalized" in this context means all XML whitespace
161+ /// characters are replaced by space characters and
162+ /// consecutive XML whitespace characters are collapsed.
163+ /// </para>
164+ /// <para>
165+ /// This method is similar to <see cref="StripXmlWhitespace"/>
166+ /// but in addition "normalizes" XML whitespace.
167+ /// </para>
168+ /// <para>
169+ /// Unlike <see cref="NormalizeWhitespace"/> this uses XML's idea
170+ /// of whitespace rather than the more extensive set considered
171+ /// whitespace by Unicode.
172+ /// </para>
173+ /// <para>
174+ /// since XMLUnit 2.10.0
175+ /// </para>
176+ /// </remarks>
177+ public static XmlNode NormalizeXmlWhitespace ( XmlNode original ) {
178+ XmlNode cloned = original . CloneNode ( true ) ; // deep clone: all later changes are applied to the copy, not to "original"
179+ cloned . Normalize ( ) ; // XmlNode.Normalize merges adjacent text nodes before the whitespace handling runs
180+ HandleWsRec ( cloned , XmlTrimAndNormalizeValue ) ; // recursive pass using the XML-whitespace trim-and-normalize function
181+ return cloned ;
182+ }
183+
129184 /// <summary>
130185 /// Creates a new Node (of the same type as the original node)
131186 /// that is similar to the original but doesn't contain any
@@ -136,27 +191,58 @@ public static XmlNode NormalizeWhitespace(XmlNode original) {
136191 /// This doesn't have any effect if applied to a text or CDATA
137192 /// node itself.
138193 /// </para>
194+ /// <para>
195+ /// Unlike <see cref="StripXmlElementContentWhitespace"/> this uses Unicode's idea
196+ /// of whitespace rather than the more restricted subset considered
197+ /// whitespace by XML.
198+ /// </para>
139199 /// <para>
140200 /// since XMLUnit 2.6.0
141201 /// </para>
142202 /// </remarks>
143203 public static XmlNode StripElementContentWhitespace ( XmlNode original ) {
144204 XmlNode cloned = original . CloneNode ( true ) ; // deep clone: all later changes are applied to the copy, not to "original"
145205 cloned . Normalize ( ) ; // XmlNode.Normalize merges adjacent text nodes before whitespace-only nodes are looked for
146- StripECW ( cloned ) ;
206+ StripECW ( cloned , TrimValue ) ; // TrimValue is the trimmer: StripECW marks textual children whose trimmed value has length 0 for removal
147207 return cloned ;
148208 }
149209
150210 /// <summary>
151- /// Returns the nodes' value trimmed of all whitespace.
211+ /// Creates a new Node (of the same type as the original node)
212+ /// that is similar to the original but doesn't contain any
213+ /// text or CDATA nodes that only consist of XML whitespace.
214+ /// </summary>
215+ /// <remarks>
216+ /// <para>
217+ /// This doesn't have any effect if applied to a text or CDATA
218+ /// node itself.
219+ /// </para>
220+ /// <para>
221+ /// Unlike <see cref="StripElementContentWhitespace"/> this uses XML's idea
222+ /// of whitespace rather than the more extensive set considered
223+ /// whitespace by Unicode.
224+ /// </para>
225+ /// <para>
226+ /// since XMLUnit 2.10.0
227+ /// </para>
228+ /// </remarks>
229+ public static XmlNode StripXmlElementContentWhitespace ( XmlNode original ) {
230+ XmlNode cloned = original . CloneNode ( true ) ; // deep clone: all later changes are applied to the copy, not to "original"
231+ cloned . Normalize ( ) ; // XmlNode.Normalize merges adjacent text nodes before whitespace-only nodes are looked for
232+ StripECW ( cloned , XmlTrimValue ) ; // XmlTrimValue is the trimmer: StripECW marks textual children whose XML-trimmed value has length 0 for removal
233+ return cloned ;
234+ }
235+
152236 /// <summary>
237+ /// Returns the node's value with all leading and trailing whitespace (as defined by String.Trim()) removed.
238+ /// </summary>
153239 private static String TrimValue ( XmlNode n ) {
154240 return n . Value . Trim ( ) ; // NOTE(review): throws if n.Value is null (e.g. element nodes); presumably only called for textual nodes and attributes - confirm against callers
155241 }
156242
157243 /// <summary>
158244 /// Returns the node's value trimmed of all whitespace and normalized.
159- /// <summary>
245+ /// </summary>
160246 private static String TrimAndNormalizeValue ( XmlNode n ) {
161247 return Normalize ( TrimValue ( n ) ) ; // trim first, then normalize the whitespace that remains inside the value
162248 }
@@ -167,11 +253,18 @@ private static String TrimAndNormalizeValue(XmlNode n) {
167253
168254 /// <summary>
169255 /// Returns the node's value trimmed of all characters XML considers whitespace.
170- /// <summary>
256+ /// </summary>
171257 private static String XmlTrimValue ( XmlNode n ) {
172258 return n . Value . Trim ( XML_WHITESPACE_CHARS ) ; // only the characters in XML_WHITESPACE_CHARS are stripped from both ends
173259 }
174260
261+ /// <summary>
262+ /// Returns the node's value trimmed of all characters XML considers whitespace and normalized with regard to XML whitespace.
263+ /// </summary>
264+ private static String XmlTrimAndNormalizeValue ( XmlNode n ) {
265+ return XmlNormalize ( XmlTrimValue ( n ) ) ; // XML-whitespace counterpart of TrimAndNormalizeValue: trim first, then normalize
266+ }
267+
175268 /// <summary>
176269 /// Trims textual content of this node, removes empty text and
177270 /// CDATA children, recurses into its child nodes.
@@ -206,16 +299,49 @@ private static void HandleWsRec(XmlNode n, Func<XmlNode, String> handleWs) {
206299 /// Normalize a string.
207300 /// </summary>
208301 /// <remarks>
302+ /// <para>
209303 /// "normalized" in this context means all whitespace
210304 /// characters are replaced by space characters and
211- /// consecutive whitespace characaters are collapsed.
305+ /// consecutive whitespace characters are collapsed.
306+ /// </para>
307+ /// <para>
308+ /// Unlike <see cref="XmlNormalize"/> this uses Unicode's idea
309+ /// of whitespace rather than the more restricted subset considered
310+ /// whitespace by XML.
311+ /// </para>
212312 /// </remarks>
213313 internal static string Normalize ( string s ) {
314+ return Normalize ( s , c => char . IsWhiteSpace ( c ) ) ; // delegates to the predicate-based overload, using char.IsWhiteSpace as the whitespace test
315+ }
316+
317+ /// <summary>
318+ /// Normalize a string with regard to XML whitespace.
319+ /// </summary>
320+ /// <remarks>
321+ /// <para>
322+ /// "normalized" in this context means all XML whitespace
323+ /// characters are replaced by space characters and
324+ /// consecutive XML whitespace characters are collapsed.
325+ /// </para>
326+ /// <para>
327+ /// Unlike <see cref="Normalize"/> this uses XML's idea
328+ /// of whitespace rather than the more extensive set considered
329+ /// whitespace by Unicode.
330+ /// </para>
331+ /// <para>
332+ /// since XMLUnit 2.10.0
333+ /// </para>
334+ /// </remarks>
335+ internal static string XmlNormalize ( string s ) {
336+ return Normalize ( s , c => XML_WHITESPACE_CHARS . Contains ( c ) ) ; // same algorithm as Normalize(string), but a character only counts as whitespace if it is in XML_WHITESPACE_CHARS (defined outside this view; presumably the XML "S" production characters - confirm)
337+ }
338+
339+ private static string Normalize ( string s , Predicate < char > isWhiteSpace ) {
214340 StringBuilder sb = new StringBuilder ( ) ;
215341 bool changed = false ;
216342 bool lastCharWasWS = false ;
217343 foreach ( char c in s ) {
218- if ( char . IsWhiteSpace ( c ) ) {
344+ if ( isWhiteSpace ( c ) ) {
219345 if ( ! lastCharWasWS ) {
220346 sb . Append ( SPACE ) ;
221347 changed |= ( c != SPACE ) ;
@@ -231,13 +357,13 @@ internal static string Normalize(string s) {
231357 return changed ? sb . ToString ( ) : s ;
232358 }
233359
234- private static void StripECW ( XmlNode n ) {
360+ private static void StripECW ( XmlNode n , Func < XmlNode , String > trimmer ) {
235361 LinkedList < XmlNode > toRemove = new LinkedList < XmlNode > ( ) ;
236362 foreach ( XmlNode child in n . ChildNodes ) {
237- StripECW ( child ) ;
363+ StripECW ( child , trimmer ) ;
238364 if ( ! ( n is XmlAttribute )
239365 && IsTextualContentNode ( child )
240- && child . Value . Trim ( ) . Length == 0 ) {
366+ && trimmer ( child ) . Length == 0 ) {
241367 toRemove . AddLast ( child ) ;
242368 }
243369 }
0 commit comments