Skip to content

Commit 6e334a4

Browse files
committed
add a new XmlWhitespaceStrippedSource that honors XML's idea of whitespace
see https://github.com/xmlunit/xmlunit/issues/260
1 parent 03b1999 commit 6e334a4

File tree

8 files changed

+166
-14
lines changed

8 files changed

+166
-14
lines changed

RELEASE_NOTES.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,12 @@
1212
[#38](https://github.com/xmlunit/xmlunit.net/issues/38). And neither
1313
of the methods could deal with `XmlSignificantWhitespace` at all.
1414

15+
* add `XmlWhitespaceStrippedSource` that only trims characters that
16+
are considered whitespace by the [XML
17+
Specification](https://www.w3.org/TR/xml11/#NT-S) from textual
18+
content.
19+
Issue [xmlunit/#260](https://github.com/xmlunit/xmlunit/issues/260).
20+
1521
## XMLUnit.NET 2.9.2 - /Released 2023-03-16/
1622

1723
* added `NodeFilters#SatisfiesAll` and `SatisfiesAny` methods to make
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
This file is licensed to You under the Apache License, Version 2.0
3+
(the "License"); you may not use this file except in compliance with
4+
the License. You may obtain a copy of the License at
5+
6+
http://www.apache.org/licenses/LICENSE-2.0
7+
8+
Unless required by applicable law or agreed to in writing, software
9+
distributed under the License is distributed on an "AS IS" BASIS,
10+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
See the License for the specific language governing permissions and
12+
limitations under the License.
13+
*/
14+
15+
using Org.XmlUnit.Util;
16+
17+
namespace Org.XmlUnit.Input {
18+
19+
/// <summary>
20+
/// A source that is obtained from a different source by removing
21+
/// all empty text nodes and removing all characters XML considers
22+
/// whitespace at the start and end of the text content of the
23+
/// non-empty ones. <see href="https://www.w3.org/TR/xml11/#NT-S"/>
24+
/// </summary>
25+
/// <remarks>
26+
/// <para>
27+
/// If you only want to remove text nodes consisting solely of
28+
/// whitespace (AKA element content whitespace) but leave all
29+
/// other text nodes alone you should use
30+
/// ElementContentWhitespaceStrippedSource instead.
31+
/// </para>
32+
/// </remarks>
33+
public class XmlWhitespaceStrippedSource : DOMSource {
34+
/// <summary>
35+
/// Creates a new Source with the same content as another source trimming whitespace from Text nodes.
36+
/// </summary>
37+
/// <param name="originalSource">source with the original content</param>
38+
public XmlWhitespaceStrippedSource(ISource originalSource) :
39+
base(Nodes.StripXmlWhitespace(originalSource.ToDocument())) {
40+
SystemId = originalSource.SystemId;
41+
}
42+
}
43+
}

src/main/net-core/NetFramework/XMLUnit.Core.NetFramework.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@
110110
<Compile Include="..\Input\StringSource.cs" />
111111
<Compile Include="..\Input\WhitespaceNormalizedSource.cs" />
112112
<Compile Include="..\Input\WhitespaceStrippedSource.cs" />
113+
<Compile Include="..\Input\XmlWhitespaceStrippedSource.cs" />
113114
<Compile Include="..\ISource.cs" />
114115
<Compile Include="..\Transform\Transformation.cs" />
115116
<Compile Include="..\Util\Convert.cs" />

src/main/net-core/Util/Nodes.cs

Lines changed: 46 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,22 @@ public static IDictionary<XmlQualifiedName, string>
8484
public static XmlNode StripWhitespace(XmlNode original) {
8585
XmlNode cloned = original.CloneNode(true);
8686
cloned.Normalize();
87-
HandleWsRec(cloned, false);
87+
HandleWsRec(cloned, TrimValue);
88+
return cloned;
89+
}
90+
91+
/// <summary>
92+
/// Creates a new Node (of the same type as the original node)
93+
/// that is similar to the original but doesn't contain any
94+
/// empty text or CDATA nodes and where all textual content
95+
/// including attribute values or comments are trimmed of
96+
/// characters XML considers whitespace according to
97+
/// <see href="https://www.w3.org/TR/xml11/#NT-S"/>.
98+
/// </summary>
99+
public static XmlNode StripXmlWhitespace(XmlNode original) {
100+
XmlNode cloned = original.CloneNode(true);
101+
cloned.Normalize();
102+
HandleWsRec(cloned, XmlTrimValue);
88103
return cloned;
89104
}
90105

@@ -104,7 +119,7 @@ public static XmlNode StripWhitespace(XmlNode original) {
104119
public static XmlNode NormalizeWhitespace(XmlNode original) {
105120
XmlNode cloned = original.CloneNode(true);
106121
cloned.Normalize();
107-
HandleWsRec(cloned, true);
122+
HandleWsRec(cloned, TrimAndNormalizeValue);
108123
return cloned;
109124
}
110125

@@ -129,23 +144,44 @@ public static XmlNode StripElementContentWhitespace(XmlNode original) {
129144
return cloned;
130145
}
131146

147+
/// <summary>
148+
/// Returns the node's value trimmed of all whitespace.
149+
/// </summary>
150+
private static String TrimValue(XmlNode n) {
151+
return n.Value.Trim();
152+
}
153+
154+
/// <summary>
155+
/// Returns the node's value trimmed of all whitespace and normalized.
156+
/// </summary>
157+
private static String TrimAndNormalizeValue(XmlNode n) {
158+
return Normalize(TrimValue(n));
159+
}
160+
161+
private static readonly char[] XML_WHITESPACE_CHARS = {
162+
' ', '\r', '\n', '\t'
163+
};
164+
165+
/// <summary>
166+
/// Returns the node's value trimmed of all characters XML considers whitespace.
167+
/// </summary>
168+
private static String XmlTrimValue(XmlNode n) {
169+
return n.Value.Trim(XML_WHITESPACE_CHARS);
170+
}
171+
132172
/// <summary>
133173
/// Trims textual content of this node, removes empty text and
134174
/// CDATA children, recurses into its child nodes.
135175
/// </summary>
136176
/// <param name="handleWs">function that computes the new value
137177
/// of a node with textual content</param>
138-
private static void HandleWsRec(XmlNode n, bool normalize) {
178+
private static void HandleWsRec(XmlNode n, Func<XmlNode, String> handleWs) {
139179
if (n is XmlCharacterData || n is XmlProcessingInstruction) {
140-
string s = n.Value.Trim();
141-
if (normalize) {
142-
s = Normalize(s);
143-
}
144-
n.Value = s;
180+
n.Value = handleWs(n);
145181
}
146182
LinkedList<XmlNode> toRemove = new LinkedList<XmlNode>();
147183
foreach (XmlNode child in n.ChildNodes) {
148-
HandleWsRec(child, normalize);
184+
HandleWsRec(child, handleWs);
149185
if (!(n is XmlAttribute)
150186
&& IsTextualContentNode(child)
151187
&& child.Value.Length == 0) {
@@ -158,7 +194,7 @@ private static void HandleWsRec(XmlNode n, bool normalize) {
158194
XmlNamedNodeMap attrs = n.Attributes;
159195
if (attrs != null) {
160196
foreach (XmlAttribute a in attrs) {
161-
HandleWsRec(a, normalize);
197+
HandleWsRec(a, handleWs);
162198
}
163199
}
164200
}

src/tests/net-core/Input/WhitespaceStrippedSourceTest.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ public void WhitespaceIsStrippedProperlyEvenWithPreserveWhitespaceDoc() {
3535
}
3636

3737
private void WhitespaceIsStrippedProperly(XmlDocument testDoc) {
38-
string testXml = "<a>\n <b>\n Test Value\n </b>\n</a>";
38+
string testXml = "<a>\n <b>\n Test Value\u00a0\n </b>\n</a>";
3939
testDoc.LoadXml(testXml);
4040
WhitespaceStrippedSource s = new WhitespaceStrippedSource(new DOMSource(testDoc));
4141
XmlNode root = s.Node;
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/*
2+
This file is licensed to You under the Apache License, Version 2.0
3+
(the "License"); you may not use this file except in compliance with
4+
the License. You may obtain a copy of the License at
5+
6+
http://www.apache.org/licenses/LICENSE-2.0
7+
8+
Unless required by applicable law or agreed to in writing, software
9+
distributed under the License is distributed on an "AS IS" BASIS,
10+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
See the License for the specific language governing permissions and
12+
limitations under the License.
13+
*/
14+
15+
using System.Xml;
16+
using Org.XmlUnit.Util;
17+
using NUnit.Framework;
18+
19+
namespace Org.XmlUnit.Input {
20+
21+
[TestFixture]
22+
public class XmlWhitespaceStrippedSourceTest {
23+
24+
[Test]
25+
public void XmlWhitespaceIsStrippedProperly() {
26+
XmlWhitespaceIsStrippedProperly(new XmlDocument());
27+
}
28+
29+
[Test]
30+
public void XmlWhitespaceIsStrippedProperlyEvenWithPreserveWhitespaceDoc() {
31+
XmlDocument testDoc = new XmlDocument();
32+
testDoc.PreserveWhitespace = true;
33+
XmlWhitespaceIsStrippedProperly(testDoc);
34+
}
35+
36+
private void XmlWhitespaceIsStrippedProperly(XmlDocument testDoc) {
37+
string testXml = "<a>\n <b>\n Test Value\u00a0\n </b>\n</a>";
38+
testDoc.LoadXml(testXml);
39+
XmlWhitespaceStrippedSource s = new XmlWhitespaceStrippedSource(new DOMSource(testDoc));
40+
XmlNode root = s.Node;
41+
Assert.AreEqual(1, root.ChildNodes.Count);
42+
XmlNode a = root.FirstChild;
43+
Assert.AreEqual(1, a.ChildNodes.Count);
44+
XmlNode b = a.FirstChild;
45+
Assert.AreEqual(1, b.ChildNodes.Count);
46+
Assert.AreEqual("Test Value\u00a0", b.FirstChild.Value);
47+
}
48+
}
49+
}

src/tests/net-core/NetFramework/XMLUnit.Core.Tests.NetFramework.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@
8787
<Compile Include="..\Input\CommentLessSourceTest.cs" />
8888
<Compile Include="..\Input\NormalizedSourceTest.cs" />
8989
<Compile Include="..\Input\WhitespaceStrippedSourceTest.cs" />
90+
<Compile Include="..\Input\XmlWhitespaceStrippedSourceTest.cs" />
9091
<Compile Include="..\TestResources.cs" />
9192
<Compile Include="..\Transform\TransformationTest.cs" />
9293
<Compile Include="..\Util\ConvertTest.cs" />

src/tests/net-core/Util/NodesTest.cs

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ [Test] public void AttributeMapWithFilter() {
135135
private XmlDocument HandleWsSetup() {
136136
return Convert.ToDocument(InputBuilder.FromString(
137137
"<root>\n"
138-
+ "<!-- trim\tme -->\n"
138+
+ "<!--\u00a0 trim\tme\u00a0 -->\n"
139139
+ "<child attr=' trim me ' attr2='not me'>\n"
140140
+ " trim me \n"
141141
+ "</child><![CDATA[ trim me ]]>\n"
@@ -156,11 +156,22 @@ private KeyValuePair<XmlDocument, XmlNode> NormalizeWsSetup() {
156156
XmlNode>(toTest, Nodes.NormalizeWhitespace(toTest));
157157
}
158158

159+
private KeyValuePair<XmlDocument, XmlNode> StripXmlWsSetup() {
160+
XmlDocument toTest = HandleWsSetup();
161+
return new KeyValuePair<XmlDocument,
162+
XmlNode>(toTest, Nodes.StripXmlWhitespace(toTest));
163+
}
164+
159165
[Test]
160166
public void StripWhitespaceWorks() {
161167
HandleWsWorks(StripWsSetup(), "trim\tme");
162168
}
163169

170+
[Test]
171+
public void StripXmlWhitespaceWorks() {
172+
HandleWsWorks(StripXmlWsSetup(), "\u00a0 trim\tme\u00a0");
173+
}
174+
164175
[Test]
165176
public void NormalizeWhitespaceWorks() {
166177
HandleWsWorks(NormalizeWsSetup(), "trim me");
@@ -209,6 +220,11 @@ public void StripWhitespaceDoesntAlterOriginal() {
209220
HandleWsDoesntAlterOriginal(StripWsSetup());
210221
}
211222

223+
[Test]
224+
public void StripXmlWhitespaceDoesntAlterOriginal() {
225+
HandleWsDoesntAlterOriginal(StripXmlWsSetup());
226+
}
227+
212228
[Test]
213229
public void NormalizeWhitespaceDoesntAlterOriginal() {
214230
HandleWsDoesntAlterOriginal(NormalizeWsSetup());
@@ -224,7 +240,7 @@ private void HandleWsDoesntAlterOriginal(KeyValuePair<XmlDocument,
224240
Assert.AreEqual(5, rootsChildren.Count);
225241
Assert.IsTrue(rootsChildren[0] is XmlComment,
226242
"should be comment, is " + rootsChildren[0].GetType());
227-
Assert.AreEqual(" trim\tme ",
243+
Assert.AreEqual("\u00a0 trim\tme\u00a0 ",
228244
((XmlComment) rootsChildren[0]).Data);
229245
Assert.IsTrue(rootsChildren[1] is XmlElement,
230246
"should be element, is " + rootsChildren[1].GetType());
@@ -277,7 +293,7 @@ public void StripECWWorks() {
277293
Assert.AreEqual(4, rootsChildren.Count);
278294
Assert.IsTrue(rootsChildren[0] is XmlComment,
279295
"should be comment, is " + rootsChildren[0].GetType());
280-
Assert.AreEqual(" trim\tme ",
296+
Assert.AreEqual("\u00a0 trim\tme\u00a0 ",
281297
((XmlComment) rootsChildren[0]).Data);
282298
Assert.IsTrue(rootsChildren[1] is XmlElement,
283299
"should be element, is " + rootsChildren[1].GetType());

0 commit comments

Comments
 (0)