Skip to content

Commit

Permalink
fix LoadHtml with InnerHtml by default implicit set ReturnType
Browse files Browse the repository at this point in the history
  • Loading branch information
rwecho committed Jul 25, 2023
1 parent 2930b38 commit 0451ed8
Show file tree
Hide file tree
Showing 4 changed files with 164 additions and 6 deletions.
38 changes: 34 additions & 4 deletions src/HtmlAgilityPack.Shared/HtmlNode.Encapsulator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
using System.Collections;
using System.Collections.Generic;
using System.Reflection;
using System.Xml;
using System.Xml.XPath;

namespace HtmlAgilityPack
Expand Down Expand Up @@ -177,7 +178,9 @@ public object GetEncapsulatedData(Type targetType, HtmlDocument htmlDocument = n
{
HtmlDocument innerHtmlDocument = new HtmlDocument();

innerHtmlDocument.LoadHtml(Tools.GetHtmlForEncapsulation(htmlNode, xPathAttribute.NodeReturnType));
innerHtmlDocument.LoadHtml(Tools.GetHtmlForEncapsulation(
htmlNode,
xPathAttribute.IsNodeReturnTypeExplicitlySet ? xPathAttribute.NodeReturnType : ReturnType.InnerHtml));

object o = GetEncapsulatedData(propertyInfo.PropertyType, innerHtmlDocument);

Expand Down Expand Up @@ -295,7 +298,9 @@ public object GetEncapsulatedData(Type targetType, HtmlDocument htmlDocument = n
foreach (HtmlNode node in nodeCollection)
{
HtmlDocument innerHtmlDocument = new HtmlDocument();
innerHtmlDocument.LoadHtml(Tools.GetHtmlForEncapsulation(node, xPathAttribute.NodeReturnType));
innerHtmlDocument.LoadHtml(Tools.GetHtmlForEncapsulation(
node,
xPathAttribute.IsNodeReturnTypeExplicitlySet ? xPathAttribute.NodeReturnType : ReturnType.InnerHtml));

object o = GetEncapsulatedData(T_Types[0], innerHtmlDocument);

Expand Down Expand Up @@ -789,6 +794,9 @@ public sealed class HasXPathAttribute : Attribute
[AttributeUsage(AttributeTargets.Property, Inherited = false, AllowMultiple = false)]
public sealed class XPathAttribute : Attribute
{
private ReturnType _nodeReturnType;
internal bool IsNodeReturnTypeExplicitlySet { get; private set; }

/// <summary>
/// XPath Expression that is used to find related html node.
/// </summary>
Expand All @@ -802,7 +810,14 @@ public sealed class XPathAttribute : Attribute
/// <summary>
/// The method of output
/// </summary>
public ReturnType NodeReturnType { get; set; }
public ReturnType NodeReturnType
{
    get
    {
        return _nodeReturnType;
    }
    set
    {
        // Any assignment made through the property is recorded as an explicit
        // choice; code that only wants to seed a default writes the backing
        // field directly and leaves the flag untouched.
        IsNodeReturnTypeExplicitlySet = true;
        _nodeReturnType = value;
    }
}

/// <summary>
/// Specify Xpath to find related Html Node.
Expand All @@ -811,7 +826,7 @@ public sealed class XPathAttribute : Attribute
public XPathAttribute(string xpathString)
{
XPath = xpathString;
NodeReturnType = ReturnType.InnerText;
_nodeReturnType = ReturnType.InnerText;
}

/// <summary>
Expand All @@ -834,6 +849,21 @@ public XPathAttribute(string xpathString, string attributeName)
{
XPath = xpathString;
AttributeName = attributeName;
_nodeReturnType = ReturnType.InnerText;
}


/// <summary>
/// Specify the XPath, the attribute to read from the related Html Node, and the return type to use for that node.
/// </summary>
/// <param name="xpathString">XPath expression used to find the related html node.</param>
/// <param name="attributeName">Name of the attribute whose value is wanted.</param>
/// <param name="nodeReturnType">Return type for the node; it is assigned through <see cref="NodeReturnType"/>, so it is recorded as explicitly set.</param>
public XPathAttribute(string xpathString, string attributeName, ReturnType nodeReturnType)
{
    // Goes through the property setter on purpose: that is what flags the
    // return type as explicitly chosen by the caller.
    NodeReturnType = nodeReturnType;

    XPath = xpathString;
    AttributeName = attributeName;
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;

namespace HtmlAgilityPack.Tests.NetStandard2_0
{
/// <summary>
/// Model used by the encapsulation tests: every property is bound to a node of
/// <see cref="Html"/> through an <c>[XPath]</c> attribute.
/// </summary>
[HasXPath]
[DebuggerDisplay("{Title}")]
public class EncapsulationTestHtml
{
// NOTE(review): the literal below stops after the second article and never closes
// the body/html elements - presumably tolerated by the parser; confirm it is intentional.
/// <summary>
/// Sample markup that the XPath expressions in this class are written against.
/// </summary>
public const string Html = @"
<html>
<head>
<title>Test</title>
</head>
<body>
<h1>Test</h1>
<a class='link' href='link1.html'>Link 1</a>
<a class='link' href='link2.html'>Link 2</a>
<a class='link' href='link3.html'>Link 3</a>
<div class='form'>
<form>
<input name='username' value='test' />
<input name='password' value='test' />
<input name='email' value='' />
<input name='email2' value='' />
</form>
</div>
<article id='9718'>
<h2><a href='/article1'>Article 1</a> </h2>
<div><span title='2021-09-23 23:22:01'>3 seconds ago</span></div>
<p>Content 1</p>
</article>
<article id='2312'>
<h2><a href='/article2'>Article 2</a> </h2>
<p>Content 2</p>
</article>
";
/// <summary>Bound to the <c>title</c> element inside <c>head</c>.</summary>
[XPath("//head/title")]
public string Title { get; set; } = null!;

/// <summary>Bound to the <c>h1</c> element.</summary>
[XPath("//h1")]
public string H1 { get; set; } = null!;

/// <summary>Bound to the anchor selected by <c>//a[@class='link'][1]</c> ("Link 1" in the sample markup).</summary>
[XPath("//a[@class='link'][1]")]
public string Link1 { get; set; } = null!;

/// <summary>Bound to the anchor selected by <c>//a[@class='link'][2]</c> ("Link 2" in the sample markup).</summary>
// NOTE(review): [SkipNodeNotFound] presumably leaves the property unset when nothing matches - confirm.
[XPath("//a[@class='link'][2]")]
[SkipNodeNotFound]
public string? Link2 { get; set; }

/// <summary>
/// Bound to the anchor selected by <c>//a[@class='link'][3]</c>; the outer HTML is requested
/// explicitly and the result is mapped onto the nested <see cref="Link"/> type.
/// </summary>
[XPath("//a[@class='link'][3]", ReturnType.OuterHtml)]
[SkipNodeNotFound]
public Link? Link3 { get; set; }

/// <summary>
/// Bound to the <c>form</c> inside a <c>div</c> and mapped onto <see cref="FormName"/>.
/// No return type is given here.
/// </summary>
[XPath("//div/form")]
public FormName? Form { get; set; }

/// <summary>
/// Bound to every <c>article</c> element; the outer HTML is requested explicitly and each
/// match is mapped onto an <see cref="Article"/>.
/// </summary>
[XPath("//article", ReturnType.OuterHtml)]
public List<Article> Articles { get; set; } = new List<Article>();

/// <summary>Nested model for an anchor: its text and its <c>href</c> attribute.</summary>
[HasXPath]
[DebuggerDisplay("{Text}/{Href}")]
public class Link
{
/// <summary>Bound to the <c>a</c> element itself (no attribute name given).</summary>
[XPath("a")]
public string Text { get; set; } = null!;

/// <summary>Bound to the <c>href</c> attribute of the <c>a</c> element.</summary>
[XPath("a", "href")]
public string Href { get; set; } = null!;
}

/// <summary>
/// Nested model for the form: each property reads the <c>name</c> attribute of one
/// <c>input</c>, selected by position.
/// </summary>
[HasXPath]
[DebuggerDisplay("{UserName}")]
public class FormName
{
/// <summary><c>name</c> attribute of the first input.</summary>
[XPath("input[1]", "name")]
public string UserName { get; set; } = null!;

/// <summary><c>name</c> attribute of the second input.</summary>
[XPath("input[2]", "name")]
public string Password { get; set; } = null!;
/// <summary><c>name</c> attribute of the third input.</summary>
[XPath("input[3]", "name")]
public string Email { get; set; } = null!;
/// <summary><c>name</c> attribute of the fourth input.</summary>
[XPath("input[4]", "name")]
public string Email2 { get; set; } = null!;
}


/// <summary>Nested model for one <c>article</c> element of the sample markup.</summary>
[HasXPath]
[DebuggerDisplay("{Id}/{Title}")]
public class Article
{
/// <summary><c>id</c> attribute of the article element.</summary>
[XPath("article", "id")]
public string Id { get; set; } = null!;
/// <summary>Heading anchor of the article, requested as outer HTML and mapped onto <see cref="Link"/>.</summary>
[XPath("article/h2/a", ReturnType.OuterHtml)]
public Link Title { get; set; } = null!;

/// <summary><c>title</c> attribute of the article's span; only the first sample article has one.</summary>
// NOTE(review): the attribute text is presumably converted to DateTime by the encapsulator -
// confirm how format and culture are handled.
[XPath("article/div/span", "title")]
[SkipNodeNotFound]
public DateTime Created { get; set; }

/// <summary>Bound to the article's span element; only the first sample article has one.</summary>
// NOTE(review): the property is declared nullable yet initialised with null! - the initialiser looks redundant.
[XPath("article/div/span")]
[SkipNodeNotFound]
public string? CreatedText { get; set; } = null!;
/// <summary>Bound to the article's <c>p</c> element.</summary>
[XPath("article/p")]
public string? Content { get; set; }
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,25 @@ public void EncapsulatedOuterHtml_Test()
Assert.True(outerHtml.Items.Count == 3);
Assert.True(outerHtml.Items.All(o => o.Href != null));
}

/// <summary>
/// Loads <see cref="EncapsulationTestHtml.Html"/>, maps it onto
/// <see cref="EncapsulationTestHtml"/> and checks that the simple properties,
/// the nested objects and the article list are all populated.
/// </summary>
[Fact]
public void Encapsulation_Test2()
{
    var document = new HtmlDocument();
    document.LoadHtml(EncapsulationTestHtml.Html);

    var testHtml = document.DocumentNode.GetEncapsulatedData<EncapsulationTestHtml>();

    // Equality asserts (instead of Assert.True(a == b)) so that a failure
    // reports the actual value that was mapped.
    Assert.Equal("Test", testHtml.Title);
    Assert.Equal("Test", testHtml.H1);
    Assert.NotEmpty(testHtml.Link1);
    Assert.NotEmpty(testHtml.Link2);
    Assert.NotNull(testHtml.Link3);

    Assert.NotNull(testHtml.Form);

    Assert.Equal(2, testHtml.Articles.Count);
    // Per-item assert so a failure points at the offending article.
    Assert.All(testHtml.Articles, article => Assert.NotNull(article.Id));
}
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

<PropertyGroup>
<TargetFramework>netcoreapp3.1</TargetFramework>

<IsPackable>false</IsPackable>
<Nullable>enable</Nullable>
<IsPackable>false</IsPackable>
</PropertyGroup>

<ItemGroup>
Expand Down

0 comments on commit 0451ed8

Please sign in to comment.