Skip to content

Commit

Permalink
Update source
Browse files Browse the repository at this point in the history
Update source
  • Loading branch information
JonathanMagnan committed Jul 19, 2019
1 parent 83f8b53 commit 0c2e574
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 26 deletions.
42 changes: 26 additions & 16 deletions src/HtmlAgilityPack.Shared/HtmlDocument.cs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ public static bool DisableBehaviorTagP
private Encoding _declaredencoding;
private HtmlNode _documentnode;
private bool _fullcomment;
private int _index;
private int _index;
internal Dictionary<string, HtmlNode> Lastnodes = new Dictionary<string, HtmlNode>();
private HtmlNode _lastparentnode;
private int _line;
Expand Down Expand Up @@ -1208,7 +1208,7 @@ private void IncrementPosition()

/// <summary>
/// Reports whether the parser is positioned at the start of a tag: the current
/// character (_c) is '&lt;' and the character at Text[_index] is a letter or one
/// of the marker characters listed in the condition.
/// </summary>
/// <returns>
/// true when both conditions hold; false otherwise, including when _index is at
/// or past the end of Text (the length check short-circuits the indexer access).
/// </returns>
private bool IsValidTag()
{
// NOTE(review): the two consecutive declarations below look like the before and
// after versions of a single line as rendered by the commit diff; only one of
// them can exist in the compiled file. The second one additionally accepts '?'
// as the character following '<'.
bool isValidTag = _c == '<' && _index < Text.Length && (Char.IsLetter(Text[_index]) || Text[_index] == '/' || Text[_index] == '!' || Text[_index] == '%');
bool isValidTag = _c == '<' && _index < Text.Length && (Char.IsLetter(Text[_index]) || Text[_index] == '/' || Text[_index] == '?' || Text[_index] == '!' || Text[_index] == '%');
return isValidTag;
}

Expand Down Expand Up @@ -1260,9 +1260,9 @@ private bool NewCheck()
_state = ParseState.WhichTag;
if ((_index - 1) <= (Text.Length - 2))
{
if (Text[_index] == '!')
if (Text[_index] == '!' || Text[_index] == '?')
{
PushNodeStart(HtmlNodeType.Comment, _index - 1, _lineposition -1);
PushNodeStart(HtmlNodeType.Comment, _index - 1, _lineposition -1);
PushNodeNameStart(true, _index);
PushNodeNameEnd(_index + 1);
_state = ParseState.Comment;
Expand All @@ -1276,7 +1276,7 @@ private bool NewCheck()
else
{
_fullcomment = false;
}
}
}

return true;
Expand Down Expand Up @@ -1635,7 +1635,7 @@ private void Parse()
{
if (_fullcomment)
{
if (((Text[_index - 2] != '-') || (Text[_index - 3] != '-'))
if (((Text[_index - 2] != '-') || (Text[_index - 3] != '-'))
&&
((Text[_index - 2] != '!') || (Text[_index - 3] != '-') ||
(Text[_index - 4] != '-')))
Expand Down Expand Up @@ -1706,18 +1706,20 @@ private void Parse()
int c = Text[_index - 1 + 2 + _currentnode.Name.Length];
if ((c == '>') || (IsWhiteSpace(c)))
{
// add the script as a text node
HtmlNode script = CreateNode(HtmlNodeType.Text,
// add the script as a text node
HtmlNode script = CreateNode(HtmlNodeType.Text,
_currentnode._outerstartindex +
_currentnode._outerlength);
script._outerlength = _index - 1 - script._outerstartindex;
script._streamposition = script._outerstartindex;
script._line = _currentnode.Line;
script._lineposition = _currentnode.LinePosition + _currentnode._namelength + 2;
_currentnode.AppendChild(script);

_currentnode.AppendChild(script);
_currentnode._isPcData = true;


PushNodeStart(HtmlNodeType.Element, _index - 1, _lineposition -1);
PushNodeStart(HtmlNodeType.Element, _index - 1, _lineposition -1);
PushNodeNameStart(false, _index - 1 + 2);
_state = ParseState.Tag;
IncrementPosition();
Expand All @@ -1744,13 +1746,21 @@ private void Parse()
Lastnodes.Clear();
}

/// <summary>
/// Records the length of the current attribute's name and appends the attribute
/// to the current node's attribute collection unconditionally.
/// </summary>
/// <param name="index">
/// End position of the attribute name; the name length is computed as
/// <paramref name="index"/> minus the attribute's name start index.
/// </param>
// NOTE(review): this appears to be the pre-change side of the diff; a second
// definition of the same method, which filters some names, follows below.
private void PushAttributeNameEnd(int index)
{
_currentattribute._namelength = index - _currentattribute._namestartindex;
_currentnode.Attributes.Append(_currentattribute);
}
// Attribute names that are dropped instead of being appended to the node.
// Only the attribute name is known at this point; no value has been read yet.
// Other candidates that could be blocked: "[", "]", "<", ">", "-", "|", "/", "\\"
private static readonly List<string> BlockAttributes = new List<string>() { "\"", "'" };

/// <summary>
/// Records the length of the current attribute's name and appends the attribute
/// to the current node, unless the name is null or listed in BlockAttributes.
/// </summary>
/// <param name="index">
/// End position of the attribute name; the name length is computed as
/// <paramref name="index"/> minus the attribute's name start index.
/// </param>
private void PushAttributeNameEnd(int index)
{
    _currentattribute._namelength = index - _currentattribute._namestartindex;

    // The name length must be set before the name is read and checked.
    string name = _currentattribute.Name;
    if (name == null || BlockAttributes.Contains(name))
    {
        // A stray quote character (or a missing name) is not a real attribute.
        return;
    }

    _currentnode.Attributes.Append(_currentattribute);
}

private void PushAttributeNameStart(int index, int lineposition)
private void PushAttributeNameStart(int index, int lineposition)
{
_currentattribute = CreateAttribute();
_currentattribute._namestartindex = index;
Expand Down
23 changes: 13 additions & 10 deletions src/HtmlAgilityPack.Shared/HtmlNode.cs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ public partial class HtmlNode
internal bool _starttag;
internal int _streamposition;
internal bool _isImplicitEnd;
internal bool _isPcData;

#endregion

Expand Down Expand Up @@ -95,8 +96,9 @@ static HtmlNode()
ElementsFlags = new Dictionary<string, HtmlElementFlag>(StringComparer.OrdinalIgnoreCase);
ElementsFlags.Add("script", HtmlElementFlag.CData);
ElementsFlags.Add("style", HtmlElementFlag.CData);
ElementsFlags.Add("noxhtml", HtmlElementFlag.CData);
ElementsFlags.Add("noxhtml", HtmlElementFlag.CData); // can't found.
ElementsFlags.Add("textarea", HtmlElementFlag.CData);
ElementsFlags.Add("title", HtmlElementFlag.CData);

// tags that can not contain other tags
ElementsFlags.Add("base", HtmlElementFlag.Empty);
Expand Down Expand Up @@ -369,7 +371,7 @@ public virtual string InnerHtml
}

/// <summary>
/// Gets or Sets the text between the start and end tags of the object.
/// Gets the text between the start and end tags of the object.
/// </summary>
public virtual string InnerText
{
Expand All @@ -386,7 +388,7 @@ public virtual string InnerText

return GetCurrentNodeText();
}

if (_nodetype == HtmlNodeType.Text)
return ((HtmlTextNode) this).Text;

Expand All @@ -396,7 +398,7 @@ public virtual string InnerText

// note: right now, this method is *slow*, because we recompute everything.
// it could be optimized like innerhtml
if (!HasChildNodes)
if (!HasChildNodes || _isPcData)
return string.Empty;

string s = null;
Expand Down Expand Up @@ -480,13 +482,13 @@ internal void AppendDirectInnerText(StringBuilder sb)
}

internal void AppendInnerText(StringBuilder sb)
{
{
if (_nodetype == HtmlNodeType.Text)
{
sb.Append(GetCurrentNodeText());
}

if (!HasChildNodes) return;
if (!HasChildNodes || _isPcData) return;

foreach (HtmlNode node in ChildNodes)
{
Expand Down Expand Up @@ -2092,7 +2094,7 @@ internal void WriteAttribute(TextWriter outText, HtmlAttribute att)

string name;
string quote = att.QuoteType == AttributeValueQuote.DoubleQuote ? "\"" : "'";
if (_ownerdocument.OptionOutputAsXml)
if (_ownerdocument.OptionOutputAsXml)
{
name = _ownerdocument.OptionOutputUpperCase ? att.XmlName.ToUpperInvariant(): att.XmlName;
if (_ownerdocument.OptionOutputOriginalCase)
Expand All @@ -2115,13 +2117,14 @@ internal void WriteAttribute(TextWriter outText, HtmlAttribute att)
}
}

if (_ownerdocument.OptionOutputOptimizeAttributeValues)
var value = att.QuoteType == AttributeValueQuote.DoubleQuote ? att.Value.Replace("\"", "&quot;") : att.Value.Replace("'", "&#39;");
if (_ownerdocument.OptionOutputOptimizeAttributeValues)
if (att.Value.IndexOfAny(new char[] {(char) 10, (char) 13, (char) 9, ' '}) < 0)
outText.Write(" " + name + "=" + att.Value);
else
outText.Write(" " + name + "=" + quote + att.Value + quote);
outText.Write(" " + name + "=" + quote + value + quote);
else
outText.Write(" " + name + "=" + quote + att.Value + quote);
outText.Write(" " + name + "=" + quote + value + quote);
}
}

Expand Down

0 comments on commit 0c2e574

Please sign in to comment.