Skip to content
Permalink
Browse files

New: Add HTMLDocument and HTMLElement type

  • Loading branch information...
sarvaje authored and antross committed Mar 7, 2019
1 parent 9fa29b6 commit 0439f9bc38f3f3d72779bcbe0a164267c8ea3db0
@@ -16,6 +16,7 @@
"caniuse-api": "^3.0.0",
"chalk": "^2.4.2",
"content-type": "^1.0.4",
"css-select": "^2.0.2",
"debug": "^4.1.1",
"eventemitter2": "^5.0.1",
"file-type": "^10.9.0",
@@ -29,6 +30,8 @@
"npm-registry-fetch": "^3.9.0",
"optionator": "^0.8.2",
"ora": "^3.2.0",
"parse5": "^5.1.0",
"parse5-htmlparser2-tree-adapter": "^5.1.0",
"request": "^2.88.0",
"semver": "^5.6.0",
"strip-bom": "^3.0.0",
@@ -47,25 +50,27 @@
"@types/mime-db": "^1.27.0",
"@types/mkdirp": "^0.5.2",
"@types/node": "11.10.5",
"@types/parse5": "^5.0.0",
"@types/parse5-htmlparser2-tree-adapter": "^5.0.1",
"@types/request": "^2.48.1",
"@types/semver": "^5.4.0",
"@types/sinon": "^7.0.9",
"@types/strip-bom": "^3.0.0",
"@types/strip-json-comments": "^0.0.30",
"@types/update-notifier": "^2.5.0",
"@typescript-eslint/eslint-plugin": "^1.4.2",
"@typescript-eslint/parser": "1.4.2",
"ava": "^1.3.1",
"cpx": "^1.5.0",
"eslint": "^5.15.1",
"eslint-plugin-import": "^2.16.0",
"eslint-plugin-markdown": "^1.0.0",
"@typescript-eslint/eslint-plugin": "^1.4.2",
"npm-run-all": "^4.1.5",
"nyc": "^13.3.0",
"proxyquire": "2.0.0",
"rimraf": "^2.6.3",
"sinon": "^7.2.7",
"typescript": "^3.3.3333",
"@typescript-eslint/parser": "1.4.2"
"typescript": "^3.3.3333"
},
"engines": {
"node": ">=8.0.0"
@@ -0,0 +1,161 @@
import * as parse5 from 'parse5';
import * as htmlparser2Adapter from 'parse5-htmlparser2-tree-adapter';
import * as cssSelect from 'css-select';

import { ProblemLocation } from '../types';

/**
 * Dictionary of attribute values keyed by attribute name.
 * Used as the type of `ParsedHTMLElement.attribs`.
 */
type Attrib = {
[key: string]: string;
};

/**
 * A single attribute exposed as a name/value pair; this is the entry
 * type of `INamedNodeMap`.
 */
export type HTMLAttribute = {
/** Attribute name of the element */
name: string;

/** Attribute value of the element */
value: string;
};

/**
 * Array-like collection of attributes: entries are read by numeric index
 * and counted through `length`.
 *
 * `item` is optional, so a plain array of `HTMLAttribute` satisfies this
 * interface.
 */
export interface INamedNodeMap {
/** Attribute stored at the given position. */
[index: number]: HTMLAttribute;
/** Optional accessor returning the attribute at `index`, or `null`. */
item?(index: number): HTMLAttribute | null;
/** Number of attributes in the collection. */
readonly length: number;
}

/**
 * Shape of the parsed tree nodes that `HTMLElement` wraps.
 *
 * NOTE(review): presumably mirrors the element nodes produced by
 * `parse5-htmlparser2-tree-adapter`; confirm against the adapter.
 */
type ParsedHTMLElement = {
/** Attribute values keyed by attribute name. */
attribs: Attrib;
/** Child nodes; `HTMLElement.children` keeps those whose `nodeType` is 1. */
children: ParsedHTMLElement[];
/** Following sibling, or `null`. */
next: ParsedHTMLElement | null;
/** Numeric node kind; the value 1 is treated as "element" by `HTMLElement.children`. */
nodeType: number;
/** Containing node, or `null`. */
parent: ParsedHTMLElement | null;
/** Preceding sibling, or `null`. */
prev: ParsedHTMLElement | null;
/** Source position of the element; `startLine` and `startCol` are read by `HTMLElement.getLocation()`. */
sourceCodeLocation: parse5.ElementLocation;
/** Tag name, returned as-is by `HTMLElement.nodeName`. */
tagName: string;
}

/**
 * Lightweight, read-only wrapper around an element node of a parsed HTML
 * tree. It exposes a small DOM-like surface (`attributes`, `children`,
 * `nodeName`, `getAttribute`, ...) on top of the raw node.
 */
export class HTMLElement {
    /** Document this element was obtained from, when one was given to the constructor. */
    public ownerDocument?: HTMLDocument;

    /** Raw parsed node backing this wrapper. */
    private _element: ParsedHTMLElement;

    /**
     * @param element Raw parsed node, or another `HTMLElement` whose
     * underlying node is reused (the node itself is not copied).
     * @param ownerDocument Document the element belongs to, if known.
     */
    public constructor(element: ParsedHTMLElement | HTMLElement, ownerDocument?: HTMLDocument) {
        this._element = element instanceof HTMLElement ? element._element : element;
        this.ownerDocument = ownerDocument;
    }

    /**
     * Attributes of the element as a freshly built list of name/value
     * pairs, in the enumeration order of the underlying attribute map.
     */
    public get attributes(): INamedNodeMap {
        const attribs = this._element.attribs;

        return Object.entries(attribs).map(([name, value]) => {
            return {
                name,
                value
            };
        });
    }

    /**
     * Child elements (nodes whose `nodeType` is 1), each wrapped in a new
     * `HTMLElement` that shares this element's `ownerDocument`.
     */
    public get children(): HTMLElement[] {
        const result: HTMLElement[] = [];

        for (const child of this._element.children) {
            if (child.nodeType === 1) {
                result.push(new HTMLElement(child, this.ownerDocument));
            }
        }

        return result;
    }

    /** Tag name of the underlying node, exactly as stored by the parser. */
    public get nodeName(): string {
        return this._element.tagName;
    }

    /**
     * Value of the given attribute.
     *
     * @param attribute Name of the attribute to read.
     * @returns The attribute value (possibly an empty string), or `null`
     * when the attribute is not present.
     */
    public getAttribute(attribute: string): string | null {
        const attrib = this._element.attribs[attribute];
        const value = typeof attrib !== 'undefined' ? attrib : null;

        return value;
    }

    /**
     * zero-based location of the element.
     *
     * NOTE(review): this reads `sourceCodeLocation` unconditionally and
     * therefore throws a `TypeError` for a node that carries no location
     * (presumably nodes the parser created implicitly, or trees parsed
     * without location info); confirm that callers never hit that case.
     */
    public getLocation(): ProblemLocation {
        const location = this._element.sourceCodeLocation;

        return {
            // Column is zero-based, but pointing to the tag name, not the character <
            column: location.startCol,
            line: location.startLine - 1
        };
    }

    /**
     * Whether `element` wraps the very same underlying node as this
     * instance. Two distinct wrappers around one node are "the same".
     */
    public isSame(element: HTMLElement): boolean {
        return this._element === element._element;
    }

    /**
     * Serializes the element, including its own tag, to an HTML string.
     *
     * @returns The markup of the element and its descendants.
     */
    public outerHTML(): string {
        /*
         * Until parse5 supports outerHTML
         * (https://github.com/inikulin/parse5/issues/230)
         * we need to use this workaround:
         * https://github.com/inikulin/parse5/issues/118
         *
         * The problem with this workaround is that appending the
         * element to a fragment modifies the element's links to the
         * tree, so they have to be restored before leaving the method.
         */
        const fragment = htmlparser2Adapter.createDocumentFragment();
        const { parent, next, prev } = this._element;

        try {
            htmlparser2Adapter.appendChild(fragment, this._element);

            return parse5.serialize(fragment, { treeAdapter: htmlparser2Adapter });
        } finally {
            /*
             * Restore the links in a `finally` block so the original
             * tree is left intact even if serialization throws.
             */
            this._element.parent = parent;
            this._element.next = next;
            this._element.prev = prev;

            if (next) {
                next.prev = this._element;
            }

            if (prev) {
                prev.next = this._element;
            }
        }
    }
}

/**
 * Wrapper around a parsed document that offers a few DOM-like helpers:
 * access to the root `<html>` element, the serialized markup and CSS
 * selector queries.
 */
export class HTMLDocument {
    /** Parsed document tree the helpers operate on. */
    private _document: any;
    /** Markup of the whole document, serialized once at construction time. */
    private _pageHTML = '';

    /**
     * @param document Parsed document; it is serialized immediately and
     * the resulting string is what `pageHTML()` returns.
     */
    public constructor(document: parse5.Document) {
        this._document = document;

        const serializerOptions = { treeAdapter: htmlparser2Adapter };

        this._pageHTML = parse5.serialize(document, serializerOptions);
    }

    /**
     * The top-level node of type `tag` named `html`, wrapped in an
     * `HTMLElement` owned by this document.
     */
    public get documentElement(): HTMLElement {
        const isHtmlTag = (node: any): boolean => node.type === 'tag' && node.name === 'html';
        const root = this._document.children.find(isHtmlTag);

        return new HTMLElement(root, this);
    }

    /** Serialized markup of the document. */
    public pageHTML(): string {
        return this._pageHTML;
    }

    /**
     * Runs a CSS selector against the top-level nodes of the document.
     *
     * @param selector CSS selector to evaluate.
     * @returns Every match wrapped in an `HTMLElement` owned by this
     * document; an empty array when nothing matches.
     */
    public querySelectorAll(selector: string): HTMLElement[] {
        const found: any[] = cssSelect(selector, this._document.children);
        const wrapped: HTMLElement[] = [];

        for (const node of found) {
            wrapped.push(new HTMLElement(node, this));
        }

        return wrapped;
    }
}
@@ -0,0 +1,6 @@
<!DOCTYPE html><html><head></head><body>
<h1 class="title">Title</h1>
<p class="content">This is the content</p>


</body></html>
@@ -0,0 +1,7 @@
<!DOCTYPE html>
<html>
<body>
<h1 class="title">Title</h1>
<p class="content">This is the content</p>
</body>
</html>
@@ -0,0 +1,86 @@
import * as fs from 'fs';
import * as path from 'path';

import anyTest, { TestInterface } from 'ava';

import createHtmlDocument from '../../../src/lib/utils/dom/create-html-document';
import { HTMLDocument } from '../../../src/lib/types';

/** Shape of `t.context` for the tests in this file. */
type HTMLContext = {
/** Document under test; assigned in the `beforeEach` hook. */
document: HTMLDocument;
};

// Typed alias of the test function so that `t.context` is an `HTMLContext`.
const test = anyTest as TestInterface<HTMLContext>;

// Input markup, and the markup `pageHTML()` is expected to produce for it.
const html = fs.readFileSync(path.join(__dirname, '..', 'fixtures', 'test-html.html'), 'utf-8'); // eslint-disable-line no-sync
const serializedHTML = fs.readFileSync(path.join(__dirname, '..', 'fixtures', 'serialized-test-html.html'), 'utf-8'); // eslint-disable-line no-sync

// Build a new document from the input fixture before every test.
test.beforeEach((t) => {
t.context.document = createHtmlDocument(html);
});

// The title names the member actually exercised: the `documentElement` getter.
test('HTMLDocument.documentElement should return the html node', (t) => {
    const dom = t.context.document.documentElement;

    t.is(dom.nodeName, 'html');
});

// Carriage returns are stripped on both sides so the comparison does not
// depend on the line endings of the checked-out fixtures.
test('HTMLDocument.pageHTML() should return the html code', (t) => {
    const stripCarriageReturns = (text: string): string => {
        return text.replace(/\r/g, '');
    };
    const actual = stripCarriageReturns(t.context.document.pageHTML());
    const expected = stripCarriageReturns(serializedHTML);

    t.is(actual, expected);
});

test('HTMLDocument.querySelectorAll should return an empty array if no item match the query', (t) => {
    const { document } = t.context;
    const matches = document.querySelectorAll('img');

    t.true(Array.isArray(matches));
    t.is(matches.length, 0);
});

test('HTMLDocument.querySelectorAll should return the right element', (t) => {
    const { document } = t.context;
    const matches = document.querySelectorAll('.title');

    t.is(matches.length, 1);

    const [heading] = matches;

    t.is(heading.getAttribute('class'), 'title');
});

test('HTMLElement.attributes should return an array with all the attributes', (t) => {
    const [heading] = t.context.document.querySelectorAll('.title');

    t.is(heading.attributes.length, 1);

    const { name, value } = heading.attributes[0];

    t.is(name, 'class');
    t.is(value, 'title');
});

test('HTMLElement.nodeName should return the node name', (t) => {
    const [heading] = t.context.document.querySelectorAll('.title');

    t.is(heading.nodeName, 'h1');
});

test('HTMLElement.getAttribute() should return the attribute value', (t) => {
    const [heading] = t.context.document.querySelectorAll('.title');
    const className = heading.getAttribute('class');

    t.is(className, 'title');
});

// Expected values are the zero-based position of the `h1` in the input fixture.
test('HTMLElement.getLocation() should return the element location', (t) => {
    const [heading] = t.context.document.querySelectorAll('.title');
    const { line, column } = heading.getLocation();

    t.is(line, 3);
    t.is(column, 9);
});

test('HTMLElement.isSame() should return if an item is the same or not', (t) => {
    const { document } = t.context;
    const item = document.querySelectorAll('.title')[0];
    // A second query yields a different wrapper around the same node.
    const sameNode = document.querySelectorAll('.title')[0];
    const otherNode = document.querySelectorAll('.content')[0];

    t.true(item.isSame(item));
    t.true(item.isSame(sameNode));
    // Negative case: wrappers around different nodes must not be "the same".
    t.false(item.isSame(otherNode));
});

// The serialized element must include its own tag and attributes.
test('HTMLElement.outerHTML() should return the element HTML', (t) => {
    const [heading] = t.context.document.querySelectorAll('.title');
    const markup = heading.outerHTML();

    t.is(markup, '<h1 class="title">Title</h1>');
});
Oops, something went wrong.

0 comments on commit 0439f9b

Please sign in to comment.
You can’t perform that action at this time.