Permalink
Browse files

Fix: Use JSDOM locations for elements if available

Fall back to the existing serialization-based behavior otherwise.
  • Loading branch information...
antross authored and alrra committed Aug 31, 2018
1 parent 78e6b68 commit 537bbbbd98c2269d95ecda08e54aa4a086468183
@@ -1,3 +1,5 @@
import { ProblemLocation } from './problems';
export type AsyncHTMLAttribute = {
/** Attribute name of the element */
name: string;
@@ -15,6 +17,8 @@ export interface IAsyncHTMLElement {
readonly attributes;
/** Returns the value for a given attribute */
getAttribute(attribute: string): string;
/** Returns the location of this element in source (or null) */
getLocation(): ProblemLocation;
/** Checks if two AsyncHTMLElements are the same */
isSame(element: IAsyncHTMLElement): boolean;
/** Returns the outerHTML of the element */
@@ -1,12 +1,15 @@
import { IAsyncHTMLDocument, IAsyncHTMLElement, IAsyncWindow } from './async-html';
import { DOMWindow } from 'jsdom';
import { DOMWindow, JSDOM } from 'jsdom';
import { ProblemLocation } from './problems';
/** An implementation of AsyncHTMLDocument on top of JSDOM */
export class JSDOMAsyncHTMLDocument implements IAsyncHTMLDocument {
private _document: HTMLDocument
private _document: HTMLDocument;
private _dom: JSDOM;
public constructor(document: HTMLDocument) {
public constructor(document: HTMLDocument, dom?: JSDOM) {
this._document = document;
this._dom = dom;
}
/*
@@ -20,7 +23,7 @@ export class JSDOMAsyncHTMLDocument implements IAsyncHTMLDocument {
try {
const elements = Array.prototype.slice.call(this._document.querySelectorAll(selector))
.map((element) => {
return new JSDOMAsyncHTMLElement(element); // eslint-disable-line no-use-before-define, typescript/no-use-before-define
return new JSDOMAsyncHTMLElement(element, this._dom); // eslint-disable-line no-use-before-define, typescript/no-use-before-define
});
return Promise.resolve(elements);
@@ -37,12 +40,14 @@ export class JSDOMAsyncHTMLDocument implements IAsyncHTMLDocument {
/** An implementation of AsyncHTMLElement on top of JSDOM */
export class JSDOMAsyncHTMLElement implements IAsyncHTMLElement {
private _dom: JSDOM;
protected _htmlelement: HTMLElement;
private _ownerDocument: IAsyncHTMLDocument;
public constructor(htmlelement: HTMLElement) {
public constructor(htmlelement: HTMLElement, dom?: JSDOM) {
this._dom = dom;
this._htmlelement = htmlelement;
this._ownerDocument = new JSDOMAsyncHTMLDocument(htmlelement.ownerDocument);
this._ownerDocument = new JSDOMAsyncHTMLDocument(htmlelement.ownerDocument, this._dom);
}
/*
@@ -55,6 +60,21 @@ export class JSDOMAsyncHTMLElement implements IAsyncHTMLElement {
return this._htmlelement.getAttribute(name);
}
/**
 * Returns the position of this element's start tag as reported by JSDOM,
 * or `null` when no position can be obtained.
 *
 * `null` is returned when no JSDOM instance was supplied to this wrapper,
 * when JSDOM reports no location for the node, or when the lookup throws.
 * The reported line has one subtracted from it (presumably converting a
 * 1-based line to a 0-based one; confirm against `ProblemLocation`), while
 * the column is passed through unchanged.
 */
/* istanbul ignore next */
public getLocation(): ProblemLocation {
    // Without a JSDOM instance there is nothing to query.
    if (!this._dom) {
        return null;
    }

    try {
        const nodeLocation = this._dom.nodeLocation(this._htmlelement);

        if (!nodeLocation) {
            return null;
        }

        const { col, line } = nodeLocation.startTag;

        return { column: col, line: line - 1 };
    } catch (e) {
        // JSDOM throws an exception if `includeNodeLocations` wasn't set.
        return null;
    }
}
/* istanbul ignore next */
public isSame(element: JSDOMAsyncHTMLElement): boolean {
return this._htmlelement === element._htmlelement;
@@ -89,10 +109,12 @@ export class JSDOMAsyncHTMLElement implements IAsyncHTMLElement {
export class JSDOMAsyncWindow implements IAsyncWindow {
private _window: DOMWindow;
private _document: JSDOMAsyncHTMLDocument;
private _dom: JSDOM;
public constructor(window: DOMWindow) {
public constructor(window: DOMWindow, dom?: JSDOM) {
this._dom = dom;
this._window = window;
this._document = new JSDOMAsyncHTMLDocument(window.document);
this._document = new JSDOMAsyncHTMLDocument(window.document, this._dom);
}
public get document(): IAsyncHTMLDocument {
@@ -66,6 +66,15 @@ const getIndicesOf = (searchStr: string, str: string): Array<number> => {
/** Finds the Location of an HTMLElement in the document */
export const findElementLocation = async (element: IAsyncHTMLElement): Promise<ProblemLocation> => {
const location = element.getLocation();
// Use the element information from parsing if we have it.
/* istanbul ignore next */
if (location) {
return location;
}
// Otherwise try to infer where the element was located.
const html: string = await element.ownerDocument.pageHTML();
const elementHTML: string = await element.outerHTML();
const indexOccurences: Array<number> = getIndicesOf(elementHTML, html);
@@ -39,36 +39,36 @@ export default class HTMLParser extends Parser {
});
const window = new JSDOMAsyncWindow(dom.window);
const window = new JSDOMAsyncWindow(dom.window, dom);
const documentElement = dom.window.document.documentElement;
await this.engine.emitAsync(`parse::${this.name}::end`, { html, resource, window } as HTMLParse);
const event = { resource } as Event;
await this.engine.emitAsync('traverse::start', event);
await this.traverseAndNotify(documentElement);
await this.traverseAndNotify(documentElement, dom);
await this.engine.emitAsync('traverse::end', event);
}
/** Traverses the DOM while sending `element::typeofelement` events. */
private async traverseAndNotify(element: HTMLElement): Promise<void> {
private async traverseAndNotify(element: HTMLElement, dom: JSDOM): Promise<void> {
await this.engine.emitAsync(`element::${element.tagName.toLowerCase()}`, {
element: new JSDOMAsyncHTMLElement(element),
element: new JSDOMAsyncHTMLElement(element, dom),
resource: this._url
} as ElementFound);
const traverseEvent = {
element: new JSDOMAsyncHTMLElement(element),
element: new JSDOMAsyncHTMLElement(element, dom),
resource: this._url
} as TraverseDown | TraverseUp;
await this.engine.emitAsync(`traverse::down`, traverseEvent);
// Recursively traverse child elements.
for (let i = 0; i < element.children.length; i++) {
await this.traverseAndNotify(element.children[i] as HTMLElement);
await this.traverseAndNotify(element.children[i] as HTMLElement, dom);
}
await this.engine.emitAsync(`traverse::up`, traverseEvent);
@@ -3,6 +3,7 @@ import test from 'ava';
import { EventEmitter2 } from 'eventemitter2';
import * as HTMLParser from '../src/parser';
import { IAsyncHTMLDocument } from 'hint/dist/src/lib/types';
test.beforeEach((t) => {
t.context.engine = new EventEmitter2({
@@ -29,9 +30,10 @@ test.serial('If `fetch::end::html` is received, then the code should be parsed a
});
const args = t.context.engine.emitAsync.args;
const document = args[1][1].window.document;
const document = args[1][1].window.document as IAsyncHTMLDocument;
const div = (await document.querySelectorAll('div'))[0];
const div2 = (await document.querySelectorAll('body > div'))[0];
const location = div.getLocation();
let id = null;
@@ -49,6 +51,8 @@ test.serial('If `fetch::end::html` is received, then the code should be parsed a
t.is(await div.outerHTML(), '<div id="test">Test</div>');
t.is(div.nodeName.toLowerCase(), 'div');
t.is(div.getAttribute('id'), 'test');
t.is(location && location.line, 0);
t.is(location && location.column, 16);
t.is(id.value, 'test');
t.true(div.isSame(div2));
@@ -1,5 +1,6 @@
import { IAsyncHTMLDocument, IAsyncHTMLElement } from 'hint/dist/src/lib/types/async-html'; //eslint-disable-line
import { debug as d } from 'hint/dist/src/lib/utils/debug';
import { ProblemLocation } from 'hint/dist/src/lib/types';
const debug: debug.IDebugger = d(__filename);
@@ -161,6 +162,10 @@ export class AsyncHTMLElement implements IAsyncHTMLElement {
return typeof value === 'string' ? value : null;
}
/**
 * Returns the location of this element in source.
 *
 * This implementation always returns `null`: it does not provide a source
 * position, so callers must handle the `null` case themselves.
 */
public getLocation(): ProblemLocation {
return null;
}
/**
 * Checks whether `element` wraps the same node as this instance by
 * comparing the `nodeId` of the two underlying elements.
 */
public isSame(element: AsyncHTMLElement): boolean {
    const ownNodeId = this._htmlelement.nodeId;
    const otherNodeId = element._htmlelement.nodeId;

    return ownNodeId === otherNodeId;
}

0 comments on commit 537bbbb

Please sign in to comment.