Skip to content
Permalink
Browse files

Fix: Change how traversing is done and removed IAsync* references

  • Loading branch information...
sarvaje authored and antross committed Mar 8, 2019
1 parent d6e8c21 commit 5bc0b1657f024068c1479ee226dbeff3d806000a
@@ -35,14 +35,18 @@ import { debug as d } from 'hint/dist/src/lib/utils/debug';
import { getContentTypeData, getType } from 'hint/dist/src/lib/utils/content-type';
import {
HttpHeaders,
HTMLDocument,
IConnector,
ElementFound, Event, FetchEnd, FetchError, TraverseDown, TraverseUp,
HTMLElement,
Event, FetchEnd, FetchError,
NetworkData
} from 'hint/dist/src/lib/types';
import { JSDOMAsyncHTMLElement, JSDOMAsyncHTMLDocument } from 'hint/dist/src/lib/types/jsdom-async-html';
import { Engine } from 'hint/dist/src/lib/engine';
import isHTMLDocument from 'hint/dist/src/lib/utils/network/is-html-document';
import createHTMLDocument from 'hint/dist/src/lib/utils/dom/create-html-document';
import traverse from 'hint/dist/src/lib/utils/dom/traverse';
import { Requester } from '@hint/utils-connector-tools/dist/src/requester';

import CustomResourceLoader from './resource-loader';
import { beforeParse } from './before-parse';

@@ -62,9 +66,9 @@ export default class JSDOMConnector implements IConnector {
private _href: string = '';
private _targetNetworkData!: NetworkData;
private _window!: Window;
private _document!: JSDOMAsyncHTMLDocument;
private _document!: HTMLDocument;
private _timeout: number;
private _resourceLoader: ResourceLoader;
private _resourceLoader?: ResourceLoader;
private _subprocesses: Set<ChildProcess>;

public request: Requester;
@@ -77,7 +81,6 @@ export default class JSDOMConnector implements IConnector {
this.request = new Requester(this._options);
this.server = server;
this._timeout = server.timeout;
this._resourceLoader = new CustomResourceLoader(this);
this._subprocesses = new Set();
}

@@ -109,47 +112,6 @@ export default class JSDOMConnector implements IConnector {
return r.get(uri);
}

/** Traverses the DOM while sending `element::typeofelement` events. */
private async traverseAndNotify(element: HTMLElement) {
const eventName = `element::${element.nodeName.toLowerCase()}` as 'element::*';

debug(`emitting ${eventName}`);
/*
* should we freeze it? what about the other siblings, children,
* parents? We should have an option to not allow modifications
* maybe we create a custom object that only exposes read only
* properties?
*/
const event: ElementFound = {
element: new JSDOMAsyncHTMLElement(element),
resource: this.finalHref
};

await this.server.emitAsync(eventName, event);
for (let i = 0; i < element.children.length; i++) {
const child: HTMLElement = element.children[i] as HTMLElement;

debug('next children');
const traverseDown: TraverseDown = {
element: new JSDOMAsyncHTMLElement(element),
resource: this.finalHref
};

await this.server.emitAsync(`traverse::down`, traverseDown);
await this.traverseAndNotify(child);

}

const traverseUp: TraverseUp = {
element: new JSDOMAsyncHTMLElement(element),
resource: this.finalHref
};

await this.server.emitAsync(`traverse::up`, traverseUp);

return Promise.resolve();
}

/**
* JSDOM doesn't download the favicon automatically, this method:
*
@@ -160,7 +122,7 @@ export default class JSDOMConnector implements IConnector {
const href = (element && element.getAttribute('href')) || '/favicon.ico';

try {
await this._resourceLoader.fetch(new URL(href, this.finalHref).href, { element });
await this._resourceLoader!.fetch(new URL(href, this.finalHref).href, { element });
} catch (e) {
/* istanbul ignore next */
debug('Error loading ${href}', e);
@@ -200,11 +162,10 @@ export default class JSDOMConnector implements IConnector {

try {
this._targetNetworkData = await this.fetchContent(target);
} catch (err) {
} catch (err) /* istanbul ignore next */ {
const hops: string[] = this.request.getRedirects(err.uri);
const fetchError: FetchError = {
element: null as any,
/* istanbul ignore next */
error: err.error ? err.error : err,
hops,
resource: href
@@ -263,6 +224,8 @@ export default class JSDOMConnector implements IConnector {
debug(`Console: ${err}`);
});

this._resourceLoader = new CustomResourceLoader(this, fetchEnd.response.body.content);

const jsdom = new JSDOM(this._targetNetworkData.response.body.content, {
beforeParse: beforeParse(this.finalHref),
includeNodeLocations: true,
@@ -276,13 +239,7 @@ export default class JSDOMConnector implements IConnector {

this._window = window;

const onLoad = async () => {
this._document = new JSDOMAsyncHTMLDocument(window.document);

const evaluateEvent: Event = { resource: this.finalHref };

await this.server.emitAsync('can-evaluate::script', evaluateEvent);

const onLoad = () => {
/*
* Even though `onLoad()` is called on `window.onload`
* (so all resoruces and scripts executed), we might want
@@ -293,9 +250,17 @@ export default class JSDOMConnector implements IConnector {

debug(`${this.finalHref} loaded, traversing`);
try {
await this.server.emitAsync('traverse::start', event);
await this.traverseAndNotify(window.document.children[0] as HTMLElement);
await this.server.emitAsync('traverse::end', event);
const html = this._window.document.documentElement.outerHTML;

const htmlDocument = createHTMLDocument(html);

this._document = htmlDocument;

const evaluateEvent: Event = { resource: this.finalHref };

await this.server.emitAsync('can-evaluate::script', evaluateEvent);

await traverse(htmlDocument, this.server, this.finalHref);

// We download only the first favicon found
await this.getFavicon(window.document.querySelector('link[rel~="icon"]'));
@@ -421,7 +386,7 @@ export default class JSDOMConnector implements IConnector {
}

/* istanbul ignore next */
public querySelectorAll(selector: string): Promise<JSDOMAsyncHTMLElement[]> {
public querySelectorAll(selector: string): HTMLElement[] {
return this._document.querySelectorAll(selector);
}

@@ -432,7 +397,7 @@ export default class JSDOMConnector implements IConnector {
*/

/* istanbul ignore next */
public get dom(): JSDOMAsyncHTMLDocument {
public get dom(): HTMLDocument {
return this._document;
}

@@ -441,7 +406,7 @@ export default class JSDOMConnector implements IConnector {
}

/* istanbul ignore next */
public get html(): Promise<string> {
public get html(): string {
return this._document.pageHTML();
}
}
@@ -4,22 +4,26 @@ import { debug as d } from 'hint/dist/src/lib/utils/debug';
import { ResourceLoader } from 'jsdom';

import JSDOMConnector from './connector';
import { JSDOMAsyncHTMLElement } from 'hint/dist/src/lib/types/jsdom-async-html';
import { HTMLDocument } from 'hint/dist/src/lib/types';
import createHTMLDocument from 'hint/dist/src/lib/utils/dom/create-html-document';
import { NetworkData, FetchEnd, FetchError } from 'hint/dist/src/lib/types';
import { getContentTypeData, getType } from 'hint/dist/src/lib/utils/content-type';
import getElementByUrl from 'hint/dist/src/lib/utils/dom/get-element-by-url';

const debug: debug.IDebugger = d(__filename);

export default class CustomResourceLoader extends ResourceLoader {
private _connector: JSDOMConnector;
private _HTMLDocument: HTMLDocument;

public constructor(connector: JSDOMConnector) {
public constructor(connector: JSDOMConnector, html: string) {
super();

this._connector = connector;
this._HTMLDocument = createHTMLDocument(html);
}

public async fetch(url: string, options: { element: HTMLElement }): Promise<Buffer | null> {
public async fetch(url: string, options: any): Promise<Buffer | null> {
/* istanbul ignore if */
if (!url) {
const promise = Promise.resolve(null);
@@ -29,9 +33,14 @@ export default class CustomResourceLoader extends ResourceLoader {
return await promise;
}

/*
* We need a `HTMLElement` instead of the element returned by jsdom.
* To do so, we create a query from the element returned by jsdom and
* look for it in our `HTMLDocument`.
*/
const element = getElementByUrl(this._HTMLDocument, url);
const urlAsUrl = new URL(url);
let resourceUrl: string = urlAsUrl.href;
const element = options.element ? new JSDOMAsyncHTMLElement(options.element) : null;

/* istanbul ignore if */
if (!urlAsUrl.protocol) {
@@ -10,17 +10,17 @@
"devDependencies": {
"@types/lodash": "^4.14.122",
"@types/node": "11.10.5",
"@typescript-eslint/eslint-plugin": "^1.4.2",
"@typescript-eslint/parser": "1.4.2",
"chrome-remote-debug-protocol": "^1.2.20180422",
"cpx": "^1.5.0",
"eslint": "^5.15.1",
"eslint-plugin-import": "^2.16.0",
"eslint-plugin-markdown": "^1.0.0",
"@typescript-eslint/eslint-plugin": "^1.4.2",
"hint": "^4.5.0",
"npm-run-all": "^4.1.5",
"rimraf": "^2.6.3",
"typescript": "^3.3.3333",
"@typescript-eslint/parser": "1.4.2"
"typescript": "^3.3.3333"
},
"engines": {
"node": ">=8.0.0"
Oops, something went wrong.

0 comments on commit 5bc0b16

Please sign in to comment.
You can’t perform that action at this time.