diff --git a/config.frontend.test.yaml b/config.frontend.test.yaml
index 126672376..817bf9f36 100644
--- a/config.frontend.test.yaml
+++ b/config.frontend.test.yaml
@@ -8,11 +8,8 @@ default_project: &default_project
- path: projects/v1/default.wmf.yaml
options: &default_options
parsoid:
- host: https://parsoid-beta.wmflabs.org
+ host: https://en.wikipedia.beta.wmflabs.org/w/rest.php
grace_ttl: 1000000
- php_host: https://en.wikipedia.beta.wmflabs.org/w/rest.php
- proxy:
- default_variant: php
action:
apiUriTemplate: "{{'https://{domain}/w/api.php'}}"
baseUriTemplate: "{{'https://{domain}/api/rest_v1'}}"
diff --git a/config.fullstack.test.yaml b/config.fullstack.test.yaml
index 0bd9b6542..e23218261 100644
--- a/config.fullstack.test.yaml
+++ b/config.fullstack.test.yaml
@@ -7,11 +7,8 @@ default_project: &default_project
- path: projects/v1/default.wmf.yaml
options: &default_options
parsoid:
- host: https://parsoid-beta.wmflabs.org
+ host: https://en.wikipedia.beta.wmflabs.org/w/rest.php
grace_ttl: 1000000
- php_host: https://en.wikipedia.beta.wmflabs.org/w/rest.php
- proxy:
- default_variant: php
action:
apiUriTemplate: "{{'https://{domain}/w/api.php'}}"
baseUriTemplate: "{{'https://{domain}/api/rest_v1'}}"
diff --git a/lib/parsoid.js b/lib/parsoid.js
deleted file mode 100644
index 1942cfed6..000000000
--- a/lib/parsoid.js
+++ /dev/null
@@ -1,820 +0,0 @@
-'use strict';
-
-const P = require('bluebird');
-const HyperSwitch = require('hyperswitch');
-const URI = HyperSwitch.URI;
-const HTTPError = HyperSwitch.HTTPError;
-
-const uuidv1 = require('uuid/v1');
-const uuidUtils = require('./uuidUtils');
-
-const mwUtil = require('./mwUtil');
-
-// Temporary work-around for Parsoid issue
-// https://phabricator.wikimedia.org/T93715
-function normalizeHtml(html) {
- return html && html.toString &&
- html.toString()
- .replace(/ about="[^"]+"(?=[/> ])|]+>/g, '');
-}
-function sameHtml(a, b) {
- return normalizeHtml(a) === normalizeHtml(b);
-}
-
-/**
- * Makes sure we have a meta tag for the tid in our output
- * @param {string} html original HTML content
- * @param {string} tid the tid to insert
- * @return {string} modified html
- */
-function insertTidMeta(html, tid) {
- if (!/]+>/.test(html)) {
- return html.replace(/(
]+>)/,
- `$1`);
- }
- return html;
-}
-
-function extractTidMeta(html) {
- // Fall back to an inline meta tag in the HTML
- const tidMatch = new RegExp('')
- .exec(html);
- return tidMatch && (tidMatch[1] || tidMatch[2]);
-}
-
-/**
- * Checks whether the content has been modified since the timestamp
- * in `if-unmodified-since` header of the request
- * @param {Object} req the request
- * @param {Object} res the response
- * @return {boolean} true if content has beed modified
- */
-function isModifiedSince(req, res) {
- try {
- if (req.headers['if-unmodified-since']) {
- const jobTime = Date.parse(req.headers['if-unmodified-since']);
- const revInfo = mwUtil.parseETag(res.headers.etag);
- return revInfo && uuidUtils.getDate(revInfo.tid) >= jobTime;
- }
- } catch (e) {
- // Ignore errors from date parsing
- }
- return false;
-}
-
-/** HTML resource_change event emission
- * @param {HyperSwitch} hyper the hyperswitch router object
- * @param {Object} req the request
- * @param {boolean} [newContent] whether this is the newest revision
- * @return {Object} update response
- */
-function _dependenciesUpdate(hyper, req, newContent = true) {
- const rp = req.params;
- return mwUtil.getSiteInfo(hyper, req)
- .then((siteInfo) => {
- const baseUri = siteInfo.baseUri.replace(/^https?:/, '');
- const publicURI = `${baseUri}/page/html/${encodeURIComponent(rp.title)}`;
- const body = [ { meta: { uri: `${publicURI}/${rp.revision}` } } ];
- if (newContent) {
- body.push({ meta: { uri: publicURI } });
- }
- return hyper.post({
- uri: new URI([rp.domain, 'sys', 'events', '']),
- body
- }).catch((e) => {
- hyper.logger.log('warn/bg-updates', e);
- });
- });
-}
-
-function compileReRenderBlacklist(blacklist) {
- const result = {};
- blacklist = blacklist || {};
- Object.keys(blacklist).forEach((domain) => {
- result[domain] = mwUtil.constructRegex(blacklist[domain]);
- });
- return result;
-}
-
-class ParsoidService {
- constructor(options) {
- this._initOpts(options);
- }
-
- _initOpts(opts = {}) {
- this.options = opts;
- this.parsoidUri = opts.host || opts.parsoidHost;
- this.options.stash_ratelimit = opts.stash_ratelimit || 5;
- this._blacklist = compileReRenderBlacklist(opts.rerenderBlacklist);
- if (!this.parsoidUri) {
- throw new Error('Parsoid module: the option host must be provided!');
- }
- // remove the trailing slash, if any
- if (this.parsoidUri.slice(-1) === '/') {
- this.parsoidUri = this.parsoidUri.slice(0, -1);
- }
- }
-
- _checkStashRate(hyper, req) {
- if (!hyper.ratelimiter) {
- return;
- }
- if (hyper._rootReq.headers['x-request-class'] !== 'external') {
- return;
- }
- if (!((req.query && req.query.stash) || (req.body && req.body.stash))) {
- return;
- }
- const key = `${hyper.config.service_name}.parsoid_stash|` +
- `${hyper._rootReq.headers['x-client-ip']}`;
- if (hyper.ratelimiter.isAboveLimit(key, this.options.stash_ratelimit)) {
- hyper.logger.log('warn/parsoid/stashlimit', {
- key,
- rate_limit_per_second: this.options.stash_ratelimit,
- message: 'Stashing rate limit exceeded'
- });
- throw new HTTPError({
- status: 429,
- body: {
- type: 'request_rate_exceeded',
- title: 'Stashing rate limit exceeded',
- rate_limit_per_second: this.options.stash_ratelimit
- }
- });
- }
- }
-
- /**
- * Assembles the request that is to be used to call the Parsoid service
- *
- * @param {Object} req the original request received by the module
- * @param {string} path the path portion of the URI, without the domain or API version
- * @param {Object} [headers] the headers to send, defaults to req.headers
- * @param {Object} [body] the body of the request, defaults to undefined
- */
- _getParsoidReq(req, path, headers, body) {
- throw new Error('Parsoid module: _getParsoidReq() not implemented!');
- }
-
- /**
- * Gets the URI of a bucket for the latest Parsoid content
- *
- * @param {string} domain the domain name
- * @param {string} title the article title
- */
- _getLatestBucketURI(domain, title) {
- throw new Error('Parsoid module: _getLatestBucketURI() not implemented!');
- }
-
- /**
- * Gets the URI of a bucket for stashing Parsoid content. Used both for stashing
- * original HTML/Data-Parsoid for normal edits as well as for stashing transforms
- *
- * @param {string} domain the domain name
- * @param {string} title the article title
- * @param {number} revision the revision of the article
- * @param {string} tid the TID of the content
- */
- _getStashBucketURI(domain, title, revision, tid) {
- throw new Error('Parsoid module: _getStashBucketURI() not implemented!');
- }
-
- /**
- * Get full content from the stash bucket.
- * @param {HyperSwitch} hyper the hyper object to route requests
- * @param {string} domain the domain name
- * @param {string} title the article title
- * @param {number} revision the article revision
- * @param {string} tid the render TID
- * @return {Promise