Skip to content

Commit

Permalink
Merge 5d61671 into d08ad0e
Browse files Browse the repository at this point in the history
  • Loading branch information
hknustwmf committed Sep 4, 2019
2 parents d08ad0e + 5d61671 commit 38ae9dc
Show file tree
Hide file tree
Showing 8 changed files with 365 additions and 9 deletions.
4 changes: 3 additions & 1 deletion config.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,10 @@ services:
# XXX Check the base RESTBase URI
baseUriTemplate: "{{'http://{domain}:7231/{domain}/v1'}}"
parsoid:
# XXX Check Parsoid URL!
# XXX Check Parsoid/JS URL!
host: http://localhost:8142
# XXX Check Parsoid/PHP URL!
host_php: http://localhost:8142
table:
backend: sqlite
dbname: db.sqlite3
Expand Down
1 change: 1 addition & 0 deletions config.frontend.test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ default_project: &default_project
options: &default_options
parsoid:
host: https://parsoid-beta.wmflabs.org
host_php: https://parsoid-beta.wmflabs.org
grace_ttl: 1000000
action:
apiUriTemplate: "{{'https://{domain}/w/api.php'}}"
Expand Down
1 change: 1 addition & 0 deletions config.fullstack.test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ default_project: &default_project
options: &default_options
parsoid:
host: https://parsoid-beta.wmflabs.org
host_php: https://parsoid-beta.wmflabs.org
grace_ttl: 1000000
action:
apiUriTemplate: "{{'https://{domain}/w/api.php'}}"
Expand Down
215 changes: 215 additions & 0 deletions projects/example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,226 @@ paths:
x-modules:
- path: sys/page_save.js
/parsoid:
x-modules:
- path: sys/parsoid_proxy.js
options:
parsoidHost: '{{options.parsoid.host}}'
parsoidPhpHost: '{{options.parsoid.host_php}}'
response_cache_control: '{{options.purged_cache_control}}'
grace_ttl: '{{default(options.parsoid.grace_ttl, 86400)}}'
/parsoidjs:
x-modules:
- path: sys/parsoid.js
options:
parsoidHost: '{{options.parsoid.host}}'
response_cache_control: '{{options.purged_cache_control}}'
grace_ttl: '{{default(options.parsoid.grace_ttl, 86400)}}'
# A list of pages that we don't currently want to re-render on
# each edit. Most of these are huge bot-edited pages, which are
# rarely viewed in any case.
rerenderBlacklist:
# en wiki
en.wikipedia.org:
- 'User:B-bot/Event_log'
- 'User:DeltaQuad/UAA/Wait'
- 'User:JamesR/AdminStats'
- 'User:Kudpung/Dashboard'
# Various dashboards
- 'User:Breawycker/Wikipedia'
- 'User:Sonia/dashboard'
- 'User:Ocaasi/dashboard'
- 'User:Nolelover'
- 'User:Calmer_Waters'
- '/User%3ARedwolf24\//'
- 'User:Technical_13/dashboard'
- 'Template:Cratstats'
# Cyberbot is creating 90% of null edits
- '/^User:Cyberbot_I\//'
- '/^User:Cyberbot_II\//'
- '/^User:Cyberpower678\//'
- '/^User:Darts170Darts170\//'
- 'صارف:Cyberbot_I/Run/Adminstats'
- 'Defnyddiwr:Cyberbot_I/Run/Adminstats'
- 'User:Pentjuuu!.!/sandbox'
- 'User:AllyD/CSDlog'
- 'User:Peter_I._Vardy/sandbox-13'
- 'User:I_dream_of_horses/CSD_log'
- 'User:MJ180MJ180/sandbox'
- 'Talk:United_States_presidential_election,_2016'
- 'Wikipedia:Reference_desk/Humanities'
- 'Wikipedia:WikiProject_Deletion_sorting/People'
- 'Wikipedia:WikiProject_Deletion_sorting/United_States_of_America'
- 'Wikipedia:Articles_for_creation/Redirects'
- 'Wikipedia:Administrators%27_noticeboard/Incidents'
# Wikipedia
ca.wikipedia.org:
- 'Usuari:TronaBot/log:Activitat_reversors_per_hores'
ceb.wikipedia.org:
- 'Gumagamit:Lsjbot/Anomalier-PRIVAT'
- 'Gumagamit:Lsjbot/Kartrutor2'
de.wikipedia.org:
- '/The_Big_Bang_Theory\/Staffel/'
- 'Wikipedia:Café'
- 'Wikipedia:Defekte_Weblinks/Bot2015-Problem'
- 'Wikipedia_Diskussion:Hauptseite/Schon_gewusst'
- 'Benutzer:Anglo-Araneophilus/Almigdad_Mojalli'
- 'Benutzer:Wartungsstube/Berlin'
- 'Benutzer:Wartungsstube/Musik'
- 'Benutzer:Wartungsstube/Unternehmen'
- 'Benutzer:Wartungsstube/Schifffahrt'
- 'Benutzer:Verum/ege'
- 'Benutzer:Septembermorgen/Bottabelle/Französische_Kantone_N–Z'
- 'Wikipedia:WikiProjekt_Planen_und_Bauen/Zu_überarbeitende_Artikel'
es.wikipedia.org:
- 'Wikipedia:Café/Archivo/Miscelánea/Actual'
fr.wikipedia.org:
- 'Utilisateur:ZéroBot/Log/Erreurs'
- 'Utilisateur:SyntaxTerror/Ajouts_du_modèle_Autorité'
# Pattern entries in this list are delimited by a leading AND a trailing
# slash; the closing slash was missing here, leaving this entry unlike
# every other pattern (presumably it was then compared as a literal
# title and never matched - confirm against the blacklist compiler).
- '/^Utilisateur:[\s\S]+[Bb]rouillon/'
- 'Discussion_utilisateur:NaggoBot/CommonsDR'
- 'Projet:France/Annonces/Admissibilité'
- '/Wikipédia:Le_saviez-vous_.+/Anecdotes_proposées/'
hy.wikipedia.org:
- "/Մասնակից:Omicroñ\\'R/"
it.wikipedia.org:
- 'Utente:Effems/Sandbox7'
nl.wikipedia.org:
- 'Gebruiker:Eg-T2g/Kladblok'
pt.wikipedia.org:
- 'Wikipédia:Pedidos/Bloqueio'
ru.wikipedia.org:
- 'Википедия:Форум/Технический'
- 'Портал:Герпетология'
sv.wikipedia.org:
- 'Användare:Lsjbot/Anomalier-PRIVAT'
- 'Användare:Lsjbot/Namnkonflikter-PRIVAT'
ur.wikipedia.org:
- 'نام_مقامات_ایل'
- 'نام_مقامات_ڈی'
- 'نام_مقامات_جے'
- 'نام_مقامات_جی'
- 'نام_مقامات_ایچ'
- 'نام_مقامات_ایم'
- 'نام_مقامات_ایس'
zh.wikipedia.org:
- 'Wikipedia:互助客栈/条目探讨'
- 'Draft:日本人工湖列表'
# Wikisource
pl.wikisource.org:
- '/^Wśród_czarnych\//'
# Wikimedia
commons.wikimedia.org:
- '/Commons:Featured_picture_candidates\//'
- 'Commons:Quality_images/Subject/Places/Natural_structures'
- '/Commons:Undeletion_requests\//'
- '/Commons:WikiProject_Aviation\/recent_uploads\//'
- '/^(?:User|Benutzer):/'
/parsoidphp:
x-modules:
- path: sys/parsoid.js
options:
parsoidHost: '{{options.parsoid.host_php}}'
response_cache_control: '{{options.purged_cache_control}}'
grace_ttl: '{{default(options.parsoid.grace_ttl, 86400)}}'
# A list of pages that we don't currently want to re-render on
# each edit. Most of these are huge bot-edited pages, which are
# rarely viewed in any case.
rerenderBlacklist:
# en wiki
en.wikipedia.org:
- 'User:B-bot/Event_log'
- 'User:DeltaQuad/UAA/Wait'
- 'User:JamesR/AdminStats'
- 'User:Kudpung/Dashboard'
# Various dashboards
- 'User:Breawycker/Wikipedia'
- 'User:Sonia/dashboard'
- 'User:Ocaasi/dashboard'
- 'User:Nolelover'
- 'User:Calmer_Waters'
- '/User%3ARedwolf24\//'
- 'User:Technical_13/dashboard'
- 'Template:Cratstats'
# Cyberbot is creating 90% of null edits
- '/^User:Cyberbot_I\//'
- '/^User:Cyberbot_II\//'
- '/^User:Cyberpower678\//'
- '/^User:Darts170Darts170\//'
- 'صارف:Cyberbot_I/Run/Adminstats'
- 'Defnyddiwr:Cyberbot_I/Run/Adminstats'
- 'User:Pentjuuu!.!/sandbox'
- 'User:AllyD/CSDlog'
- 'User:Peter_I._Vardy/sandbox-13'
- 'User:I_dream_of_horses/CSD_log'
- 'User:MJ180MJ180/sandbox'
- 'Talk:United_States_presidential_election,_2016'
- 'Wikipedia:Reference_desk/Humanities'
- 'Wikipedia:WikiProject_Deletion_sorting/People'
- 'Wikipedia:WikiProject_Deletion_sorting/United_States_of_America'
- 'Wikipedia:Articles_for_creation/Redirects'
- 'Wikipedia:Administrators%27_noticeboard/Incidents'
# Wikipedia
ca.wikipedia.org:
- 'Usuari:TronaBot/log:Activitat_reversors_per_hores'
ceb.wikipedia.org:
- 'Gumagamit:Lsjbot/Anomalier-PRIVAT'
- 'Gumagamit:Lsjbot/Kartrutor2'
de.wikipedia.org:
- '/The_Big_Bang_Theory\/Staffel/'
- 'Wikipedia:Café'
- 'Wikipedia:Defekte_Weblinks/Bot2015-Problem'
- 'Wikipedia_Diskussion:Hauptseite/Schon_gewusst'
- 'Benutzer:Anglo-Araneophilus/Almigdad_Mojalli'
- 'Benutzer:Wartungsstube/Berlin'
- 'Benutzer:Wartungsstube/Musik'
- 'Benutzer:Wartungsstube/Unternehmen'
- 'Benutzer:Wartungsstube/Schifffahrt'
- 'Benutzer:Verum/ege'
- 'Benutzer:Septembermorgen/Bottabelle/Französische_Kantone_N–Z'
- 'Wikipedia:WikiProjekt_Planen_und_Bauen/Zu_überarbeitende_Artikel'
es.wikipedia.org:
- 'Wikipedia:Café/Archivo/Miscelánea/Actual'
fr.wikipedia.org:
- 'Utilisateur:ZéroBot/Log/Erreurs'
- 'Utilisateur:SyntaxTerror/Ajouts_du_modèle_Autorité'
# Pattern entries in this list are delimited by a leading AND a trailing
# slash; the closing slash was missing here, leaving this entry unlike
# every other pattern (presumably it was then compared as a literal
# title and never matched - confirm against the blacklist compiler).
- '/^Utilisateur:[\s\S]+[Bb]rouillon/'
- 'Discussion_utilisateur:NaggoBot/CommonsDR'
- 'Projet:France/Annonces/Admissibilité'
- '/Wikipédia:Le_saviez-vous_.+/Anecdotes_proposées/'
hy.wikipedia.org:
- "/Մասնակից:Omicroñ\\'R/"
it.wikipedia.org:
- 'Utente:Effems/Sandbox7'
nl.wikipedia.org:
- 'Gebruiker:Eg-T2g/Kladblok'
pt.wikipedia.org:
- 'Wikipédia:Pedidos/Bloqueio'
ru.wikipedia.org:
- 'Википедия:Форум/Технический'
- 'Портал:Герпетология'
sv.wikipedia.org:
- 'Användare:Lsjbot/Anomalier-PRIVAT'
- 'Användare:Lsjbot/Namnkonflikter-PRIVAT'
ur.wikipedia.org:
- 'نام_مقامات_ایل'
- 'نام_مقامات_ڈی'
- 'نام_مقامات_جے'
- 'نام_مقامات_جی'
- 'نام_مقامات_ایچ'
- 'نام_مقامات_ایم'
- 'نام_مقامات_ایس'
zh.wikipedia.org:
- 'Wikipedia:互助客栈/条目探讨'
- 'Draft:日本人工湖列表'
# Wikisource
pl.wikisource.org:
- '/^Wśród_czarnych\//'
# Wikimedia
commons.wikimedia.org:
- '/Commons:Featured_picture_candidates\//'
- 'Commons:Quality_images/Subject/Places/Natural_structures'
- '/Commons:Undeletion_requests\//'
- '/Commons:WikiProject_Aviation\/recent_uploads\//'
- '/^(?:User|Benutzer):/'
options: '{{options}}'

15 changes: 13 additions & 2 deletions projects/sys/default.wmf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,13 @@ paths:
- path: sys/page_save.js
/parsoid:
x-modules:
- path: sys/parsoid.js
options:
- path: sys/parsoid_proxy.js
/parsoidjs:
x-modules:
- path: sys/parsoid.js
options: &parsoidopts
parsoidHost: '{{options.parsoid.host}}'
bucketName: 'parsoid'
response_cache_control: '{{options.purged_cache_control}}'
grace_ttl: '{{default(options.parsoid.grace_ttl, 86400)}}'
# A list of pages that we don't currently want to re-render on
Expand Down Expand Up @@ -116,6 +120,13 @@ paths:
- '/Commons:Undeletion_requests\//'
- '/Commons:WikiProject_Aviation\/recent_uploads\//'
- '/^(?:User|Benutzer):/'
/parsoidphp:
x-modules:
- path: sys/parsoid.js
options:
<<: *parsoidopts
parsoidHost: '{{options.parsoid.host_php}}'
bucketName: 'parsoidphp'
/events:
x-modules:
- path: sys/events.js
Expand Down
13 changes: 7 additions & 6 deletions sys/parsoid.js
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ function extractTidMeta(html) {
* in `if-unmodified-since` header of the request
* @param {Object} req the request
* @param {Object} res the response
* @return {boolean} true if content has beed modified
* @return {boolean} true if content has been modified
*/
function isModifiedSince(req, res) {
try {
Expand Down Expand Up @@ -125,10 +125,11 @@ class ParsoidService {
_initOpts(opts = {}) {
this.options = opts;
this.parsoidHost = opts.parsoidHost;
this.bucketName = opts.bucketName || 'parsoid';
this.options.stash_ratelimit = opts.stash_ratelimit || 5;
this.options.grace_ttl = opts.grace_ttl || 86400;
this._blacklist = compileReRenderBlacklist(opts.rerenderBlacklist);
if (!opts.parsoidHost) {
if (!this.parsoidHost) {
throw new Error('Parsoid module: the option parsoidHost must be provided!');
}
}
Expand Down Expand Up @@ -170,7 +171,7 @@ class ParsoidService {
*/
getLatestBucketURI(domain, title) {
return new URI([
domain, 'sys', 'key_value', 'parsoid', title
domain, 'sys', 'key_value', this.bucketName, title
]);
}

Expand All @@ -186,7 +187,7 @@ class ParsoidService {
*/
getStashBucketURI(domain, title, revision, tid) {
return new URI([
domain, 'sys', 'key_value', 'parsoid-stash', `${title}:${revision}:${tid}`
domain, 'sys', 'key_value', `${this.bucketName}-stash`, `${title}:${revision}:${tid}`
]);
}

Expand Down Expand Up @@ -799,7 +800,7 @@ module.exports = (options) => {
// Dynamic resource dependencies, specific to implementation
resources: [
{
uri: '/{domain}/sys/key_value/parsoid',
uri: `/{domain}/sys/key_value/${ps.bucketName}`,
headers: {
'content-type': 'application/json'
},
Expand All @@ -808,7 +809,7 @@ module.exports = (options) => {
}
},
{
uri: '/{domain}/sys/key_value/parsoid-stash',
uri: `/{domain}/sys/key_value/${ps.bucketName}-stash`,
headers: {
'content-type': 'application/json'
},
Expand Down
54 changes: 54 additions & 0 deletions sys/parsoid_proxy.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
'use strict';

/*
* Simple proxy to route requests to the client-requested
* Parsoid variant (JS or PHP) during the transition
* period. Parsoid/JS is being phased out and replaced by
* Parsoid/PHP.
*/

const HyperSwitch = require('hyperswitch');
const URI = HyperSwitch.URI;
const mwUtil = require('../lib/mwUtil');
const VARIANT_HDR_NAME = 'x-parsoid-variant';

module.exports = () => {
return {
spec: {
paths: {
'/{+path}': {
all: {
operationId: 'proxy_parsoid_variant'
}
}
}
},
operations: {
proxy_parsoid_variant: (hyper, req) => {
const rootReqHeaders = hyper._rootReq.headers || {};
if (!rootReqHeaders[VARIANT_HDR_NAME]) {
rootReqHeaders[VARIANT_HDR_NAME] = 'JS';
}
const isPhpVariant = /PHP/i.test(rootReqHeaders[VARIANT_HDR_NAME]);

return hyper.request({
method: req.method,
uri: new URI(req.uri.toString().replace('/parsoid/',
isPhpVariant ? '/parsoidphp/' : '/parsoidjs/')),
headers: req.headers,
body: req.body,
query: req.query,
params: req.params
})
.then((res) => {
if (res) {
res.headers = res.headers || {};
res.headers[VARIANT_HDR_NAME] = rootReqHeaders[VARIANT_HDR_NAME];
mwUtil.addVaryHeader(res, VARIANT_HDR_NAME);
}
return res;
});
}
}
};
};
Loading

0 comments on commit 38ae9dc

Please sign in to comment.