Skip to content

Commit

Permalink
Merge a96b938 into 59e1721
Browse files Browse the repository at this point in the history
  • Loading branch information
Pchelolo committed Jun 19, 2018
2 parents 59e1721 + a96b938 commit 91f5961
Show file tree
Hide file tree
Showing 8 changed files with 245 additions and 4 deletions.
2 changes: 2 additions & 0 deletions config.test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ spec_root: &spec_root

# labs, used for most tests
/{domain:en.wikipedia.beta.wmflabs.org}: *labs_project
# Serbian wiki in beta for language variants tests
/{domain:sr.wikipedia.beta.wmflabs.org}: *default_project

# For security testing we rely on mocks, so it's OK to use French wiki.
/{domain:fr.wikipedia.org}:
Expand Down
83 changes: 83 additions & 0 deletions lib/language_variants_filter.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"use strict";

const HyperSwitch = require('hyperswitch');
const URI = HyperSwitch.URI;
const mwUtil = require('./mwUtil');

module.exports = (hyper, req, next, options, specInfo) => {
const rp = req.params;
const acceptLanguage = req.headers && req.headers['accept-language'];
if (!acceptLanguage || mwUtil.isNoCacheRequest(req)) {
delete req.headers['accept-language'];
return next(hyper, req)
// TODO: Store the Vary header so that if the page is possible
// to be converted we return a proper Vary even when accept-language
// is not specified.
// For now we hacky check by domain.
.then((res) => {
const langCode = req.params.domain.substring(0, req.params.domain.indexOf('.'));
if (mwUtil.canConvertLangVariant(hyper, req, langCode)) {
res.headers.vary = 'accept-language';
}
return res;
});
}

const revTableURI = [rp.domain, 'sys', 'page_revisions', 'page', rp.title];
if (rp.revision) {
revTableURI.push(`${rp.revision}`);
}
return hyper.get({ uri: new URI(revTableURI) })
.then((res) => {
const revision = res.body.items[0];
return mwUtil.shouldConvertLangVariant(hyper, req, revision.page_language, acceptLanguage)
.then((shouldConvert) => {
if (!shouldConvert) {
delete req.headers['accept-language'];
return next(hyper, req)
// TODO: Eventually we want to store the Vary header we get from Parsoid!
.then((res) => {
if (mwUtil.canConvertLangVariant(hyper, req, revision.page_language)) {
res.headers.vary = 'accept-language';
}
return res;
});
}

if (/\/page\/html\//.test(specInfo.path)) {
// It's HTML, hit Parsoid for conversion
return next(hyper, req)
.then(html => hyper.post({
uri: new URI([rp.domain, 'sys', 'parsoid', 'transform', 'html', 'to', 'html']),
headers: {
'content-type': 'application/json',
'content-language': revision.page_language
},
body: {
original: {
html: {
headers: html.headers,
body: html.body.toString()
}
},
updates: {
variant: {
source: null,
target: acceptLanguage
}
}
}
}))
// TODO: Now we fix up the Vary header cause Parsoid
// sets content-type in there which is wrong.
.then((res) => {
res.headers.vary = 'accept-language';
return res;
});
} else {
// TODO: It's something else, so just forward to MCS
return next(hyper, req);
}
});
});
};
42 changes: 41 additions & 1 deletion lib/mwUtil.js
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,46 @@ mwUtil.parseETag = (etag) => {
}
};


/**
* Checks whether a language variant could exist for
* a page in a requested language on a particular wiki.
* @param {HyperSwitch} hyper
* @param {Object} req
* @param {string} pageLanguage the language of the requested page.
* @return {Promise<boolean>}
*/
mwUtil.canConvertLangVariant = (hyper, req, pageLanguage) => {
return mwUtil.getSiteInfo(hyper, req)
.then(siteInfo => !!siteInfo.languagevariants[pageLanguage.toLowerCase()]);
};


/**
* Checks whether a specialized requested language variant exist for
* a page in a requested language on a particular wiki.
* @param {HyperSwitch} hyper
* @param {Object} req
* @param {string} pageLanguage the language of the requested page.
* @param {string} acceptLanguage the language variant accepted by the client.
* @return {Promise<boolean>}
*/
mwUtil.shouldConvertLangVariant = (hyper, req, pageLanguage, acceptLanguage) => {
if (!acceptLanguage) {
return P.resolve(false);
}
return mwUtil.getSiteInfo(hyper, req)
.then((siteInfo) => {
// First, whether specialized variants exist for the page lang on a wiki
const pageLangVariants = siteInfo.languagevariants[pageLanguage.toLowerCase()];
if (!pageLangVariants) {
return false;
}
// Second, if the requested language variant exist for page language
return pageLangVariants.indexOf(acceptLanguage.toLowerCase()) !== -1;
});
};

/**
* Extract the date from an `etag` header of the form
* "<revision>/<tid>/<suffix>"
Expand Down Expand Up @@ -257,7 +297,7 @@ mwUtil.decodeBody = (contentResponse) => {
*
* @param {!HyperSwitch} hyper
* @param {!Object} req
* @param {?String} domain Wiki domain to get siteinfo from (defaults to the request domain).
* @param {string} [domain] Wiki domain to get siteinfo from (defaults to the request domain).
*/
mwUtil.getSiteInfo = (hyper, req, domain) => hyper.get({
uri: new URI([domain || req.params.domain, 'sys', 'action', 'siteinfo'])
Expand Down
22 changes: 21 additions & 1 deletion sys/action.js
Original file line number Diff line number Diff line change
Expand Up @@ -292,13 +292,32 @@ class ActionService {
body: {
action: 'query',
meta: 'siteinfo|filerepoinfo',
siprop: 'general|namespaces|namespacealiases|specialpagealiases',
siprop: 'general' +
'|namespaces' +
'|namespacealiases' +
'|specialpagealiases' +
'|languagevariants',
format: 'json'
}
}, {}, (apiReq, res) => {
if (!res || !res.body || !res.body.query || !res.body.query.general) {
throw new Error(`SiteInfo is unavailable for ${rp.domain}`);
}
// Transform from original response format to
// {
// 'lang' => ['variant1', 'variant2' ... ]
// }
// for ease of use.
const origVariants = res.body.query.languagevariants;
const variants = {};
if (origVariants) {
Object.keys(origVariants).forEach((lang) => {
variants[lang.toLowerCase()] =
Object.keys(origVariants[lang].toLowerCase())
// Filter out non-specific variants like `en`, `zh` etc.
.filter(variant => /-/.test(variant));
});
}
return {
status: 200,
body: {
Expand All @@ -311,6 +330,7 @@ class ActionService {
namespaces: res.body.query.namespaces,
namespacealiases: res.body.query.namespacealiases,
specialpagealiases: res.body.query.specialpagealiases,
languagevariants: variants,
sharedRepoRootURI: findSharedRepoDomain(res),
baseUri: this._getBaseUri(req)
}
Expand Down
8 changes: 6 additions & 2 deletions sys/parsoid.js
Original file line number Diff line number Diff line change
Expand Up @@ -713,7 +713,7 @@ class ParsoidService {
parsoidTo = 'pagebundle';
}
let parsoidFrom = from;
if (from === 'html' && req.body.original && req.body.original['data-parsoid']) {
if (from === 'html' && req.body.original) {
parsoidFrom = 'pagebundle';
}
const parsoidExtras = [];
Expand All @@ -735,6 +735,7 @@ class ParsoidService {
headers: {
'content-type': 'application/json',
'user-agent': req['user-agent'],
'content-language': req.headers['content-language']
},
body: req.body
};
Expand All @@ -761,7 +762,10 @@ class ParsoidService {
return (hyper, req) => {
const rp = req.params;
if ((!req.body && req.body !== '')
|| (!req.body[from] && req.body[from] !== '')) {
// The html/to/html endpoint is a bit different so the `html`
// might not be provided.
|| (!(from === 'html' && to === 'html')
&& !req.body[from] && req.body[from] !== '')) {
throw new HTTPError({
status: 400,
body: {
Expand Down
87 changes: 87 additions & 0 deletions test/features/pagecontent/language_variants.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
'use strict';

// mocha defines to avoid JSHint breakage
/* global describe, it, before, beforeEach, after, afterEach */

const assert = require('../../utils/assert.js');
const preq = require('preq');
const server = require('../../utils/server.js');
const variantsPageTitle = 'RESTBase_Testing_Page';


describe('Language variants', function() {

this.timeout(20000);

before(() => server.start());

it('should request html with no variants', () => {
return preq.get({ uri: `${server.config.variantsWikiBucketURL}/html/${variantsPageTitle}`})
.then((res) => {
assert.deepEqual(res.status, 200);
assert.deepEqual(res.headers.vary.toLowerCase(), 'accept-language');
assert.deepEqual(/1\. Ово је тестна страница/.test(res.body), true);
assert.deepEqual(/2\. Ovo je testna stranica/.test(res.body), true);
});
});

it('should request html with default variant', () => {
return preq.get({
uri: `${server.config.variantsWikiBucketURL}/html/${variantsPageTitle}`,
headers: {
'accept-language': 'sr'
}
})
.then((res) => {
assert.deepEqual(res.status, 200);
assert.deepEqual(res.headers.vary.toLowerCase(), 'accept-language');
assert.deepEqual(/1\. Ово је тестна страница/.test(res.body), true);
assert.deepEqual(/2\. Ovo je testna stranica/.test(res.body), true);
});
});

it('should request html with wrong variant', () => {
return preq.get({
uri: `${server.config.variantsWikiBucketURL}/html/${variantsPageTitle}`,
headers: {
'accept-language': 'sr-blablabla'
}
})
.then((res) => {
assert.deepEqual(res.status, 200);
assert.deepEqual(res.headers.vary.toLowerCase(), 'accept-language');
assert.deepEqual(/1\. Ово је тестна страница/.test(res.body), true);
assert.deepEqual(/2\. Ovo je testna stranica/.test(res.body), true);
});
});

it('should request html with cyrillic variant', () => {
return preq.get({
uri: `${server.config.variantsWikiBucketURL}/html/${variantsPageTitle}`,
headers: {
'accept-language': 'sr-ec'
}
})
.then((res) => {
assert.deepEqual(res.status, 200);
assert.deepEqual(res.headers.vary.toLowerCase(), 'accept-language');
assert.deepEqual(/1\. Ово је тестна страница/.test(res.body), true);
assert.deepEqual(/2\. Ово је тестна страница/.test(res.body), true);
});
});

it('should request html with latin variant', () => {
return preq.get({
uri: `${server.config.variantsWikiBucketURL}/html/${variantsPageTitle}`,
headers: {
'accept-language': 'sr-el'
}
})
.then((res) => {
assert.deepEqual(res.status, 200);
assert.deepEqual(res.headers.vary.toLowerCase(), 'accept-language');
assert.deepEqual(/1\. Ovo je testna stranica/.test(res.body), true);
assert.deepEqual(/2\. Ovo je testna stranica/.test(res.body), true);
});
});
});
3 changes: 3 additions & 0 deletions test/utils/server.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ var secureURL = hostPort + '/fr.wikipedia.org/v1';
var secureBucketURL = secureURL + '/page';
var labsURL = hostPort + '/en.wikipedia.beta.wmflabs.org/v1';
var labsBucketURL = labsURL + '/page';
const variantsWikiURL = hostPort + '/sr.wikipedia.beta.wmflabs.org/v1';
const variantsWikiBucketURL = variantsWikiURL + '/page';

function loadConfig(path, forceSqlite) {
var confString = fs.readFileSync(path).toString();
Expand Down Expand Up @@ -50,6 +52,7 @@ var config = {
secureApiURL: 'https://fr.wikipedia.org/w/api.php',
labsURL: labsURL,
labsBucketURL: labsBucketURL,
variantsWikiBucketURL: variantsWikiBucketURL,
labsApiURL: 'https://en.wikipedia.beta.wmflabs.org/w/api.php',
logStream: logStream(),
conf: loadConfig(process.env.RB_TEST_CONFIG ? process.env.RB_TEST_CONFIG : __dirname + '/../../config.test.yaml')
Expand Down
2 changes: 2 additions & 0 deletions v1/content.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ paths:
get:
x-route-filters:
- path: lib/ensure_content_type.js
- path: ./lib/language_variants_filter.js
tags:
- Page content
summary: Get latest HTML for a title.
Expand Down Expand Up @@ -399,6 +400,7 @@ paths:
get:
x-route-filters:
- path: lib/ensure_content_type.js
- path: ./lib/language_variants_filter.js
tags:
- Page content
summary: Get HTML for a specific title/revision & optionally timeuuid.
Expand Down

0 comments on commit 91f5961

Please sign in to comment.