-
Notifications
You must be signed in to change notification settings - Fork 80
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement and test the transform API as specced #173
Changes from 7 commits
a0296cb
bcde0ab
57c2eb3
4ebe2d5
ebc31e2
f48eceb
481e194
8560726
ceb4a60
fd14000
71e33e4
b69c7e6
8cc6cea
161f590
f93c1ec
22e1fc4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -91,7 +91,6 @@ PSP.getRevisionInfo = function(restbase, req) { | |
uri: new URI([rp.domain,'sys','page_revisions','page',rp.title,rp.revision]) | ||
}) | ||
.then(function(res) { | ||
// FIXME: use tid range! | ||
var revInfo = res.body.items[0]; | ||
return revInfo; | ||
}); | ||
|
@@ -144,64 +143,140 @@ PSP.transformRevision = function (restbase, req, from, to) { | |
var self = this; | ||
var rp = req.params; | ||
|
||
var fromStorage = { | ||
revid: rp.revision | ||
}; | ||
|
||
function get(format) { | ||
return self.getRevisionInfo(restbase, req) | ||
.then(function(revInfo) { | ||
return restbase.get({ uri: self.getBucketURI(rp, format, revInfo.tid) }); | ||
return restbase.get({ | ||
uri: new URI([rp.domain,'sys','parsoid',format,rp.title,rp.revision]) | ||
}) | ||
.then(function (res) { | ||
if (res.body && | ||
res.body.headers && res.body.headers['content-type'] && | ||
res.body.body) { | ||
fromStorage[format] = { | ||
headers: { | ||
'content-type': res.body.headers['content-type'] | ||
}, | ||
body: res.body.body | ||
}; | ||
if (res.body && res.body.constructor === Buffer) { | ||
res.body = res.body.toString(); | ||
} | ||
return { | ||
headers: { | ||
'content-type': res.headers['content-type'] | ||
}, | ||
body: res.body | ||
}; | ||
}); | ||
} | ||
|
||
return Promise.all([ get('html'), get('wikitext'), get('data-parsoid') ]) | ||
.then(function () { | ||
// Get the revision info just to make sure we have access | ||
return self.getRevisionInfo(restbase, req) | ||
.then(function(revInfo) { | ||
return Promise.props({ | ||
html: get('html'), | ||
// wikitext: get('wikitext'), | ||
'data-parsoid': get('data-parsoid') | ||
}); | ||
}) | ||
.then(function (original) { | ||
original.revid = rp.revision; | ||
var body2 = { | ||
original: fromStorage | ||
original: original | ||
}; | ||
body2[from] = req.body; | ||
return restbase.post({ | ||
uri: new URI([rp.domain,'sys','parsoid','transform',from,'to',to]), | ||
body2[from] = req.body[from]; | ||
var path = [rp.domain,'sys','parsoid','transform',from,'to',to]; | ||
if (rp.title) { | ||
path.push(rp.title); | ||
if (rp.revision) { | ||
path.push(rp.revision); | ||
} | ||
} | ||
var newReq = { | ||
uri: new URI(path), | ||
params: req.params, | ||
headers: { 'content-type': 'application/json' }, | ||
body: body2 | ||
}); | ||
}; | ||
return self.callParsoidTransform(restbase, newReq, from, to); | ||
}); | ||
|
||
}; | ||
|
||
/**
 * Forward a transform request to the Parsoid v2 HTTP API.
 *
 * Builds the URL `<parsoidHost>/v2/<domain>/<format>/<title>[/<revision>]`
 * from `req.params` and POSTs `req.body` to it as JSON.
 *
 * @param {Object} restbase - dispatcher; only its `post` method is used
 * @param {Object} req - request; `req.params` supplies `domain` and the
 *   optional `title` / `revision`, `req.body` is forwarded unchanged
 * @param {string} from - source format (not used in this function)
 * @param {string} to - target format; 'wikitext' and 'html' are mapped to
 *   Parsoid's names, anything else is passed through as-is
 * @returns the result of `restbase.post` (callers chain `.then` on it, so
 *   presumably a promise)
 */
PSP.callParsoidTransform = function callParsoidTransform (restbase, req, from, to) {
    var rp = req.params;

    // Parsoid currently spells 'wikitext' as 'wt'
    var parsoidTo = to;
    if (to === 'wikitext') {
        parsoidTo = 'wt';
    } else if (to === 'html') {
        // Retrieve pagebundle whenever we want HTML
        parsoidTo = 'pagebundle';
    }

    // Fall back to a fake title to avoid the Parsoid error
    // <400/No title or wikitext was provided>
    var parsoidExtras = [rp.title || 'Main_Page'];
    if (rp.revision) {
        parsoidExtras.push(rp.revision);
    }
    // There is always at least the title segment, so the extra path is
    // never empty and always needs its leading slash.
    var parsoidExtraPath = '/' + parsoidExtras.map(encodeURIComponent).join('/');

    var domain = rp.domain;
    // Re-map test domain
    if (domain === 'en.wikipedia.test.local') { domain = 'en.wikipedia.org'; }

    // No debug dump of the request here: the body can hold entire pages.
    var parsoidReq = {
        uri: this.parsoidHost + '/v2/' + domain + '/'
            + parsoidTo + parsoidExtraPath,
        headers: { 'content-type': 'application/json' },
        body: req.body
    };
    return restbase.post(parsoidReq);
};
|
||
/**
 * Cheap body.innerHTML extraction.
 *
 * Returns everything between the opening body tag and the last closing
 * body tag of the given HTML string. Regexp matching is considered safe
 * here as we know that the HTML we are receiving from Parsoid is
 * serialized as XML.
 *
 * @param {string} html - serialized HTML document
 * @returns {string} the markup inside the body element
 * @throws {Error} if no opening/closing body tag pair is found
 */
function cheapBodyInnerHTML(html) {
    // A body can span multiple lines and `.` does not match line
    // terminators, so capture with [\s\S] (any character) instead.
    var match = /<body[^>]*>([\s\S]*)<\/body>/.exec(html);
    if (!match) {
        throw new Error('No HTML body found!');
    }
    return match[1];
}
|
||
PSP.makeTransform = function (from, to) { | ||
var self = this; | ||
|
||
return function (restbase, req) { | ||
var rp = req.params; | ||
if (false && !req.body[from]) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
throw new rbUtil.HTTPError({ | ||
status: 400, | ||
body: { | ||
type: 'invalid_request', | ||
description: 'Missing request parameter: ' + from | ||
} | ||
}); | ||
} | ||
var transform; | ||
if (rp.revision) { | ||
return self.transformRevision(restbase, req, from, to); | ||
transform = self.transformRevision(restbase, req, from, to); | ||
} else { | ||
// Parsoid currently spells 'wikitext' as 'wt' | ||
var parsoidTo = (to === 'wikitext') ? 'wt' : to; | ||
|
||
// fake title to avoid Parsoid error: <400/No title or wikitext was provided> | ||
var parsoidExtra = (from === 'html') ? '/_' : ''; | ||
|
||
return restbase.post({ | ||
uri: self.parsoidHost + '/v2/' + rp.domain + '/' + parsoidTo + parsoidExtra, | ||
headers: { 'content-type': 'application/json' }, | ||
body: req.body | ||
}); | ||
transform = self.callParsoidTransform(restbase, req, from, to); | ||
} | ||
return transform | ||
.then(function(res) { | ||
// Unwrap to the flat response format | ||
var innerRes = res.body[to]; | ||
innerRes.status = 200; | ||
// Handle bodyOnly flag | ||
if (to === 'html' && req.body.bodyOnly) { | ||
innerRes.body = cheapBodyInnerHTML(innerRes.body); | ||
} | ||
return innerRes; | ||
}); | ||
}; | ||
}; | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
'use strict'; | ||
|
||
// mocha defines to avoid JSHint breakage | ||
/* global describe, it, before, beforeEach, after, afterEach */ | ||
|
||
var assert = require('../../utils/assert.js'); | ||
var server = require('../../utils/server.js'); | ||
var preq = require('preq'); | ||
|
||
var testPage = { | ||
title: 'User:GWicke%2F_restbase_test', | ||
revision: '646859921', | ||
html: "<!DOCTYPE html>\n<html prefix=\"dc: http://purl.org/dc/terms/ mw: http://mediawiki.org/rdf/\" about=\"http://en.wikipedia.org/wiki/Special:Redirect/revision/646859921\"><head prefix=\"mwr: http://en.wikipedia.org/wiki/Special:Redirect/\"><meta property=\"mw:articleNamespace\" content=\"2\"/><link rel=\"dc:replaces\" resource=\"mwr:revision/0\"/><meta property=\"dc:modified\" content=\"2015-02-12T22:30:30.000Z\"/><meta about=\"mwr:user/11429869\" property=\"dc:title\" content=\"GWicke\"/><link rel=\"dc:contributor\" resource=\"mwr:user/11429869\"/><meta property=\"mw:revisionSHA1\" content=\"6417e5e59b2975e65eebb5104ea572913a61db7e\"/><meta property=\"dc:description\" content=\"selser test page\"/><meta property=\"mw:parsoidVersion\" content=\"0\"/><link rel=\"dc:isVersionOf\" href=\"//en.wikipedia.org/wiki/User%3AGWicke/_restbase_test\"/><title>User:GWicke/_restbase_test</title><base href=\"//en.wikipedia.org/wiki/\"/><link rel=\"stylesheet\" href=\"//en.wikipedia.org/w/load.php?modules=mediawiki.legacy.commonPrint,shared|mediawiki.skinning.elements|mediawiki.skinning.content|mediawiki.skinning.interface|skins.vector.styles|site|mediawiki.skinning.content.parsoid&only=styles&skin=vector\"/></head><body id=\"mwAA\" lang=\"en\" class=\"mw-content-ltr sitedir-ltr ltr mw-body mw-body-content mediawiki\" dir=\"ltr\"><div id=\"bar\">Selser test</div></body></html>", | ||
wikitext: '<div id=bar>Selser test' | ||
}; | ||
|
||
describe('transform api', function() { | ||
this.timeout(20000); | ||
|
||
before(function () { return server.start(); }); | ||
|
||
it('html2html', function () { | ||
return preq.post({ | ||
uri: server.config.baseURL | ||
+ '/transform/html/to/html/' + testPage.title | ||
+ '/' + testPage.revision, | ||
body: { | ||
html: testPage.html | ||
} | ||
}) | ||
.then(function (res) { | ||
assert.deepEqual(res.status, 200); | ||
var pattern = /<div id="bar">Selser test<\/div>/; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This block seems to have wrong indenting There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Strike that. It seems that the whole file is tab-indented. Are you crossing over to the dark side of tabs? 🙅 There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nah, just happened to be tab indented & I didn't change it. Shall fix. |
||
if (!pattern.test(res.body)) { | ||
throw new Error('Expected pattern in response: ' + pattern | ||
+ '\nSaw: ' + JSON.stringify(res, null, 2)); | ||
} | ||
assert.deepEqual(res.headers['content-type'], | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. you can use There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. K, done. |
||
'text/html;profile=mediawiki.org/specs/html/1.0.0'); | ||
}); | ||
}); | ||
|
||
it('wt2html', function () { | ||
return preq.post({ | ||
uri: server.config.baseURL | ||
+ '/transform/wikitext/to/html/User:GWicke%2F_restbase_test', | ||
body: { | ||
wikitext: '== Heading ==' | ||
} | ||
}) | ||
.then(function (res) { | ||
assert.deepEqual(res.status, 200); | ||
assert.deepEqual(res.headers['content-type'], | ||
'text/html;profile=mediawiki.org/specs/html/1.0.0'); | ||
var pattern = /<h2.*> Heading <\/h2>/; | ||
if (!pattern.test(res.body)) { | ||
throw new Error('Expected pattern in response: ' + pattern | ||
+ '\nSaw: ' + res.body); | ||
} | ||
}); | ||
}); | ||
|
||
it('wt2html with bodyOnly', function () { | ||
return preq.post({ | ||
uri: server.config.baseURL | ||
+ '/transform/wikitext/to/html/User:GWicke%2F_restbase_test', | ||
body: { | ||
wikitext: '== Heading ==', | ||
bodyOnly: true | ||
} | ||
}) | ||
.then(function (res) { | ||
assert.deepEqual(res.status, 200); | ||
assert.deepEqual(res.headers['content-type'], | ||
'text/html;profile=mediawiki.org/specs/html/1.0.0'); | ||
var pattern = /^<h2.*> Heading <\/h2>$/; | ||
if (!pattern.test(res.body)) { | ||
throw new Error('Expected pattern in response: ' + pattern | ||
+ '\nSaw: ' + res.body); | ||
} | ||
}); | ||
}); | ||
|
||
|
||
it('html2wt, no-selser', function () { | ||
return preq.post({ | ||
uri: server.config.baseURL | ||
+ '/transform/html/to/wikitext/User:GWicke%2F_restbase_test', | ||
body: { | ||
html: '<body>The modified HTML</body>' | ||
} | ||
}) | ||
.then(function (res) { | ||
assert.deepEqual(res.status, 200); | ||
assert.deepEqual(res.body, 'The modified HTML'); | ||
assert.deepEqual(res.headers['content-type'], | ||
'text/plain;profile=mediawiki.org/specs/wikitext/1.0.0'); | ||
}); | ||
}); | ||
|
||
// it('html2wt, selser', function () { | ||
// return preq.post({ | ||
// uri: server.config.baseURL | ||
// + '/transform/html/to/wikitext/' + testPage.title | ||
// + '/' + testPage.revision, | ||
// body: { | ||
// html: testPage.html | ||
// } | ||
// }) | ||
// .then(function (res) { | ||
// assert.deepEqual(res.status, 200); | ||
// assert.deepEqual(res.body, testPage.wikitext); | ||
// assert.deepEqual(res.headers['content-type'], | ||
// 'text/plain;profile=mediawiki.org/specs/wikitext/1.0.0'); | ||
// }); | ||
// }); | ||
|
||
}); | ||
|
||
|
||
|
||
/* TODO: actually implement wikitext fetching | ||
describe('storage-backed transform api', function() { | ||
this.timeout(20000); | ||
|
||
before(function () { return server.start(); }); | ||
|
||
it('should load a specific title/revision from storage to send as the "original"', function () { | ||
return preq.post({ | ||
uri: server.config.baseURL + '/transform/html/to/wikitext/Main_Page/1', | ||
headers: { 'content-type': 'application/json' }, | ||
body: { | ||
headers: { | ||
'content-type': 'text/html;profile=mediawiki.org/specs/html/1.0.0' | ||
}, | ||
body: '<html>The modified HTML</html>' | ||
} | ||
}) | ||
.then(function (res) { | ||
assert.deepEqual(res.status, 200); | ||
assert.deepEqual(res.body, { | ||
wikitext: { | ||
headers: { | ||
'content-type': 'text/plain;profile=mediawiki.org/specs/wikitext/1.0.0' | ||
}, | ||
body: 'The modified HTML' | ||
} | ||
}); | ||
}); | ||
}); | ||
|
||
}); | ||
*/ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These things are resolved in #172 ;)