Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement and test the transform API as specced #173

Merged
merged 16 commits into from
Feb 14, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions config.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ templates:
type: file
options:
parsoidHost: http://parsoid-lb.eqiad.wikimedia.org
# For local testing, use:
# parsoidHost: http://localhost:8000

/{module:action}:
x-modules:
Expand Down
10 changes: 5 additions & 5 deletions lib/rbUtil.js
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,7 @@ function read(req) {
});

req.on('end', function() {
req.body = Buffer.concat(chunks);
resolve();
return Buffer.concat(chunks);
});
});
}
Expand All @@ -132,6 +131,7 @@ rbUtil.parsePOST = function parsePOST(req) {
} else if (req.method !== 'POST') {
return Promise.resolve();
} else {
// Parse the POST
var headers = req.headers;
if (!headers['content-type']) {
headers = {
Expand All @@ -146,12 +146,12 @@ rbUtil.parsePOST = function parsePOST(req) {
// Increase the form field size limit from the 1M default.
limits: { fieldSize: 15 * 1024 * 1024 }
});
req.body = req.body || {};
var body = {};
bboy.on('field', function (field, val) {
req.body[field] = val;
body[field] = val;
});
bboy.on('finish', function () {
resolve();
resolve(body);
});
req.pipe(bboy);
});
Expand Down
3 changes: 1 addition & 2 deletions lib/server.js
Original file line number Diff line number Diff line change
Expand Up @@ -149,10 +149,9 @@ function handleRequest (opts, req, resp) {
return rbUtil.parsePOST(req)

// Then process the request
.then(function() {
.then(function(body) {
// Create a new, clean request object
var urlData = rbUtil.parseURL(req.url);
var body = req.body;

if (/^application\/json/i.test(req.headers['content-type'])) {
try {
Expand Down
142 changes: 106 additions & 36 deletions mods/parsoid.js
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@ PSP.getRevisionInfo = function(restbase, req) {
uri: new URI([rp.domain,'sys','page_revisions','page',rp.title,rp.revision])
})
.then(function(res) {
// FIXME: use tid range!
var revInfo = res.body.items[0];
return revInfo;
});
Expand Down Expand Up @@ -171,64 +170,135 @@ PSP.transformRevision = function (restbase, req, from, to) {
var self = this;
var rp = req.params;

var fromStorage = {
revid: rp.revision
};

function get(format) {
return self.getRevisionInfo(restbase, req)
.then(function(revInfo) {
return restbase.get({ uri: self.getBucketURI(rp, format, revInfo.tid) });
return restbase.get({
uri: new URI([rp.domain,'sys','parsoid',format,rp.title,rp.revision])
})
.then(function (res) {
if (res.body &&
res.body.headers && res.body.headers['content-type'] &&
res.body.body) {
fromStorage[format] = {
headers: {
'content-type': res.body.headers['content-type']
},
body: res.body.body
};
if (res.body && res.body.constructor === Buffer) {
res.body = res.body.toString();
}
return {
headers: {
'content-type': res.headers['content-type']
},
body: res.body
};
});
}

return Promise.all([ get('html'), get('wikitext'), get('data-parsoid') ])
.then(function () {
return Promise.props({
html: get('html'),
// wikitext: get('wikitext'),
'data-parsoid': get('data-parsoid')
})
.then(function (original) {
original.revid = rp.revision;
var body2 = {
original: fromStorage
original: original
};
body2[from] = req.body;
return restbase.post({
uri: new URI([rp.domain,'sys','parsoid','transform',from,'to',to]),
body2[from] = req.body[from];
var path = [rp.domain,'sys','parsoid','transform',from,'to',to];
if (rp.title) {
path.push(rp.title);
if (rp.revision) {
path.push(rp.revision);
}
}
var newReq = {
uri: new URI(path),
params: req.params,
headers: { 'content-type': 'application/json' },
body: body2
});
};
return self.callParsoidTransform(restbase, newReq, from, to);
});

};

/**
 * Send a transform request to the Parsoid service's v2 API.
 *
 * The request URI has the form
 *   <parsoidHost>/v2/<domain>/<format>/<title>[/<revision>]
 * with the title and revision URI-encoded.
 *
 * @param {Object} restbase - object whose post() method issues the request
 * @param {Object} req - incoming request; params.domain, params.title,
 *   params.revision and body are read
 * @param {string} from - source format (not used by this function)
 * @param {string} to - target format; 'wikitext' is sent to Parsoid as 'wt'
 *   and 'html' as 'pagebundle', anything else is passed through unchanged
 * @returns whatever restbase.post() returns for the Parsoid request
 */
PSP.callParsoidTransform = function callParsoidTransform (restbase, req, from, to) {
    var params = req.params;

    // Map our format names onto the ones Parsoid understands.
    var targetFormat;
    switch (to) {
        case 'wikitext':
            // Parsoid currently spells 'wikitext' as 'wt'
            targetFormat = 'wt';
            break;
        case 'html':
            // Retrieve pagebundle whenever we want HTML
            targetFormat = 'pagebundle';
            break;
        default:
            targetFormat = to;
    }

    // Title first (with a fake fallback title to avoid the Parsoid error
    // <400/No title or wikitext was provided>), then the optional revision.
    var segments = [params.title || 'Main_Page'];
    if (params.revision) {
        segments.push(params.revision);
    }
    var suffix = segments.map(function (segment) {
        return encodeURIComponent(segment);
    }).join('/');

    // Re-map test domain
    var requestedDomain = params.domain;
    var parsoidDomain = (requestedDomain === 'en.wikipedia.test.local') ?
        'en.wikipedia.org' : requestedDomain;

    return restbase.post({
        uri: this.parsoidHost + '/v2/' + parsoidDomain + '/' +
            targetFormat + (suffix ? '/' + suffix : ''),
        headers: { 'content-type': 'application/json' },
        body: req.body
    });
};

/**
 * Pull the markup between <body ...> and </body> out of an HTML string
 * with a single regular expression instead of a full parse.
 *
 * This is safe as we know that the HTML we are receiving from Parsoid is
 * serialized as XML.
 *
 * @param {string} html - serialized HTML document
 * @returns {string} everything between the opening <body> tag and the last
 *   closing </body> tag
 * @throws {Error} when no <body>...</body> pair is present
 */
function cheapBodyInnerHTML(html) {
    var bodyMatch = /<body[^>]*>([\s\S]*)<\/body>/.exec(html);
    if (bodyMatch) {
        return bodyMatch[1];
    }
    throw new Error('No HTML body found!');
}

/**
 * Build a request handler that transforms content from one format to
 * another.
 *
 * The returned handler:
 *  - rejects requests whose body lacks the `from` field with a 400
 *    HTTPError (thrown synchronously),
 *  - delegates to transformRevision() when a revision is given in the
 *    request params, and to callParsoidTransform() otherwise,
 *  - unwraps the `to` member of the response body into a flat response
 *    with status 200, optionally reducing HTML to the contents of <body>
 *    when `req.body.bodyOnly` is set.
 *
 * @param {string} from - name of the source format / request body field
 * @param {string} to - name of the target format / response body member
 * @returns {Function} handler taking (restbase, req) and returning a
 *   promise for the unwrapped response
 */
PSP.makeTransform = function (from, to) {
    var self = this;

    return function (restbase, req) {
        var rp = req.params;
        if (!req.body[from]) {
            throw new rbUtil.HTTPError({
                status: 400,
                body: {
                    type: 'invalid_request',
                    description: 'Missing request parameter: ' + from
                }
            });
        }

        // Neither branch may return early: both results have to pass
        // through the shared unwrapping step below.
        var transform;
        if (rp.revision) {
            transform = self.transformRevision(restbase, req, from, to);
        } else {
            transform = self.callParsoidTransform(restbase, req, from, to);
        }

        return transform
        .then(function(res) {
            // Unwrap to the flat response format
            var innerRes = res.body[to];
            innerRes.status = 200;
            // Handle bodyOnly flag
            if (to === 'html' && req.body.bodyOnly) {
                innerRes.body = cheapBodyInnerHTML(innerRes.body);
            }
            return innerRes;
        });
    };
};

Expand Down
Loading