Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch all PDF render traffic to new Proton service #1126

Merged
merged 3 commits into from
Jun 4, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,15 @@ coverage
config.yaml
node_modules
npm-debug.log
package-lock.json

# WebStorm IDE files
.idea/*
*.iml

#VS Code
.vscode

# vim temp files
*swp

Expand Down
6 changes: 1 addition & 5 deletions config.example.wikimedia.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,7 @@ default_project: &default_project
pdf:
# Cache PDF for 5 minutes since it's not purged
cache_control: s-maxage=600, max-age=600
uri: https://pdfrender-beta.wmflabs.org
new_uri: https://proton-beta.wmflabs.org
new_probability: 1
secret: secret
scheme: https
uri: https://proton-beta.wmflabs.org
transform:
cx_host: https://cxserver-beta.wmflabs.org
skip_updates: false
Expand Down
4 changes: 1 addition & 3 deletions config.test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,7 @@ default_project: &default_project
pdf:
# Cache PDF for 5 minutes since it's not purged
cache_control: s-maxage=600, max-age=600
uri: https://pdfrender-beta.wmflabs.org
new_uri: https://proton-beta.wmflabs.org
secret: secret
uri: https://proton-beta.wmflabs.org
transform:
cx_host: https://cxserver-beta.wmflabs.org
skip_updates: false
Expand Down
14 changes: 6 additions & 8 deletions test/features/pdf.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,9 @@ describe('PDF Service', () => {
before(() => server.start());
after(() => server.stop());

// TODO: PDF tests a very unreliable, so skip them until T181084 is resolved
it.skip('Should get PDF for a page', () => {
it('Should get PDF for a page', () => {
return preq.get({
uri: `${server.config.bucketURL('ru.wikipedia.org')}/pdf/%D0%94%D0%B0%D1%80%D1%82_%D0%92%D0%B5%D0%B9%D0%B4%D0%B5%D1%80`
uri: `${server.config.bucketURL('en.wikipedia.beta.wmflabs.org')}/pdf/%D0%94%D0%B0%D1%80%D1%82_%D0%92%D0%B5%D0%B9%D0%B4%D0%B5%D1%80`,
})
.then((res) => {
assert.deepEqual(res.status, 200);
Expand All @@ -22,13 +21,12 @@ describe('PDF Service', () => {
assert.deepEqual(res.headers['content-type'], 'application/pdf');
assert.ok(/"\d+\/[\d\w-]+"/.test(res.headers.etag));
assert.ok(res.body.length !== 0);
});
}).timeout(100000);
});

// TODO: PDF tests a very unreliable, so skip them until T181084 is resolved
it.skip('Should get PDF for a page containing a quote in its title', () => {
it('Should get PDF for a page containing a quote in its title', () => {
return preq.get({
uri: `${server.config.bucketURL()}/pdf/"...And_Ladies_of_the_Club"`
uri: `${server.config.bucketURL('en.wikipedia.beta.wmflabs.org')}/pdf/"...And_Ladies_of_the_Club"`,
})
.then((res) => {
assert.deepEqual(res.status, 200);
Expand All @@ -38,6 +36,6 @@ describe('PDF Service', () => {
assert.deepEqual(res.headers['content-type'], 'application/pdf');
assert.ok(/"\d+\/[\d\w-]+"/.test(res.headers.etag));
assert.ok(res.body.length !== 0);
});
}).timeout(100000);
});
});
2 changes: 1 addition & 1 deletion test/features/router/misc.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ function getHeader(res, name) {
}

describe('router - misc', function() {
this.timeout(20000);
this.timeout(100000);
const server = new Server();
before(() => server.start());
after(() => server.stop());
Expand Down
63 changes: 7 additions & 56 deletions v1/pdf.js
Original file line number Diff line number Diff line change
@@ -1,70 +1,21 @@
'use strict';

const HyperSwitch = require('hyperswitch');
const URI = HyperSwitch.URI;
const spec = HyperSwitch.utils.loadSpec(`${__dirname}/pdf.yaml`);

function filenameParameters(name) {
// Return two parameters
const encodedName = `${encodeURIComponent(name)}.pdf`;
const quotedName = `"${encodedName.replace(/"/g, '\\"')}"`;
return `filename=${quotedName}; filename*=UTF-8''${encodedName}`;
}

/**
* PDF filename formatting / escaping utilities.
* @param {Object} options
* @return {Object} response with headers and PDF in body
*/

module.exports = (options) => ({
spec,
globals: {
options
},
operations: {
generatePDF: (hyper, req) => {
const rp = req.params;
const newProbability = options.new_probability || 0;
return hyper.get(new URI([rp.domain, 'sys', 'page_revisions', 'page', rp.title]))
.then((latestRevision) => {
if (options.new_uri && (Math.random() < newProbability || req.query.new_pdf)) {
const newResult = hyper.get(new URI(
`${options.new_uri}/${rp.domain}/v1/` +
`pdf/${encodeURIComponent(rp.title)}/a4/desktop`
))
.catch((e) => {
hyper.logger.log('error/proton', e);
if (req.query.new_pdf) {
throw e;
}
});
if (req.query.new_pdf) {
return newResult;
}
}
return hyper.get({
uri: new URI(`${options.uri}/pdf`),
query: {
accessKey: options.secret,
url: `${options.scheme || 'https'}://${rp.domain}/wiki/` +
`${encodeURIComponent(rp.title)}?printable=yes`
}
})
.then((res) => {
return {
status: 200,
headers: {
'content-disposition': `attachment; ${filenameParameters(rp.title)}`,
'content-type': res.headers['content-type'],
'content-length': res.headers['content-length'],
'cache-control': options['cache-control'] ||
's-maxage=600, max-age=600',
etag: latestRevision.headers.etag
},
body: res.body
};
});
});
options,
filenameParameters(name) {
// Return two parameters
const encodedName = `${encodeURIComponent(name)}.pdf`;
const quotedName = `"${encodedName.replace(/"/g, '\\"')}"`;
return `filename=${quotedName}; filename*=UTF-8''${encodedName}`;
}
}
});
52 changes: 29 additions & 23 deletions v1/pdf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ paths:
- path: ./lib/access_check_filter.js
options:
redirect_cache_control: '{{options.cache_control}}'
- type: default
name: ratelimit_route
options:
limits:
internal: 5
external: 1
get:
tags:
- Page content
Expand Down Expand Up @@ -57,27 +63,27 @@ paths:
application/problem+json:
schema:
$ref: '#/components/schemas/problem'
operationId: generatePDF
# x-request-handler:
# - get_latest_revision:
# request:
# method: get
# uri: /{domain}/sys/page_revisions/page/{title}
# - get_pdf_from_backend:
# request:
# method: get
# # Note: The title needs to be encoded twice.
# uri: '{{options.uri}}/pdf?accessKey={options.secret}&url={{default(options.scheme, "https")}}://{{domain}}/wiki/{_encodeURIComponent(title)}%3Fprintable=yes'
# return:
# status: 200
# headers:
# # Firefox supports filename*= syntax while Chrome respect
# # filename=. Safari is stupid and understands neither.
# # TODO: Quote raw `"` chars in filename.
# content-disposition: 'attachment; {{filenameParameters(request.params.title)}}'
# content-type: '{{get_pdf_from_backend.headers.content-type}}'
# content-length: '{{get_pdf_from_backend.headers.content-length}}'
# cache-control: '{{default(options.cache_control, "s-maxage=600, max-age=600")}}'
# etag: '{{get_latest_revision.headers.etag}}'
# body: '{{get_pdf_from_backend.body}}'
x-request-handler:
- get_latest_revision:
request:
method: get
uri: /{domain}/sys/page_revisions/page/{title}
- get_pdf_from_backend:
request:
method: get
# Hard-coded output options will be replaced later by extending
# the RESTBase endpoint with additional paramters. Defaults
# will be A4 and desktop if no parameters are provided.
uri: '{{options.uri}}/{{domain}}/v1/pdf/{title}/a4/desktop'
return:
status: 200
headers:
# Firefox supports filename*= syntax while Chrome respect
# filename=. Safari is stupid and understands neither.
content-disposition: 'attachment; {{filenameParameters(request.params.title)}}'
hknustwmf marked this conversation as resolved.
Show resolved Hide resolved
content-type: '{{get_pdf_from_backend.headers.content-type}}'
content-length: '{{get_pdf_from_backend.headers.content-length}}'
cache-control: '{{default(options.cache_control, "s-maxage=600, max-age=600")}}'
etag: '{{get_latest_revision.headers.etag}}'
body: '{{get_pdf_from_backend.body}}'
x-monitor: false # PDF generation is expensive and it's not stored, so don't run checker script