Skip to content

Commit

Permalink
Merge 34a1b1f into 1d0c716
Browse files Browse the repository at this point in the history
  • Loading branch information
hknustwmf committed May 29, 2019
2 parents 1d0c716 + 34a1b1f commit c846869
Show file tree
Hide file tree
Showing 7 changed files with 43 additions and 96 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Expand Up @@ -3,11 +3,15 @@ coverage
config.yaml
node_modules
npm-debug.log
package-lock.json

# WebStorm IDE files
.idea/*
*.iml

#VS Code
.vscode

# vim temp files
*swp

Expand Down
6 changes: 1 addition & 5 deletions config.example.wikimedia.yaml
Expand Up @@ -50,11 +50,7 @@ default_project: &default_project
pdf:
# Cache PDF for 5 minutes since it's not purged
cache_control: s-maxage=600, max-age=600
uri: https://pdfrender-beta.wmflabs.org
new_uri: https://proton-beta.wmflabs.org
new_probability: 1
secret: secret
scheme: https
uri: https://proton-beta.wmflabs.org
transform:
cx_host: https://cxserver-beta.wmflabs.org
skip_updates: false
Expand Down
4 changes: 1 addition & 3 deletions config.test.yaml
Expand Up @@ -47,9 +47,7 @@ default_project: &default_project
pdf:
# Cache PDF for 5 minutes since it's not purged
cache_control: s-maxage=600, max-age=600
uri: https://pdfrender-beta.wmflabs.org
new_uri: https://proton-beta.wmflabs.org
secret: secret
uri: https://proton-beta.wmflabs.org
transform:
cx_host: https://cxserver-beta.wmflabs.org
skip_updates: false
Expand Down
14 changes: 6 additions & 8 deletions test/features/pdf.js
Expand Up @@ -9,10 +9,9 @@ describe('PDF Service', () => {
before(() => server.start());
after(() => server.stop());

// TODO: PDF tests a very unreliable, so skip them until T181084 is resolved
it.skip('Should get PDF for a page', () => {
it('Should get PDF for a page', () => {
return preq.get({
uri: `${server.config.bucketURL('ru.wikipedia.org')}/pdf/%D0%94%D0%B0%D1%80%D1%82_%D0%92%D0%B5%D0%B9%D0%B4%D0%B5%D1%80`
uri: `${server.config.bucketURL('en.wikipedia.beta.wmflabs.org')}/pdf/%D0%94%D0%B0%D1%80%D1%82_%D0%92%D0%B5%D0%B9%D0%B4%D0%B5%D1%80`,
})
.then((res) => {
assert.deepEqual(res.status, 200);
Expand All @@ -22,13 +21,12 @@ describe('PDF Service', () => {
assert.deepEqual(res.headers['content-type'], 'application/pdf');
assert.ok(/"\d+\/[\d\w-]+"/.test(res.headers.etag));
assert.ok(res.body.length !== 0);
});
}).timeout(100000);
});

// TODO: PDF tests a very unreliable, so skip them until T181084 is resolved
it.skip('Should get PDF for a page containing a quote in its title', () => {
it('Should get PDF for a page containing a quote in its title', () => {
return preq.get({
uri: `${server.config.bucketURL()}/pdf/"...And_Ladies_of_the_Club"`
uri: `${server.config.bucketURL('en.wikipedia.beta.wmflabs.org')}/pdf/"...And_Ladies_of_the_Club"`,
})
.then((res) => {
assert.deepEqual(res.status, 200);
Expand All @@ -38,6 +36,6 @@ describe('PDF Service', () => {
assert.deepEqual(res.headers['content-type'], 'application/pdf');
assert.ok(/"\d+\/[\d\w-]+"/.test(res.headers.etag));
assert.ok(res.body.length !== 0);
});
}).timeout(100000);
});
});
2 changes: 1 addition & 1 deletion test/features/router/misc.js
Expand Up @@ -12,7 +12,7 @@ function getHeader(res, name) {
}

describe('router - misc', function() {
this.timeout(20000);
this.timeout(100000);
const server = new Server();
before(() => server.start());
after(() => server.stop());
Expand Down
63 changes: 7 additions & 56 deletions v1/pdf.js
@@ -1,70 +1,21 @@
'use strict';

const HyperSwitch = require('hyperswitch');
const URI = HyperSwitch.URI;
const spec = HyperSwitch.utils.loadSpec(`${__dirname}/pdf.yaml`);

function filenameParameters(name) {
// Return two parameters
const encodedName = `${encodeURIComponent(name)}.pdf`;
const quotedName = `"${encodedName.replace(/"/g, '\\"')}"`;
return `filename=${quotedName}; filename*=UTF-8''${encodedName}`;
}

/**
* PDF filename formatting / escaping utilities.
* @param {Object} options
* @return {Object} response with headers and PDF in body
*/

module.exports = (options) => ({
spec,
globals: {
options
},
operations: {
generatePDF: (hyper, req) => {
const rp = req.params;
const newProbability = options.new_probability || 0;
return hyper.get(new URI([rp.domain, 'sys', 'page_revisions', 'page', rp.title]))
.then((latestRevision) => {
if (options.new_uri && (Math.random() < newProbability || req.query.new_pdf)) {
const newResult = hyper.get(new URI(
`${options.new_uri}/${rp.domain}/v1/` +
`pdf/${encodeURIComponent(rp.title)}/a4/desktop`
))
.catch((e) => {
hyper.logger.log('error/proton', e);
if (req.query.new_pdf) {
throw e;
}
});
if (req.query.new_pdf) {
return newResult;
}
}
return hyper.get({
uri: new URI(`${options.uri}/pdf`),
query: {
accessKey: options.secret,
url: `${options.scheme || 'https'}://${rp.domain}/wiki/` +
`${encodeURIComponent(rp.title)}?printable=yes`
}
})
.then((res) => {
return {
status: 200,
headers: {
'content-disposition': `attachment; ${filenameParameters(rp.title)}`,
'content-type': res.headers['content-type'],
'content-length': res.headers['content-length'],
'cache-control': options['cache-control'] ||
's-maxage=600, max-age=600',
etag: latestRevision.headers.etag
},
body: res.body
};
});
});
options,
filenameParameters(name) {
// Return two parameters
const encodedName = `${encodeURIComponent(name)}.pdf`;
const quotedName = `"${encodedName.replace(/"/g, '\\"')}"`;
return `filename=${quotedName}; filename*=UTF-8''${encodedName}`;
}
}
});
46 changes: 23 additions & 23 deletions v1/pdf.yaml
Expand Up @@ -57,27 +57,27 @@ paths:
application/problem+json:
schema:
$ref: '#/components/schemas/problem'
operationId: generatePDF
# x-request-handler:
# - get_latest_revision:
# request:
# method: get
# uri: /{domain}/sys/page_revisions/page/{title}
# - get_pdf_from_backend:
# request:
# method: get
# # Note: The title needs to be encoded twice.
# uri: '{{options.uri}}/pdf?accessKey={options.secret}&url={{default(options.scheme, "https")}}://{{domain}}/wiki/{_encodeURIComponent(title)}%3Fprintable=yes'
# return:
# status: 200
# headers:
# # Firefox supports filename*= syntax while Chrome respect
# # filename=. Safari is stupid and understands neither.
# # TODO: Quote raw `"` chars in filename.
# content-disposition: 'attachment; {{filenameParameters(request.params.title)}}'
# content-type: '{{get_pdf_from_backend.headers.content-type}}'
# content-length: '{{get_pdf_from_backend.headers.content-length}}'
# cache-control: '{{default(options.cache_control, "s-maxage=600, max-age=600")}}'
# etag: '{{get_latest_revision.headers.etag}}'
# body: '{{get_pdf_from_backend.body}}'
x-request-handler:
- get_latest_revision:
request:
method: get
uri: /{domain}/sys/page_revisions/page/{title}
- get_pdf_from_backend:
request:
method: get
# Hard-coded output options will be replaced later by extending
# the RESTBase endpoint with additional paramters. Defaults
# will be A4 and desktop if no parameters are provided.
uri: '{{options.uri}}/{{domain}}/v1/pdf/{title}/a4/desktop'
return:
status: 200
headers:
# Firefox supports filename*= syntax while Chrome respect
# filename=. Safari is stupid and understands neither.
content-disposition: 'attachment; {{filenameParameters(request.params.title)}}'
content-type: '{{get_pdf_from_backend.headers.content-type}}'
content-length: '{{get_pdf_from_backend.headers.content-length}}'
cache-control: '{{default(options.cache_control, "s-maxage=600, max-age=600")}}'
etag: '{{get_latest_revision.headers.etag}}'
body: '{{get_pdf_from_backend.body}}'
x-monitor: false # PDF generation is expensive and it's not stored, so don't run checker script

0 comments on commit c846869

Please sign in to comment.