From ba54a7ff8a24c6da1ade853ce120d7a1eabc7206 Mon Sep 17 00:00:00 2001 From: Petr Pchelko Date: Tue, 25 Oct 2016 11:38:02 -0700 Subject: [PATCH 1/9] Added PDF endpoint --- projects/wmf_default.yaml | 3 ++ projects/wmf_wiktionary.yaml | 3 ++ v1/pdf.yaml | 84 ++++++++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+) create mode 100644 v1/pdf.yaml diff --git a/projects/wmf_default.yaml b/projects/wmf_default.yaml index 06b07728e..305a8597b 100644 --- a/projects/wmf_default.yaml +++ b/projects/wmf_default.yaml @@ -54,6 +54,9 @@ paths: - path: v1/random.yaml options: '{{merge({"random_cache_control": "s-maxage=2, max-age=1"}, options.mobileapps)}}' + - path: v1/pdf.yaml + options: + response_cache_control: '{{options.purged_cache_control}}' /feed: x-modules: - path: v1/feed.js diff --git a/projects/wmf_wiktionary.yaml b/projects/wmf_wiktionary.yaml index 40734cb49..46a2f27d1 100644 --- a/projects/wmf_wiktionary.yaml +++ b/projects/wmf_wiktionary.yaml @@ -50,6 +50,9 @@ paths: options: response_cache_control: '{{options.purged_cache_control}}' host: '{{options.mobileapps.host}}' + - path: v1/pdf.yaml + options: + response_cache_control: '{{options.purged_cache_control}}' /transform: x-modules: - path: v1/transform.yaml diff --git a/v1/pdf.yaml b/v1/pdf.yaml new file mode 100644 index 000000000..5b8d637e1 --- /dev/null +++ b/v1/pdf.yaml @@ -0,0 +1,84 @@ +swagger: '2.0' +info: + version: '1.0.0-beta' + title: MediaWiki PDF API + description: Page PDF Render API + termsOfService: https://github.com/wikimedia/restbase#restbase + contact: + name: Services + email: services@lists.wikimedia.org + url: https://www.mediawiki.org/wiki/Services + license: + name: Apache licence, v2 + url: https://www.apache.org/licenses/LICENSE-2.0 +paths: + /pdf/{title}: + x-route-filters: + - path: ./lib/revision_table_access_check_filter.js + options: + redirect_cache_control: '{{options.response_cache_control}}' + get: + tags: + - Page content + summary: Get a page as PDF + description: | + Renders the page content as PDF. + + Stability: [experimental](https://www.mediawiki.org/wiki/API_versioning#Experimental) + produces: + - application/pdf + parameters: + - name: title + in: path + description: "Page title. Use underscores instead of spaces. Example: `Main_Page`." + type: string + required: true + responses: + '200': + description: The PDF render of an article + schema: + type: file + # TODO: Add an etag somehow. How??? + #headers: + # ETag: + # description: > + # Syntax: "{revision}/{tid}". + # Example: "701384379/154d7bca-c264-11e5-8c2f-1b51b33b59fc" + '404': + description: Unknown page title + schema: + $ref: '#/definitions/problem' + default: + description: Error + schema: + $ref: '#/definitions/problem' + + x-request-handler: + - get_pdf_from_backend: + request: + method: get + uri: 'https://pdf-electron.wmflabs.org/pdf?accessKey=secret&url=https://{{domain}}/wiki/{title}' + - return_response: + return: + status: 200 + headers: + content-disposition: 'attachment; filename={{request.params.title}}.pdf' + content-type: '{{get_pdf_from_backend.headers.content-type}}' + content-length: '{{get_pdf_from_backend.headers.content-length}}' + cache-control: '{{options.response_cache_control}}' + body: '{{get_pdf_from_backend.body}}' + +definitions: + # A https://tools.ietf.org/html/draft-nottingham-http-problem + problem: + required: + - type + properties: + type: + type: string + title: + type: string + detail: + type: string + instance: + type: string \ No newline at end of file From 7fb1d2d565210933343c1dfe83ae275dbc3e26ed Mon Sep 17 00:00:00 2001 From: Petr Pchelko Date: Tue, 25 Oct 2016 12:28:03 -0700 Subject: [PATCH 2/9] Decreased the default cache TTL --- config.example.wikimedia.yaml | 2 ++ projects/wmf_default.yaml | 2 +- projects/wmf_wiktionary.yaml | 2 +- v1/pdf.yaml | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/config.example.wikimedia.yaml b/config.example.wikimedia.yaml index b0a4dae3c..010d14a2a 100644 --- a/config.example.wikimedia.yaml +++ b/config.example.wikimedia.yaml @@ -33,6 +33,8 @@ default_project: &default_project cache_control: s-maxage=86400, max-age=86400 # 10 days Varnish caching, one day client-side purged_cache_control: s-maxage=864000, max-age=86400 + # Cache PDF for 5 minutes since it's not purged + pdf_cache_control: s-maxage=300, max-age=300 skip_updates: false # A different project template, sharing configuration options. diff --git a/projects/wmf_default.yaml b/projects/wmf_default.yaml index 305a8597b..de1aa74a6 100644 --- a/projects/wmf_default.yaml +++ b/projects/wmf_default.yaml @@ -56,7 +56,7 @@ paths: options.mobileapps)}}' - path: v1/pdf.yaml options: - response_cache_control: '{{options.purged_cache_control}}' + response_cache_control: '{{options.pdf_cache_control}}' /feed: x-modules: - path: v1/feed.js diff --git a/projects/wmf_wiktionary.yaml b/projects/wmf_wiktionary.yaml index 46a2f27d1..8a24eedae 100644 --- a/projects/wmf_wiktionary.yaml +++ b/projects/wmf_wiktionary.yaml @@ -52,7 +52,7 @@ paths: host: '{{options.mobileapps.host}}' - path: v1/pdf.yaml options: - response_cache_control: '{{options.purged_cache_control}}' + response_cache_control: '{{options.pdf_cache_control}}' /transform: x-modules: - path: v1/transform.yaml diff --git a/v1/pdf.yaml b/v1/pdf.yaml index 5b8d637e1..ae3da5a00 100644 --- a/v1/pdf.yaml +++ b/v1/pdf.yaml @@ -65,7 +65,7 @@ paths: content-disposition: 'attachment; filename={{request.params.title}}.pdf' content-type: '{{get_pdf_from_backend.headers.content-type}}' content-length: '{{get_pdf_from_backend.headers.content-length}}' - cache-control: '{{options.response_cache_control}}' + cache-control: '{{default(options.response_cache_control, "s-maxage=300, max-age=300")}}' body: '{{get_pdf_from_backend.body}}' definitions: From 5b87d659dc952a7ca73ffee34bf5039ce7f38d15 Mon Sep 17 00:00:00 2001 From: Petr Pchelko Date: Tue, 1 Nov 2016 10:21:52 -0700 Subject: [PATCH 3/9] Updated the config to use the `return` stanza --- v1/pdf.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/v1/pdf.yaml b/v1/pdf.yaml index ae3da5a00..ad18c6fee 100644 --- a/v1/pdf.yaml +++ b/v1/pdf.yaml @@ -58,7 +58,6 @@ paths: request: method: get uri: 'https://pdf-electron.wmflabs.org/pdf?accessKey=secret&url=https://{{domain}}/wiki/{title}' - - return_response: return: status: 200 headers: From e791f8b25c55d556a4cba88300d5287d2f2fb5af Mon Sep 17 00:00:00 2001 From: Petr Pchelko Date: Thu, 24 Nov 2016 11:13:58 -0800 Subject: [PATCH 4/9] Made the service URI configurable --- config.example.wikimedia.yaml | 7 +++++-- config.test.yaml | 5 +++++ projects/wmf_default.yaml | 3 +-- projects/wmf_wiktionary.yaml | 3 +-- v1/pdf.yaml | 20 ++++++++++++-------- 5 files changed, 24 insertions(+), 14 deletions(-) diff --git a/config.example.wikimedia.yaml b/config.example.wikimedia.yaml index 010d14a2a..204671a0d 100644 --- a/config.example.wikimedia.yaml +++ b/config.example.wikimedia.yaml @@ -33,8 +33,11 @@ default_project: &default_project cache_control: s-maxage=86400, max-age=86400 # 10 days Varnish caching, one day client-side purged_cache_control: s-maxage=864000, max-age=86400 - # Cache PDF for 5 minutes since it's not purged - pdf_cache_control: s-maxage=300, max-age=300 + pdf: + # Cache PDF for 5 minutes since it's not purged + cache_control: s-maxage=600, max-age=600 + uri: https://pdf-electron.wmflabs.org/pdf + secret: secret skip_updates: false # A different project template, sharing configuration options. diff --git a/config.test.yaml b/config.test.yaml index b82693fbf..fa0e07560 100644 --- a/config.test.yaml +++ b/config.test.yaml @@ -38,6 +38,11 @@ default_project: &default_project host: http://appservice.wmflabs.org events: {} purged_cache_control: test_purged_cache_control + pdf: + # Cache PDF for 5 minutes since it's not purged + cache_control: s-maxage=600, max-age=600 + uri: https://deployment-pdfrender.deployment-prep.eqiad.wmflabs/pdf + secret: secret skip_updates: false labs_project: &labs_project diff --git a/projects/wmf_default.yaml b/projects/wmf_default.yaml index de1aa74a6..b84a1f24e 100644 --- a/projects/wmf_default.yaml +++ b/projects/wmf_default.yaml @@ -55,8 +55,7 @@ paths: options: '{{merge({"random_cache_control": "s-maxage=2, max-age=1"}, options.mobileapps)}}' - path: v1/pdf.yaml - options: - response_cache_control: '{{options.pdf_cache_control}}' + options: '{{options.pdf}}' /feed: x-modules: - path: v1/feed.js diff --git a/projects/wmf_wiktionary.yaml b/projects/wmf_wiktionary.yaml index 8a24eedae..b350674bd 100644 --- a/projects/wmf_wiktionary.yaml +++ b/projects/wmf_wiktionary.yaml @@ -51,8 +51,7 @@ paths: response_cache_control: '{{options.purged_cache_control}}' host: '{{options.mobileapps.host}}' - path: v1/pdf.yaml - options: - response_cache_control: '{{options.pdf_cache_control}}' + options: '{{options.pdf}}' /transform: x-modules: - path: v1/transform.yaml diff --git a/v1/pdf.yaml b/v1/pdf.yaml index ad18c6fee..d111a6a14 100644 --- a/v1/pdf.yaml +++ b/v1/pdf.yaml @@ -38,12 +38,11 @@ paths: description: The PDF render of an article schema: type: file - # TODO: Add an etag somehow. How??? - #headers: - # ETag: - # description: > - # Syntax: "{revision}/{tid}". - # Example: "701384379/154d7bca-c264-11e5-8c2f-1b51b33b59fc" + headers: + ETag: + description: > + Syntax: "{revision}/{tid}". + Example: "701384379/154d7bca-c264-11e5-8c2f-1b51b33b59fc" '404': description: Unknown page title schema: @@ -54,17 +53,22 @@ paths: $ref: '#/definitions/problem' x-request-handler: + - get_latest_revision: + request: + method: get + uri: /{domain}/sys/page_revisions/page/{title} - get_pdf_from_backend: request: method: get - uri: 'https://pdf-electron.wmflabs.org/pdf?accessKey=secret&url=https://{{domain}}/wiki/{title}' + uri: '{{options.uri}}?accessKey={options.secret}&url=https://{{domain}}/wiki/{title}' return: status: 200 headers: content-disposition: 'attachment; filename={{request.params.title}}.pdf' content-type: '{{get_pdf_from_backend.headers.content-type}}' content-length: '{{get_pdf_from_backend.headers.content-length}}' - cache-control: '{{default(options.response_cache_control, "s-maxage=300, max-age=300")}}' + cache-control: '{{default(options.cache_control, "s-maxage=600, max-age=600")}}' + etag: '{{get_latest_revision.headers.etag}}' body: '{{get_pdf_from_backend.body}}' definitions: From 732770abb8a25a558a329ca9cdcfe3049e53c579 Mon Sep 17 00:00:00 2001 From: Petr Pchelko Date: Thu, 24 Nov 2016 12:23:15 -0800 Subject: [PATCH 5/9] Added a test for PDF --- config.test.yaml | 2 +- test/features/feed.js | 1 - test/features/pdf.js | 23 +++++++++++++++++++++++ v1/pdf.yaml | 2 +- 4 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 test/features/pdf.js diff --git a/config.test.yaml b/config.test.yaml index fa0e07560..ab734736a 100644 --- a/config.test.yaml +++ b/config.test.yaml @@ -41,7 +41,7 @@ default_project: &default_project pdf: # Cache PDF for 5 minutes since it's not purged cache_control: s-maxage=600, max-age=600 - uri: https://deployment-pdfrender.deployment-prep.eqiad.wmflabs/pdf + uri: https://pdf-electron.wmflabs.org/pdf secret: secret skip_updates: false diff --git a/test/features/feed.js b/test/features/feed.js index 9bac73b52..eca6a22f3 100644 --- a/test/features/feed.js +++ b/test/features/feed.js @@ -3,7 +3,6 @@ const assert = require('../utils/assert.js'); const server = require('../utils/server.js'); const preq = require('preq'); -const P = require('bluebird'); function assertStorageRequest(requests, method, bucket, expected) { const storageRequests = requests.filter((log) => diff --git a/test/features/pdf.js b/test/features/pdf.js new file mode 100644 index 000000000..ca7e235c5 --- /dev/null +++ b/test/features/pdf.js @@ -0,0 +1,23 @@ +'use strict'; + +const assert = require('../utils/assert.js'); +const server = require('../utils/server.js'); +const preq = require('preq'); + +describe('Feed', () => { + + before(() => server.start()); + + it('Should get PDF for a page', () => { + return preq.get({ + uri: `${server.config.bucketURL}/pdf/Test` + }) + .then((res) => { + assert.deepEqual(res.status, 200); + assert.deepEqual(res.headers['content-disposition'], 'attachment; filename=Test.pdf'); + assert.deepEqual(res.headers['content-type'], 'application/pdf'); + assert.ok(/"\d+\/[\d\w-]+"/.test(res.headers.etag)); + assert.ok(res.body.length !== 0); + }); + }); +}); diff --git a/v1/pdf.yaml b/v1/pdf.yaml index d111a6a14..d7908c929 100644 --- a/v1/pdf.yaml +++ b/v1/pdf.yaml @@ -16,7 +16,7 @@ paths: x-route-filters: - path: ./lib/revision_table_access_check_filter.js options: - redirect_cache_control: '{{options.response_cache_control}}' + redirect_cache_control: '{{options.cache_control}}' get: tags: - Page content From 3de4f6c6f496dda0560c1fc0051bbbba4e7509cc Mon Sep 17 00:00:00 2001 From: Petr Pchelko Date: Thu, 24 Nov 2016 12:24:32 -0800 Subject: [PATCH 6/9] Don't monitor the PDF endpoint with service-checker --- v1/pdf.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/v1/pdf.yaml b/v1/pdf.yaml index d7908c929..58d6f0830 100644 --- a/v1/pdf.yaml +++ b/v1/pdf.yaml @@ -70,6 +70,7 @@ paths: cache-control: '{{default(options.cache_control, "s-maxage=600, max-age=600")}}' etag: '{{get_latest_revision.headers.etag}}' body: '{{get_pdf_from_backend.body}}' + x-monitor: false # PDF generation is expensive and it's not stored, so don't run checker script definitions: # A https://tools.ietf.org/html/draft-nottingham-http-problem From f646340419995479d997086dd2b94917452c8bd7 Mon Sep 17 00:00:00 2001 From: Petr Pchelko Date: Thu, 24 Nov 2016 12:51:39 -0800 Subject: [PATCH 7/9] Test for russian wiki to examine correct title encoding --- lib/security_response_header_filter.js | 2 +- test/features/pdf.js | 6 ++++-- v1/pdf.yaml | 3 ++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/security_response_header_filter.js b/lib/security_response_header_filter.js index 84a1adde2..fe06bb76e 100644 --- a/lib/security_response_header_filter.js +++ b/lib/security_response_header_filter.js @@ -106,4 +106,4 @@ module.exports = function addCSPHeaders(hyper, req, next, options) { cspHeaderVariants.forEach((name) => { rh[name] = csp; }); return res; }); -}; +}; \ No newline at end of file diff --git a/test/features/pdf.js b/test/features/pdf.js index ca7e235c5..348d2d309 100644 --- a/test/features/pdf.js +++ b/test/features/pdf.js @@ -10,11 +10,13 @@ describe('Feed', () => { it('Should get PDF for a page', () => { return preq.get({ - uri: `${server.config.bucketURL}/pdf/Test` + uri: `${server.config.hostPort}/ru.wikipedia.org/v1/page/pdf/%D0%94%D0%B0%D1%80%D1%82_%D0%92%D0%B5%D0%B9%D0%B4%D0%B5%D1%80` }) .then((res) => { assert.deepEqual(res.status, 200); - assert.deepEqual(res.headers['content-disposition'], 'attachment; filename=Test.pdf'); + assert.deepEqual(res.headers['content-disposition'], + 'attachment; filename=%D0%94%D0%B0%D1%80%D1%82_%D0%92%D0%B5%D0%B9%D0%B4%D0%B5%D1%80.pdf;' + + ' filename*=%D0%94%D0%B0%D1%80%D1%82_%D0%92%D0%B5%D0%B9%D0%B4%D0%B5%D1%80.pdf'); assert.deepEqual(res.headers['content-type'], 'application/pdf'); assert.ok(/"\d+\/[\d\w-]+"/.test(res.headers.etag)); assert.ok(res.body.length !== 0); diff --git a/v1/pdf.yaml b/v1/pdf.yaml index 58d6f0830..ee4d1a3d8 100644 --- a/v1/pdf.yaml +++ b/v1/pdf.yaml @@ -64,7 +64,8 @@ paths: return: status: 200 headers: - content-disposition: 'attachment; filename={{request.params.title}}.pdf' + # Firefox supports filename*= syntax while Chrome respect filename=. Safari is stupid and understands neither + content-disposition: 'attachment; filename={request.params.title}.pdf; filename*={request.params.title}.pdf' content-type: '{{get_pdf_from_backend.headers.content-type}}' content-length: '{{get_pdf_from_backend.headers.content-length}}' cache-control: '{{default(options.cache_control, "s-maxage=600, max-age=600")}}' From bd93c0cca88cb54273cbe83dd9fce06f1ef8b779 Mon Sep 17 00:00:00 2001 From: Petr Pchelko Date: Thu, 24 Nov 2016 13:10:18 -0800 Subject: [PATCH 8/9] Fixed lint errors --- lib/security_response_header_filter.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/security_response_header_filter.js b/lib/security_response_header_filter.js index fe06bb76e..84a1adde2 100644 --- a/lib/security_response_header_filter.js +++ b/lib/security_response_header_filter.js @@ -106,4 +106,4 @@ module.exports = function addCSPHeaders(hyper, req, next, options) { cspHeaderVariants.forEach((name) => { rh[name] = csp; }); return res; }); -}; \ No newline at end of file +}; From 7cdee921a7609b40703a2e68f3c46febd05e9b14 Mon Sep 17 00:00:00 2001 From: Petr Pchelko Date: Fri, 25 Nov 2016 10:15:55 -0800 Subject: [PATCH 9/9] Pushed the /pdf part away from config --- config.example.wikimedia.yaml | 2 +- config.test.yaml | 2 +- v1/pdf.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config.example.wikimedia.yaml b/config.example.wikimedia.yaml index 204671a0d..c63df098e 100644 --- a/config.example.wikimedia.yaml +++ b/config.example.wikimedia.yaml @@ -36,7 +36,7 @@ default_project: &default_project pdf: # Cache PDF for 5 minutes since it's not purged cache_control: s-maxage=600, max-age=600 - uri: https://pdf-electron.wmflabs.org/pdf + uri: https://pdf-electron.wmflabs.org secret: secret skip_updates: false diff --git a/config.test.yaml b/config.test.yaml index ab734736a..6c5a00fbb 100644 --- a/config.test.yaml +++ b/config.test.yaml @@ -41,7 +41,7 @@ default_project: &default_project pdf: # Cache PDF for 5 minutes since it's not purged cache_control: s-maxage=600, max-age=600 - uri: https://pdf-electron.wmflabs.org/pdf + uri: https://pdf-electron.wmflabs.org secret: secret skip_updates: false diff --git a/v1/pdf.yaml b/v1/pdf.yaml index ee4d1a3d8..73f5c398d 100644 --- a/v1/pdf.yaml +++ b/v1/pdf.yaml @@ -60,7 +60,7 @@ paths: - get_pdf_from_backend: request: method: get - uri: '{{options.uri}}?accessKey={options.secret}&url=https://{{domain}}/wiki/{title}' + uri: '{{options.uri}}/pdf?accessKey={options.secret}&url=https://{{domain}}/wiki/{title}' return: status: 200 headers: