Skip to content
Permalink
Browse files

Add custom wiki protocols for data access

* wikiapi:///?action=query&list=allpages
  Call to api.php - ignores the path parameter, and only uses the query

* wikirest:///api/rest_v1/page/...
  Call to the RESTBase API - requires the path to start with "/api/"

* wikiraw:///MyPage/data
  Get the raw content of a wiki page, where the path is the title
  of the page with an additional leading '/', which gets removed.
  Uses the MediaWiki API, and extracts the content from the response.

* wikirawupload://upload.wikimedia.org/wikipedia/commons/3/3e/Einstein_1921_by_F_Schmutzer_-_restoration.jpg
  Get an image for the graph, e.g. from Commons.
  This protocol allows any content from a host starting with "upload.", without query params.

Change-Id: I6be3f3f30a725d7a4c8ce9cf349f341f2bc19dd6
  • Loading branch information...
nyurik committed Dec 6, 2015
1 parent 5b720ac commit 787d64a118b72f8b26c8f4ded9597a0e5531763e
Showing with 106 additions and 48 deletions.
  1. +1 −2 Graph.body.php
  2. +0 −1 extension.json
  3. +0 −16 modules/graph1.js
  4. +105 −29 modules/graph2.js
@@ -75,9 +75,8 @@ public static function finalizeParserOutput( ParserOutput $output, $title, $isPr
if ( $liveSpecs || $interact ) {
// TODO: these 3 js vars should be per domain if 'ext.graph' is added, not per page
global $wgGraphDataDomains, $wgGraphUrlBlacklist, $wgGraphIsTrusted;
global $wgGraphDataDomains, $wgGraphIsTrusted;
$output->addJsConfigVars( 'wgGraphDataDomains', $wgGraphDataDomains );
$output->addJsConfigVars( 'wgGraphUrlBlacklist', $wgGraphUrlBlacklist );
$output->addJsConfigVars( 'wgGraphIsTrusted', $wgGraphIsTrusted );
$output->addModuleStyles( 'ext.graph' );
@@ -117,7 +117,6 @@
"GraphDataDomains": [],
"GraphDefaultVegaVer": 1,
"GraphEnableGZip": false,
"GraphUrlBlacklist": false,
"GraphIsTrusted": false,
"GraphImgServiceUrl": false
},
@@ -8,7 +8,6 @@
if ( originalSanitize === false ) {
// Make sure we only initialize graphs once
vg.config.domainWhiteList = mw.config.get( 'wgGraphDataDomains' );
vg.config.urlBlackList = mw.config.get( 'wgGraphUrlBlacklist' );
if ( !mw.config.get( 'wgGraphIsTrusted' ) ) {
vg.config.dataHeaders = { 'Treat-as-Untrusted': 1 };
}
@@ -27,21 +26,6 @@
if ( !url ) {
return false;
}
if ( !vg.config.urlBlackListRe ) {
// Lazy initialize urlBlackListRe
if ( vg.config.urlBlackList ) {
vg.config.urlBlackListRe = vg.config.urlBlackList.map( function ( s ) {
return new RegExp( s );
} );
} else {
vg.config.urlBlackListRe = [];
}
}
if ( vg.config.urlBlackListRe.some( function ( re ) {
return re.test( url );
} ) ) {
return false;
}
return url;
};
}
@@ -1,18 +1,114 @@
( function ( $, mw ) {
var originalSanitize;
var originalSanitize, originalLoader;

// Make sure we only initialize graphs once
vg.config.load.domainWhiteList = mw.config.get( 'wgGraphDataDomains' );
vg.config.load.urlBlackList = mw.config.get( 'wgGraphUrlBlacklist' );

// Both the `http` and `file` entries of vg.util.load are replaced by the same
// function, which never loads anything: it only reports an error for the
// requested url through the callback.
vg.util.load.http = function ( url, opt, callback ) {
	var reason = 'Loading of ' + url + ' is not allowed';

	callback( new Error( reason ) );
};
vg.util.load.file = vg.util.load.http;

// Override loader so that we can do post-loader data processing
originalLoader = vg.util.load.loader.bind( vg.util.load );

/**
 * Replacement loader: delegates to the original loader, then post-processes
 * the loaded data before handing it to the caller.
 *
 * When `opt.isApiCall` is set, the response body is parsed as JSON and:
 * - an unparsable (or non-object) body is reported as an error,
 * - a top-level `error` member is reported as an error,
 * - a top-level `warnings` member is logged with mw.log,
 * - if `opt.extractApiContent` is also set, the data is replaced by
 *   `query.pages[ 0 ].revisions[ 0 ].content` (an error is reported if that
 *   path does not exist in the response).
 * Otherwise the data is passed through unchanged.
 *
 * @param {Object} opt Load options; reads `isApiCall`, `extractApiContent` and `url`
 * @param {Function} callback Invoked as callback( error, data )
 * @return {Mixed} Whatever the original loader returns
 */
vg.util.load.loader = function ( opt, callback ) {
	// originalLoader is already bound to vg.util.load, so it can be called directly
	return originalLoader( opt, function ( error, data ) {
		var json;

		if ( error ) {
			callback( error );
			return;
		}
		if ( opt.isApiCall ) {
			// This was an API call - check for errors.
			// The body may not be JSON at all (e.g. an HTML error page or an
			// empty response); report that through the callback instead of
			// letting JSON.parse throw inside this asynchronous handler, which
			// would leave the caller without any completion.
			try {
				json = JSON.parse( data );
			} catch ( e ) {
				callback( new Error( 'Invalid API response for ' + opt.url + ': ' + e.message ) );
				return;
			}
			if ( json === null || typeof json !== 'object' ) {
				// Valid JSON, but not an object we can inspect (e.g. "null")
				callback( new Error( 'Invalid API response for ' + opt.url + ': not a JSON object' ) );
				return;
			}

			if ( json.error ) {
				error = new Error( 'API error: ' + JSON.stringify( json.error ) );
				data = undefined;
			} else {
				if ( json.warnings ) {
					mw.log( 'API warnings: ' + JSON.stringify( json.warnings ) );
				}
				if ( opt.extractApiContent ) {
					// Any missing step on the path to the content throws a
					// TypeError, which is turned into a "not available" error
					try {
						data = json.query.pages[ 0 ].revisions[ 0 ].content;
					} catch ( e ) {
						data = undefined;
						error = new Error( 'Page content not available ' + opt.url );
					}
				}
			}
		}
		callback( error, data );
	} );
};

// Override sanitizer to implement custom protocols and extra validation
originalSanitize = vg.util.load.sanitizeUrl.bind( vg.util.load );
vg.util.load.sanitizeUrl = function ( opt ) {
var url = originalSanitize.apply( vg.util.load, arguments );
if ( !url ) {
return false;
var path, query,
url = new mw.Uri( opt.url );

switch ( url.protocol ) {
case 'http':
case 'https':
// Will disable this as soon as all graphs have been switched to custom protocols
url.path = decodeURIComponent( url.path );
opt.url = url.toString();
return originalSanitize.call( vg.util.load, opt );

case 'wikiapi':
// wikiapi:///?action=query&list=allpages
// Call to api.php - ignores the path parameter, and only uses the query
path = '/w/api.php';
query = $.extend( url.query, { format: 'json', formatversion: 'latest' } );
opt.isApiCall = true;
break;

case 'wikirest':
// wikirest:///api/rest_v1/page/...
// Call to RESTbase api - requires the path to start with "/api/"
if ( !/^\/api\//.test( url.path ) ) {
return false;
}
path = url.path;
query = url.query;
break;

case 'wikiraw':
// wikiraw:///MyPage/data
// Get raw content of a wiki page, where the path is the title
// of the page with an additional leading '/' which gets removed.
// Uses mediawiki api, and extract the content after the request
path = '/w/api.php';
query = {
format: 'json',
formatversion: 'latest',
action: 'query',
prop: 'revisions',
rvprop: 'content',
titles: url.path.substring( 1 )
};
opt.isApiCall = true;
opt.extractApiContent = true;
break;

case 'wikirawupload':
// wikirawupload://upload.wikimedia.org/wikipedia/commons/3/3e/Einstein_1921_by_F_Schmutzer_-_restoration.jpg
// Get an image for the graph, e.g. from commons
// This tag specifies any content from the uploads.* domain, without query params
if ( !/^upload\./.test( url.host ) ) {
return false;
}
path = url.path;
break;
}
// Normalize url by parsing and re-encoding it
url = new mw.Uri( url );

opt.url = new mw.Uri( {
host: url.host,
port: url.port,
path: path,
query: query
} ).toString();

if ( !mw.config.get( 'wgGraphIsTrusted' ) &&
window.location.hostname.toLowerCase() === url.host.toLowerCase()
) {
@@ -21,27 +117,7 @@
opt.headers = { 'Treat-as-Untrusted': 1 };
}

url.path = decodeURIComponent( url.path );
url = url.toString();
if ( !url ) {
return false;
}
if ( !vg.config.load.urlBlackListRe ) {
// Lazy initialize urlBlackListRe
if ( vg.config.load.urlBlackList ) {
vg.config.load.urlBlackListRe = vg.config.load.urlBlackList.map( function ( s ) {
return new RegExp( s );
} );
} else {
vg.config.load.urlBlackListRe = [];
}
}
if ( vg.config.load.urlBlackListRe.some( function ( re ) {
return re.test( url );
} ) ) {
return false;
}
return url;
return originalSanitize.call( vg.util.load, opt );
};

/**

0 comments on commit 787d64a

Please sign in to comment.
You can’t perform that action at this time.