Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 9babc46
Showing
8 changed files
with
117 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
node_modules | ||
build |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# empty to just ignore .gitignore and include the 'build' folder |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# WikiCite data | ||
|
||
This repository contains scripts to extract, transform, and analyze bibliographic data from Wikidata. | ||
|
||
[![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT) | ||
[![Node](https://img.shields.io/badge/node-%3E=%20v6.4.0-brightgreen.svg)](http://nodejs.org) | ||
|
||
The source code is based on and makes use of the modules [wikidata-filter](https://www.npmjs.com/package/wikidata-filter) | ||
and [wikidata-sdk](https://www.npmjs.com/package/wikidata-sdk) by Maxime Lathuilière. | ||
|
||
## Usage | ||
|
||
bzcat latest-all.json.bz2 | ./bin/wikicite-extract > wikicite.ndjson | ||
|
||
## License | ||
|
||
[MIT](LICENSE.md) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#!/usr/bin/env node
// Command line launcher: loads index.js from the directory above this script.
const path = require('path')
const entryPoint = path.join(__dirname, '..', 'index.js')
require(entryPoint)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
#!/usr/bin/env node | ||
|
||
const split = require('split') | ||
const filter = require('wikidata-filter/lib/filter') | ||
const getClasses = require('./lib/get_classes') | ||
|
||
// Look up the classes below the root class Q732577 first; once they are known,
// filter the newline-delimited input on stdin and write matches to stdout.
getClasses('Q732577', (classes) => {
  const buildItemFilter = require('./lib/item_filter')
  const keepItem = buildItemFilter(classes)

  const lines = process.stdin.pipe(split())
  const matches = lines.pipe(filter(keepItem))
  matches.pipe(process.stdout)
})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
const wdk = require('wikidata-sdk') | ||
const request = require('request') | ||
|
||
module.exports = function (root, callback) { | ||
const sparql = "SELECT ?type WHERE { ?type wdt:P279* wd:"+root+" }" | ||
const url = wdk.sparqlQuery(sparql) | ||
|
||
request(url, (error, response, body) => { | ||
const results = JSON.parse(body).results | ||
const classes = results.bindings.map( (value) => { | ||
return value.type.value.substr(31) | ||
}) | ||
callback(classes) | ||
}) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
const parseLine = require('wikidata-filter/lib/parse_line') | ||
const wdk = require('wikidata-sdk') | ||
|
||
module.exports = function (types) { | ||
|
||
// store types in an object for fast lookup | ||
const typeMap = types.reduce(function(map, qid) { | ||
map[qid] = true; | ||
return map; | ||
}, {}); | ||
|
||
return (line) => { | ||
const item = parseLine(line) | ||
if (!item || item.type != 'item') return null | ||
if (!item.claims.P31) return null | ||
|
||
try { // https://github.com/maxlath/wikidata-sdk/issues/17 | ||
simplify(item, 'claims') | ||
} catch (e) { | ||
return null | ||
} | ||
|
||
if (!filterType(item.claims.P31, typeMap)) return null | ||
|
||
simplify(item, 'labels') | ||
simplify(item, 'descriptions') | ||
simplify(item, 'aliases') | ||
simplify(item, 'sitelinks') | ||
|
||
return JSON.stringify(item) + '\n' | ||
} | ||
} | ||
|
||
/**
 * Check whether at least one P31 value is contained in the type lookup.
 *
 * @param {Array} P31 - values of the item's P31 claims (presumably entity ids
 *   after simplification; verify against wikidata-sdk)
 * @param {Object} types - lookup object whose own keys are the wanted ids
 * @returns {boolean} true if any value is an own key of `types`
 */
const filterType = (P31, types) => {
  // own-property check: the `in` operator would also match inherited keys
  // such as "constructor" when the lookup is a plain object
  return P31.some((qid) => Object.prototype.hasOwnProperty.call(types, qid))
}
|
||
/**
 * Replace `item[attr]` in place with the result of the wikidata-sdk
 * simplify function of the same name. Missing or falsy attributes are
 * left untouched.
 *
 * @param {Object} item - entity object, modified in place
 * @param {string} attr - attribute name, also used as key into wdk.simplify
 */
const simplify = (item, attr) => {
  const raw = item[attr]
  if (!raw) return
  item[attr] = wdk.simplify[attr](raw)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
{ | ||
"name": "wikicite-data", | ||
"version": "0.0.1", | ||
"description": "Extract and transform bibliographic data from Wikidata", | ||
"main": "index.js", | ||
"dependencies": { | ||
"request": "^2.0", | ||
"wikidata-filter": "^2.0" | ||
}, | ||
"devDependencies": {}, | ||
"scripts": { | ||
"test": "echo \"Error: no test specified\" && exit 1" | ||
}, | ||
"keywords": [ | ||
"wikidata", | ||
"wikicite" | ||
], | ||
"author": "Jakob Voss <voss@gbv.de>", | ||
"license": "MIT" | ||
} |