Skip to content

Commit

Permalink
Added Jus Politicum and Lextenso support.
Browse files Browse the repository at this point in the history
  • Loading branch information
carnetdethese committed Apr 18, 2024
1 parent 83629be commit 1e46531
Show file tree
Hide file tree
Showing 2 changed files with 295 additions and 0 deletions.
112 changes: 112 additions & 0 deletions Jus Politicum.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
{
"translatorID": "aeb7f19b-0907-4117-bef4-08e36af4d31f",
"label": "Jus Politicum",
"creator": "Alexandre Mimms",
"target": "https?://(?:www[.-])?juspoliticum.com",
"minVersion": "5.0",
"maxVersion": "",
"priority": 100,
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2024-04-18 16:40:24"
}

/*
***** BEGIN LICENSE BLOCK *****
Copyright © 2024 Alexandre Mimms
This file is part of Zotero.
Zotero is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Zotero is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
***** END LICENSE BLOCK *****
*/


/**
 * Classify the current page from its URL alone.
 *
 * @param {Document} doc - The page document (not inspected here).
 * @param {string} url - The page address.
 * @returns {string|false} 'journalArticle' when the URL contains
 *   '/article/', 'multiple' when it contains '/searches', false otherwise.
 */
function detectWeb(doc, url) {
	const isArticlePage = url.includes('/article/');
	if (isArticlePage) return 'journalArticle';
	return url.includes('/searches') ? 'multiple' : false;
}

/**
 * Collect the links found under '#search-section h2 a'.
 *
 * @param {Document} doc - The search page.
 * @param {boolean} checkOnly - When truthy, stop at the first usable link
 *   and return true instead of building the full map.
 * @returns {Object<string, string>|boolean} A map of link address to
 *   whitespace-normalised link text, true in check-only mode when at least
 *   one usable link exists, or false when none is found.
 */
function getSearchResults(doc, checkOnly) {
	const results = {};
	let count = 0;
	for (const link of doc.querySelectorAll('#search-section h2 a')) {
		const target = link.href;
		const label = ZU.trimInternal(link.textContent);
		// Only links that have both an address and a visible label are usable.
		if (target && label) {
			if (checkOnly) return true;
			results[target] = label;
			count += 1;
		}
	}
	return count > 0 ? results : false;
}

/**
 * Translator entry point: save the current article, or let the user pick
 * articles from a search page and save each of them.
 *
 * @param {Document} doc - The current page.
 * @param {string} url - The address of the current page.
 * @returns {Promise<void>}
 */
async function doWeb(doc, url) {
	// Anything that is not a search page is scraped directly.
	if (detectWeb(doc, url) !== 'multiple') {
		await scrape(doc, url);
		return;
	}

	const chosen = await Zotero.selectItems(getSearchResults(doc, false));
	if (!chosen) return;

	// Fetch and scrape the selected pages one after the other.
	for (const itemUrl of Object.keys(chosen)) {
		const itemDoc = await requestDocument(itemUrl);
		await scrape(itemDoc);
	}
}

/**
 * Build a journalArticle item from an article page and save it.
 *
 * Relies on the translator-sandbox globals `text`, `ZU` and `Zotero`.
 *
 * @param {Document} doc - The article page.
 * @param {string} [url] - Address of the page; defaults to the document's
 *   own location.
 * @returns {Promise<void>}
 */
async function scrape(doc, url = doc.location.href) {
	const newItem = new Zotero.Item("journalArticle");

	newItem.title = ZU.trimInternal(text(doc, "h2"));
	newItem.issue = ZU.trimInternal(text(doc, ".release-title .num").replace("N°", ""));
	newItem.abstractNote = ZU.trimInternal(text(doc, "#content"));
	newItem.url = url;

	// The author line is a comma-separated list of "First Last" names.
	for (const auteur of text(doc, ".article-author").split(", ")) {
		const name = ZU.trimInternal(auteur);
		// An absent author line splits into a single empty string; skip it
		// rather than saving a creator with no name.
		if (!name) continue;
		// cleanAuthor produces a two-field creator and keeps compound first
		// names, instead of keeping only the first two words of the name.
		newItem.creators.push(ZU.cleanAuthor(name, "author"));
	}

	// Not every page is guaranteed to expose an associated document, so the
	// attachment is only added when a link is actually present.
	const pdfLink = doc.querySelector(".documentsAssocies a");
	if (pdfLink && pdfLink.href) {
		newItem.attachments = [{
			url: pdfLink.href,
			title: "Full text PDF",
			mimeType: "application/pdf",
		}];
	}

	newItem.complete();
}

/** BEGIN TEST CASES **/
var testCases = [
]
/** END TEST CASES **/
183 changes: 183 additions & 0 deletions Lextenso.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
{
"translatorID": "3243d081-22c0-452c-8298-9d8a9fb5de2f",
"label": "Lextenso",
"creator": "Alexandre Mimms",
"target": "https?://(?:www[.-])?labase[.-]lextenso[.-](?:[.-]fr)?",
"minVersion": "5.0",
"maxVersion": "",
"priority": 100,
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2024-04-18 16:17:04"
}

/*
***** BEGIN LICENSE BLOCK *****
Copyright © 2022 Alexandre Mimms
This file is part of Zotero.
Zotero is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Zotero is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
***** END LICENSE BLOCK *****
*/


/**
 * Build a journalArticle item from an article page and save it.
 *
 * Relies on the translator-sandbox globals `text`, `ZU` and `Z`.
 *
 * @param {Document} doc - The article page.
 * @param {string} url - Address stored on the saved item.
 * @returns {Promise<void>}
 */
async function scrapeJournalArticle(doc, url) {
	// The origin line is read as "Issu de <journal> - <issue> - <page>".
	// Missing parts default to empty strings so the later replace() calls
	// never run on undefined.
	const origin = ZU.trimInternal(text(doc, ".document-metadata-origin").replace("Issu de ", ""));
	const [revue = "", numeroRevue = "", page = ""] = origin.split(" - ");

	const newItem = new Z.Item("journalArticle");
	newItem.title = ZU.trimInternal(text(doc, "#page-title"));

	for (const auteur of doc.querySelectorAll(".document-metadata-authors-name")) {
		// textContent rather than innerText: innerText depends on rendering
		// and is not reliable on documents that were fetched but never displayed.
		const name = ZU.trimInternal(auteur.textContent || "");
		if (!name) continue;
		// cleanAuthor produces a two-field creator and keeps compound first
		// names, instead of keeping only the first two words of the name.
		newItem.creators.push(ZU.cleanAuthor(name, "author"));
	}

	newItem.date = ZU.trimInternal(text(doc, ".document-metadata-date", 0).replace("Date de parution : ", ""));
	newItem.abstractNote = ZU.trimInternal(text(doc, ".cChapeau", 0));
	newItem.publicationTitle = revue;
	// Strip the "n°" prefix and a single leading zero, e.g. "n°03" -> "3".
	newItem.issue = numeroRevue.replace(/n°0?/, "");
	newItem.pages = page.replace(/page\s?/, "");
	newItem.url = url;
	// ISO language code, as Zotero expects in the language field.
	newItem.language = "fr";
	newItem.complete();
}

/**
 * Build a book item from a book summary page and save it.
 *
 * The summary page itself shows almost no metadata, but the individual
 * pages of the book do (ISBN, publisher, ...). The first such page linked
 * from the summary is therefore requested and read for the details.
 * Page count and edition are not collected: they were only found on the
 * shop page, and requesting that page fails.
 *
 * Relies on the translator-sandbox globals `text`, `ZU`, `Z` and
 * `requestDocument`.
 *
 * @param {Document} doc - The book summary page.
 * @param {string} url - Address stored on the saved item.
 * @returns {Promise<void>}
 */
async function scrapeBook(doc, url) {
	const newItem = new Z.Item("book");
	newItem.title = ZU.trimInternal(text(doc, "#page-title"));

	// A summary without any linked page still yields an item (title only)
	// instead of throwing on a missing element.
	const firstItemLink = doc.querySelector(".book-summary-list li a");
	if (firstItemLink && firstItemLink.href) {
		Z.debug(firstItemLink.href);
		const indivPage = await requestDocument(firstItemLink.href);

		// The reference line starts with a variable number of authors, so it
		// is read from the end, where the last three fields are always
		// date, publisher, ISBN.
		const ref = text(indivPage, ".document-metadata-ref .value", 0).split(", ").reverse();
		const [isbn, publisher, date] = ref;
		if (date) newItem.date = date;
		if (publisher) newItem.publisher = publisher;
		if (isbn) newItem.ISBN = isbn;

		for (const auteur of indivPage.querySelectorAll(".document-metadata-authors-name")) {
			// textContent rather than innerText: the requested page is never
			// rendered, and innerText depends on rendering.
			const name = ZU.trimInternal(auteur.textContent || "");
			if (!name) continue;
			// cleanAuthor produces a two-field creator and keeps compound
			// first names, instead of keeping only the first two words.
			newItem.creators.push(ZU.cleanAuthor(name, "author"));
		}
	}

	newItem.url = url;
	// ISO language code, as Zotero expects in the language field.
	newItem.language = "fr";
	newItem.complete();
}

/**
 * Classify the current page.
 *
 * @param {Document} doc - The page document.
 * @param {string} url - The page address.
 * @returns {string|false} 'multiple' when the URL contains
 *   '/lextenso/rechercher'; otherwise 'book' or 'journalArticle' depending
 *   on which marker class is present in the document; false when none is.
 */
function detectWeb(doc, url) {
	if (url.includes('/lextenso/rechercher')) return 'multiple';

	// Marker selectors are checked in order; the first one present wins.
	const typeBySelector = [
		[".node-type-ouvrage", 'book'],
		[".lextenso-document-article", 'journalArticle'],
	];
	for (const [selector, itemType] of typeBySelector) {
		if (doc.querySelectorAll(selector).length > 0) return itemType;
	}
	return false;
}

/**
 * Collect the article links found on a search page.
 *
 * @param {Document} doc - The search page.
 * @param {boolean} checkOnly - When truthy, stop at the first usable link
 *   and return true instead of building the full map.
 * @returns {Object<string, string>|boolean} A map of link address to
 *   whitespace-normalised link text, true in check-only mode when at least
 *   one usable link exists, or false when none is found.
 */
function getSearchResults(doc, checkOnly) {
	const collected = {};
	let hasResult = false;
	const links = doc.querySelectorAll('h2 > a.title[href*="/article/"]');
	for (const link of links) {
		const address = link.href;
		const label = ZU.trimInternal(link.textContent);
		// A link is usable only with both an address and a visible label.
		const usable = Boolean(address) && Boolean(label);
		if (usable && checkOnly) return true;
		if (usable) {
			collected[address] = label;
			hasResult = true;
		}
	}
	if (!hasResult) return false;
	return collected;
}

/**
 * Translator entry point: save the current document, or let the user pick
 * results from a search page and save each of them.
 *
 * @param {Document} doc - The current page.
 * @param {string} url - The address of the current page.
 * @returns {Promise<void>}
 */
async function doWeb(doc, url) {
	const docType = detectWeb(doc, url);
	if (docType !== 'multiple') {
		await scrape(doc, url, docType);
		return;
	}

	const items = await Zotero.selectItems(getSearchResults(doc, false));
	if (!items) return;

	for (const itemUrl of Object.keys(items)) {
		const itemDoc = await requestDocument(itemUrl);
		// 'multiple' describes the search page, not its results, so the type
		// of every selected result is detected on its own document. The URL
		// is passed explicitly so it lands in scrape()'s url parameter.
		await scrape(itemDoc, itemUrl, detectWeb(itemDoc, itemUrl));
	}
}

/**
 * Dispatch to the scraper matching the detected document type.
 *
 * @param {Document} doc - The page to scrape.
 * @param {string} [url] - Address of the page; defaults to the document's
 *   own location.
 * @param {string} [docType] - 'journalArticle' or 'book'; any other value
 *   results in no item being saved.
 * @returns {Promise<void>} Settles once the selected scraper has finished,
 *   and rejects if that scraper fails.
 */
async function scrape(doc, url = doc.location.href, docType) {
	// The scrapers are async: awaiting them keeps callers from moving on
	// before the item is complete and lets their errors propagate.
	if (docType === "journalArticle") {
		await scrapeJournalArticle(doc, url);
	}
	else if (docType === "book") {
		await scrapeBook(doc, url);
	}
}

/** BEGIN TEST CASES **/
var testCases = [
{
"type": "web",
"url": "https://www-labase-lextenso-fr.docelec-u-paris2.idm.oclc.org/",
"detectedItemType": false,
"items": []
}
]
/** END TEST CASES **/

0 comments on commit 1e46531

Please sign in to comment.