Skip to content

Commit

Permalink
Added Vie Publique support (public reports and speeches)
Browse files Browse the repository at this point in the history
  • Loading branch information
carnetdethese committed Apr 19, 2024
1 parent 3caf783 commit 368bff7
Showing 1 changed file with 175 additions and 0 deletions.
175 changes: 175 additions & 0 deletions Vie Publique.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
{
"translatorID": "858fa86d-82e2-43ca-9fc7-cf75b98101cb",
"label": "Vie Publique",
"creator": "Alexandre Mimms",
"target": "https?://(?:www.)?vie-publique(?:.fr)?",
"minVersion": "5.0",
"maxVersion": "",
"priority": 100,
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2024-04-19 05:51:17"
}

/*
***** BEGIN LICENSE BLOCK *****
Copyright © 2024 Alexandre Mimms
This file is part of Zotero.
Zotero is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Zotero is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
***** END LICENSE BLOCK *****
*/


/**
 * Scrape a Vie Publique public-report page into a Zotero `report` item.
 *
 * Reads the title, submission date, abstract, technical details (report type
 * and page count), personal authors, institutional authors, tags and the PDF
 * link from the page, then saves the item with `complete()`. Every piece of
 * data is optional: a missing element leaves the matching field empty instead
 * of aborting the whole import.
 *
 * @param {Document} doc - DOM of the report page.
 * @param {string} url - Address of the report page, stored on the item.
 */
function scrapeRapport(doc, url) {
	// Whitespace-normalised textContent is used instead of innerText so the
	// extracted values do not depend on how the page is rendered.
	const cleanText = node => ZU.trimInternal(node.textContent);

	const newItem = new Z.Item('report');
	newItem.title = text(doc, "h1", 0) || "";
	newItem.date = text(doc, ".field--name-field-date-remise", 0);
	newItem.abstractNote = text(doc, "#fiche-item-présentation");
	newItem.url = url;

	// The technical details are "<label> : <value>" list entries; look them up
	// by label so a missing or reordered entry cannot throw or mix up fields.
	const details = Array.from(doc.querySelectorAll(".tabpanel--technique--details li"), cleanText);
	const detailValue = (label) => {
		const prefix = label + " : ";
		const entry = details.find(line => line.startsWith(prefix));
		return entry ? entry.slice(prefix.length) : "";
	};
	newItem.reportType = detailValue("Type de document");
	newItem.pages = detailValue("Pagination").replace(" pages", "");

	// Institutional ("moral") authors: list each one exactly once.
	const institutions = Array.from(doc.querySelectorAll(".book--author-moral a"), cleanText)
		.filter(Boolean);
	if (institutions.length) {
		newItem.institution = institutions.join(", ");
	}

	// Let Zotero split personal names so multi-word names are kept whole
	// rather than truncated to their first two words.
	for (const author of doc.querySelectorAll(".book--author a")) {
		const name = cleanText(author);
		if (name) {
			newItem.creators.push(ZU.cleanAuthor(name, "author"));
		}
	}

	for (const tag of doc.querySelectorAll(".vp-item-tag")) {
		const label = cleanText(tag);
		if (label) {
			newItem.tags.push(label);
		}
	}

	// Only attach the PDF when the page actually offers a download link.
	const pdfAnchor = doc.querySelector(".book--actionsBox a");
	if (pdfAnchor && pdfAnchor.href) {
		newItem.attachments = [{
			url: pdfAnchor.href,
			title: "Full Text PDF",
			mimeType: "application/pdf",
			snapshot: false
		}];
	}

	newItem.complete();
}

/**
 * Scrape a Vie Publique speech ("discours") page into a Zotero `presentation`
 * item.
 *
 * Reads the title, date, speakers and tags from the page, then saves the item
 * with `complete()`.
 *
 * @param {Document} doc - DOM of the speech page.
 * @param {string} url - Address of the speech page, stored on the item.
 */
function scrapeSpeech(doc, url) {
	// Whitespace-normalised textContent is used instead of innerText so the
	// extracted values do not depend on how the page is rendered.
	const cleanText = node => ZU.trimInternal(node.textContent);

	const newItem = new Z.Item('presentation');
	newItem.title = text(doc, "h1", 0) || "";
	newItem.date = text(doc, ".datetime", 0);
	newItem.url = url;

	// Speakers are stored as "presenter", the primary creator type of a
	// presentation item. Zotero splits the name so multi-word names are kept
	// whole rather than truncated to their first two words.
	for (const speaker of doc.querySelectorAll(".line-intervenant a")) {
		const name = cleanText(speaker);
		if (name) {
			newItem.creators.push(ZU.cleanAuthor(name, "presenter"));
		}
	}

	for (const tag of doc.querySelectorAll(".vp-item-tag")) {
		const label = cleanText(tag);
		if (label) {
			newItem.tags.push(label);
		}
	}

	newItem.complete();
}

/**
 * Decide which Zotero item type the current page maps to, based solely on
 * path fragments found in its URL.
 *
 * @param {Document} doc - Page DOM (not consulted).
 * @param {string} url - Address of the page.
 * @returns {string|false} 'report', 'presentation', 'multiple', or false when
 *   no known fragment is present.
 */
function detectWeb(doc, url) {
	// Ordered on purpose: the first fragment present in the URL wins.
	const typeByFragment = [
		['/rapport', 'report'],
		['/discours', 'presentation'],
		['/recherche', 'multiple'],
	];
	for (const [fragment, itemType] of typeByFragment) {
		if (url.includes(fragment)) {
			return itemType;
		}
	}
	return false;
}

/**
 * Collect the result links of a listing page as a { href: title } map.
 *
 * NOTE(review): the selector only matches links whose href contains
 * "/article/"; this looks like a template placeholder — confirm that it
 * matches the result links of Vie Publique search pages.
 *
 * @param {Document} doc - DOM of the listing page.
 * @param {boolean} checkOnly - When truthy, stop at the first usable link and
 *   return true instead of building the map.
 * @returns {Object|boolean} The map of results, true in check-only mode when
 *   a usable link exists, or false when nothing usable was found.
 */
function getSearchResults(doc, checkOnly) {
	const results = {};
	const links = doc.querySelectorAll('h2 > a.title[href*="/article/"]');
	for (const link of links) {
		const target = link.href;
		const label = ZU.trimInternal(link.textContent);
		// Only links with both an address and a non-empty title are usable.
		if (target && label) {
			if (checkOnly) {
				return true;
			}
			results[target] = label;
		}
	}
	if (Object.keys(results).length === 0) {
		return false;
	}
	return results;
}

/**
 * Translator entry point: save the current page, or — on a listing page — let
 * the user pick results and save each selected page.
 *
 * @param {Document} doc - DOM of the page the translator was invoked on.
 * @param {string} url - Address of that page.
 */
async function doWeb(doc, url) {
	const docType = detectWeb(doc, url);
	if (docType !== 'multiple') {
		await scrape(doc, url, docType);
		return;
	}

	const items = await Zotero.selectItems(getSearchResults(doc, false));
	if (!items) return;
	for (const itemUrl of Object.keys(items)) {
		const itemDoc = await requestDocument(itemUrl);
		// Each selected page has its own type; scrape() dispatches on it, so it
		// must be detected per page and passed along (without it nothing is saved).
		await scrape(itemDoc, itemUrl, detectWeb(itemDoc, itemUrl));
	}
}

/**
 * Dispatch a single page to the scraper matching its item type.
 *
 * @param {Document} doc - DOM of the page to scrape.
 * @param {string} [url] - Address of the page; defaults to the document's own
 *   location.
 * @param {string|false} [docType] - Item type of the page. When omitted it is
 *   detected from the page, so a call with only a document still saves
 *   something. Types other than 'report' and 'presentation' are ignored.
 */
async function scrape(doc, url = doc.location.href, docType = detectWeb(doc, url)) {
	if (docType === "report") {
		scrapeRapport(doc, url);
	}
	else if (docType === "presentation") {
		scrapeSpeech(doc, url);
	}
}

0 comments on commit 368bff7

Please sign in to comment.