Skip to content

Commit

Permalink
feat(scan): new field: page_date (from microdata)
Browse files Browse the repository at this point in the history
  • Loading branch information
popstas committed Mar 11, 2021
1 parent 9121faf commit f8eecca
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 2 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ Options:
- request_time
- title
- h1
- page_date
- description
- keywords
- og_title
Expand Down
6 changes: 6 additions & 0 deletions src/presets/fields.js
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,12 @@ const fields = [
comment_en: 'h1',
groups: ['seo'],
},
{
name: 'page_date',
comment: 'Дата страницы',
comment_en: 'Page date',
groups: ['content'],
},
{
name: 'description',
comment: 'Description',
Expand Down
1 change: 1 addition & 0 deletions src/presets/scraperFields.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ module.exports = {
'result.request_time',
'result.title',
'result.h1',
'result.page_date',
'result.description',
'result.keywords',
'result.og_title',
Expand Down
16 changes: 14 additions & 2 deletions src/scrap-site.js
Original file line number Diff line number Diff line change
Expand Up @@ -191,11 +191,13 @@ module.exports = async (baseUrl, options = {}) => {
const isCanonical = canonical ?
(canonical == decodeURI(window.location.href) ||
canonical == decodeURI(relUrl) ? 1 : 0) : '';

const result = {
request_time:
window.performance.timing.responseEnd -
window.performance.timing.requestStart,
title: $('title').text(),
page_date: '',
h1: $('h1').text().trim(),
h1_count: $('h1').length,
h2_count: $('h2').length,
Expand Down Expand Up @@ -242,11 +244,21 @@ module.exports = async (baseUrl, options = {}) => {
replace(/https?:\/\/schema\.org\//, ''))).toArray().join(', '),
};

// page date from schema.org microdata (itemprop="datePublished")
const pageDate = $('[itemprop="datePublished"][datetime]').first();
if (pageDate.length === 1) {
const dateStr = pageDate.attr('datetime');
const d = new Date(dateStr);
if (!isNaN(d.getTime())) {
result.page_date = dateStr.substring(0, 10);
}
}

for (let name in customFields) {
try {
result[name] = eval(customFields[name].replace(/`/g, '\''));
} catch(e) {
result[name] = `fiels ${name}: error parse ${customFields[name]}`;
result[name] = `field ${name}: error parse ${customFields[name]}`;
}
// if(name == 'section') result[name] = $('.views-field.views-field-field-section a').text();
}
Expand Down Expand Up @@ -525,7 +537,7 @@ module.exports = async (baseUrl, options = {}) => {
});
crawler.on('maxrequestreached', options => {
if (crawler._browser._connection._closed) return; // catch error after scan
console.log(`\n\n${color.yellow}Max requests reached${color.reset}`);
console.log(`\n${color.yellow}Max requests reached${color.reset}`);
// console.log(`${color.yellow}Please, ignore this error:${color.reset}`);
});

Expand Down

0 comments on commit f8eecca

Please sign in to comment.