From c000c683d98c59d9a09163d3f916fe00c7205cd5 Mon Sep 17 00:00:00 2001 From: Yoran Brondsema Date: Tue, 13 Feb 2018 09:07:37 +0100 Subject: [PATCH] Fetch the text direction in parseGeneral --- README.md | 1 + lib/index.js | 3 ++- test/scraping.js | 18 +++++++++++++++++- test/static.js | 2 +- test/static/turtle_article.html | 2 +- test/static/turtle_article.json | 1 + test/static/turtle_article_case.html | 2 +- 7 files changed, 24 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 6951275..4131a2e 100644 --- a/README.md +++ b/README.md @@ -106,6 +106,7 @@ The method parseGeneral obtains the following general metadata: + ``` ## Tests diff --git a/lib/index.js b/lib/index.js index b3283dd..43f993a 100644 --- a/lib/index.js +++ b/lib/index.js @@ -302,7 +302,8 @@ exports.parseGeneral = BBPromise.method(function(chtml){ robots: chtml('meta[name=robots i]').first().attr('content'), //robots shortlink: chtml('link[rel=shortlink i]').first().attr('href'), //short link title: chtml('title').first().text(), //title tag - lang: chtml('html').first().attr('lang') || chtml('html').first().attr('xml:lang') //lang <html lang=""> or <html xml:lang=""> + lang: chtml('html').first().attr('lang') || chtml('html').first().attr('xml:lang'), //lang <html lang=""> or <html xml:lang=""> + dir: chtml('html').first().attr('dir') //dir <html dir=""> }; // Copy key-value pairs with defined values to meta diff --git a/test/scraping.js b/test/scraping.js index 7b5ce30..8378b24 100644 --- a/test/scraping.js +++ b/test/scraping.js @@ -101,7 +101,7 @@ describe('scraping', function() { url: "http://www.lemonde.fr", headers: { 'User-Agent': 'webscraper' - } + } }; return preq.get(options).then(function(callRes) { var chtml = cheerio.load(callRes.body); @@ -110,6 +110,22 @@ describe('scraping', function() { }); }); }); + + it('should get html dir parameter', function() { + var expected = "rtl"; + var options = { + url: "https://www.iranrights.org/fa/", + headers: { + 'User-Agent': 'webscraper' + } + }; + return preq.get(options).then(function(callRes) { + var chtml = cheerio.load(callRes.body); + return meta.parseGeneral(chtml).then(function(results) { + assert.deepEqual(results.dir, expected); + }); + }); + }); }); describe('parseHighwirePress function', function() { diff --git a/test/static.js b/test/static.js index 5edb49f..550c473 100644 --- a/test/static.js +++ b/test/static.js @@ -65,7 +65,7 @@ describe('static files', function() { }); }); - it('should be case insensitive on Turtle Article file', function() { + it('should be case insensitive on turtle article file', function() { expected = JSON.parse(fs.readFileSync('./test/static/turtle_article.json')); $ = cheerio.load(fs.readFileSync('./test/static/turtle_article_case.html')); return meta.parseAll($).then(function(results){ diff --git a/test/static/turtle_article.html b/test/static/turtle_article.html index 40679d9..6e6a438 100644 --- a/test/static/turtle_article.html +++ b/test/static/turtle_article.html @@ -1,4 +1,4 @@ -<html lang="en"> +<html lang="en" dir="ltr"> <head prefix="og: http://ogp.me/ns# fb: http://ogp.me/ns/fb# article: http://ogp.me/ns/article#"> diff --git a/test/static/turtle_article.json b/test/static/turtle_article.json index 658346f..97917ae 100644 --- a/test/static/turtle_article.json +++ b/test/static/turtle_article.json @@ -50,6 +50,7 @@ "authorlink": "http://examples.com/turtlelvr", "canonical": "http://example.com/turtles", "description": "Exposition on the awesomeness of turtles", + "dir": "ltr", "icons": [ { "href": "turtle.png", diff --git a/test/static/turtle_article_case.html b/test/static/turtle_article_case.html index a365883..d7861a6 100644 --- a/test/static/turtle_article_case.html +++ b/test/static/turtle_article_case.html @@ -1,4 +1,4 @@ -<html lang="en"> +<html lang="en" dir="ltr"> <!-- Turtle Article containing capitALised tags to test case sensitivity -->