Skip to content

Commit

Permalink
Fetch the text direction in parseGeneral
Browse files (browse the repository at this point in the history)
  • Loading branch information
YoranBrondsema authored and mvolz committed Mar 1, 2018
1 parent abfc9ba commit c000c68
Show file tree
Hide file tree
Showing 7 changed files with 24 additions and 5 deletions.
1 change: 1 addition & 0 deletions README.md
Expand Up @@ -106,6 +106,7 @@ The method parseGeneral obtains the following general metadata:
<link rel="shortlink" href="">
<title></title>
<html lang="en">
<html dir="rtl">
```

## Tests
Expand Down
3 changes: 2 additions & 1 deletion lib/index.js
Expand Up @@ -302,7 +302,8 @@ exports.parseGeneral = BBPromise.method(function(chtml){
robots: chtml('meta[name=robots i]').first().attr('content'), //robots <meta name ="robots" content="">
shortlink: chtml('link[rel=shortlink i]').first().attr('href'), //short link <link rel="shortlink" href="">
title: chtml('title').first().text(), //title tag <title>
lang: chtml('html').first().attr('lang') || chtml('html').first().attr('xml:lang') //lang <html lang=""> or <html xml:lang="">
lang: chtml('html').first().attr('lang') || chtml('html').first().attr('xml:lang'), //lang <html lang=""> or <html xml:lang="">
dir: chtml('html').first().attr('dir') //dir <html dir="">
};

// Copy key-value pairs with defined values to meta
Expand Down
18 changes: 17 additions & 1 deletion test/scraping.js
Expand Up @@ -101,7 +101,7 @@ describe('scraping', function() {
url: "http://www.lemonde.fr",
headers: {
'User-Agent': 'webscraper'
}
}
};
return preq.get(options).then(function(callRes) {
var chtml = cheerio.load(callRes.body);
Expand All @@ -110,6 +110,22 @@ describe('scraping', function() {
});
});
});

it('should get html dir parameter', function() {
    // Request description for the live page under test; the custom
    // User-Agent header is sent along with the GET.
    var requestOptions = {
        url: "https://www.iranrights.org/fa/",
        headers: {
            'User-Agent': 'webscraper'
        }
    };

    // Return the promise chain so the test runner waits for the request,
    // the parse, and the assertion before deciding the outcome.
    return preq.get(requestOptions)
        .then(function(response) {
            // Load the response body into cheerio and pass it to the
            // general-metadata parser.
            return meta.parseGeneral(cheerio.load(response.body));
        })
        .then(function(general) {
            // The parsed metadata is expected to report a `dir` of "rtl".
            assert.deepEqual(general.dir, "rtl");
        });
});
});

describe('parseHighwirePress function', function() {
Expand Down
2 changes: 1 addition & 1 deletion test/static.js
Expand Up @@ -65,7 +65,7 @@ describe('static files', function() {
});
});

it('should be case insensitive on Turtle Article file', function() {
it('should be case insensitive on turtle article file', function() {
expected = JSON.parse(fs.readFileSync('./test/static/turtle_article.json'));
$ = cheerio.load(fs.readFileSync('./test/static/turtle_article_case.html'));
return meta.parseAll($).then(function(results){
Expand Down
2 changes: 1 addition & 1 deletion test/static/turtle_article.html
@@ -1,4 +1,4 @@
<html lang="en">
<html lang="en" dir="ltr">

<head prefix="og: http://ogp.me/ns# fb: http://ogp.me/ns/fb# article: http://ogp.me/ns/article#">

Expand Down
1 change: 1 addition & 0 deletions test/static/turtle_article.json
Expand Up @@ -50,6 +50,7 @@
"authorlink": "http://examples.com/turtlelvr",
"canonical": "http://example.com/turtles",
"description": "Exposition on the awesomeness of turtles",
"dir": "ltr",
"icons": [
{
"href": "turtle.png",
Expand Down
2 changes: 1 addition & 1 deletion test/static/turtle_article_case.html
@@ -1,4 +1,4 @@
<html lang="en">
<html lang="en" dir="ltr">
<!--
Turtle Article containing capitALised tags to test case sensitivity
-->
Expand Down

0 comments on commit c000c68

Please sign in to comment.