Permalink
Browse files

Improve url detection

  • Loading branch information...
Kikobeats committed May 30, 2018
1 parent 2335c33 commit 64f668aabcc0ff6687895f82b97237a5d952a5c6
Showing with 5 additions and 20 deletions.
  1. +4 −17 bin/cli/extract-urls.js
  2. +1 −3 package.json
@@ -1,11 +1,9 @@
'use strict'
const { getUrl, isUrl } = require('@metascraper/helpers')
const normalizeUrl = require('normalize-url')
const getUrlsFromHtml = require('html-urls')
const { map, reduce } = require('lodash')
const fromXML = require('xml-urls')
const cheerio = require('cheerio')
const { map } = require('lodash')
const got = require('got')
const { isXmlUrl } = fromXML
@@ -18,18 +16,7 @@ const fromHTML = async (url, { selector, ...opts }) => {
return map(urls, 'normalizeUrl')
}
module.exports = async (url, opts) => {
const urls = isXmlUrl(url)
? await fromXML(url, opts)
: await fromHTML(url, opts)
return reduce(
urls,
(acc, relativeUrl) => {
const absoluteUrl = normalizeUrl(getUrl(url, relativeUrl))
if (isUrl(absoluteUrl, { relative: false })) acc.push(absoluteUrl)
return acc
},
[]
)
/**
 * Extract URLs from `url`, choosing the extractor by resource type.
 *
 * When `isXmlUrl(url)` is truthy the call is forwarded to `fromXML`;
 * otherwise it is forwarded to `fromHTML`. Both arguments are passed
 * through untouched and the extractor's return value is handed back as-is
 * (`fromHTML` is declared `async`, so that branch yields a promise).
 *
 * @param {string} url - Address of the resource to inspect.
 * @param {Object} [opts] - Options forwarded verbatim to the extractor.
 * @returns {*} Whatever the selected extractor returns.
 */
module.exports = (url, opts) =>
  isXmlUrl(url) ? fromXML(url, opts) : fromHTML(url, opts)
@@ -32,19 +32,17 @@
"validate"
],
"dependencies": {
"@metascraper/helpers": "~3.11.4",
"@urlint/core": "~1.2.1",
"chalk": "~2.4.1",
"cheerio": "~1.0.0-rc.2",
"ci-env": "~1.6.1",
"clean-stack": "~1.3.0",
"github-build": "~1.2.0",
"got": "~8.3.1",
"html-urls": "~1.0.5",
"html-urls": "~1.0.8",
"lodash": "~4.17.10",
"meow": "~5.0.0",
"neat-log": "~2.4.0",
"normalize-url": "~3.0.0",
"ora": "~2.1.0",
"pretty-error": "~2.1.1",
"pretty-ms": "~3.2.0",

0 comments on commit 64f668a

Please sign in to comment.