Permalink
Browse files

Add selector support

  • Loading branch information...
Kikobeats committed May 5, 2018
1 parent 27ddf73 commit e49857b23d78b3b0ba1ad206e9fa201366163aa0
Showing with 15 additions and 5 deletions.
  1. +5 −2 bin/cli/extract-urls.js
  2. +5 −3 bin/cli/help.js
  3. +4 −0 bin/cli/index.js
  4. +1 −0 package.json
@@ -5,12 +5,15 @@ const normalizeUrl = require('normalize-url')
const getUrlsFromHtml = require('html-urls')
const { map, reduce } = require('lodash')
const fromXML = require('xml-urls')
const cheerio = require('cheerio')
const got = require('got')
const { isXmlUrl } = fromXML
const fromHTML = async (url, opts) => {
const { body: html } = await got(url, opts)
/**
 * Fetch `url` with `got` and extract the URLs found in the response body.
 *
 * @param {string} url - Address of the page to download.
 * @param {object} [options] - `selector` is consumed here; every other key is
 *   forwarded untouched to both `got` and `getUrlsFromHtml`.
 * @param {string} [options.selector] - When set, only the markup inside the
 *   matched element is scanned instead of the whole document.
 * @returns {Promise<Array>} The `normalizeUrl` property of each entry returned
 *   by `getUrlsFromHtml`.
 */
const fromHTML = async (url, { selector, ...opts } = {}) => {
  const { body: rawHtml } = await got(url, opts)

  let html = rawHtml
  if (selector) {
    // Parse only when a selector was requested; the raw body is used otherwise.
    const $ = cheerio.load(rawHtml)
    // `.html()` yields null when nothing matches; fall back to an empty
    // document so the extractor receives a string rather than null.
    // NOTE(review): `.html()` reads the first matched element only — confirm
    // that is the intended behavior for selectors matching several elements.
    html = $(selector).html() || ''
  }

  const urls = await getUrlsFromHtml({ url, html, ...opts })
  return map(urls, 'normalizeUrl')
}
@@ -14,6 +14,7 @@ module.exports = gray(`${gray(description)}
-h, --help Show the help information
-q, --quiet Show only the resume (defaults to false)
-r, --retries Number of request retries when network errors happens (defaults to 2)
-s, --selector Only detects URLs inside the selector
-t, --timeout Milliseconds to wait before consider a timeout response
-v, --verbose Enable verbose output (defaults to false)
-v, --version Output the version number
@@ -30,6 +31,7 @@ module.exports = gray(`${gray(description)}
)} https://kikobeats.com --whitelist https://www.linkedin.com/in/kikobeats
– Exclude based in a matcher
$ ${green(
'urlint'
)} https://kikobeats.com --whitelist "https://github.com*`)
$ ${green('urlint')} https://kikobeats.com --whitelist "https://github.com*
– Just detect URLs inside body selector
$ ${green('urlint')} https://kikobeats.com --selector body`)
@@ -56,6 +56,10 @@ const cli = require('meow')(require('./help'), {
type: 'number',
default: 8
},
selector: {
alias: 's',
type: 'string'
},
verbose: {
alias: 'v',
type: 'boolean',
@@ -35,6 +35,7 @@
"@metascraper/helpers": "~3.10.7",
"aggregate-error": "~1.0.0",
"chalk": "~2.4.1",
"cheerio": "~1.0.0-rc.2",
"dnserrors": "~2.0.3",
"got": "~8.3.0",
"html-urls": "~1.0.5",

0 comments on commit e49857b

Please sign in to comment.