Skip to content

Commit

Permalink
Fixes #91
Browse files Browse the repository at this point in the history
  • Loading branch information
erwanlr committed Apr 24, 2019
1 parent c1c7be0 commit c064e13
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 41 deletions.
2 changes: 1 addition & 1 deletion lib/cms_scanner/finders/finder/smart_url_checker.rb
Expand Up @@ -29,7 +29,7 @@ def passive(opts = {})
#
# @return [ Array<String> ]
def passive_urls(_opts = {})
# NOTE(review): diff view without +/- markers — the next line appears to be the removed
# pre-change call (in_scope_urls), superseded by the in_scope_uris call below; confirm.
target.in_scope_urls(target.homepage_res, passive_urls_xpath)
# in_scope_uris is documented as returning Addressable::URI objects, so each one is
# converted with #to_s to keep this method's documented Array<String> return type.
target.in_scope_uris(target.homepage_res, passive_urls_xpath).map(&:to_s)
end

# @return [ String ]
Expand Down
12 changes: 5 additions & 7 deletions lib/cms_scanner/target.rb
Expand Up @@ -89,10 +89,10 @@ def javascripts_from_page(pattern, page = nil)
# @param [ Typhoeus::Response, String ] page
# @param [ String ] xpath
#
# @yield [ String, Nokogiri::XML::Element ] The url and its associated tag
# @yield [ Addressable::URI, Nokogiri::XML::Element ] The URI and its associated tag
#
# @return [ Array<String> ] The absolute URLs detected in the response's body from the HTML tags
def urls_from_page(page = nil, xpath = '//@href|//@src|//@data-src')
# @return [ Array<Addressable::URI> ] The absolute URIs detected in the response's body from the HTML tags
def uris_from_page(page = nil, xpath = '//@href|//@src|//@data-src')
page = NS::Browser.get(url(page)) unless page.is_a?(Typhoeus::Response)
found = []

Expand All @@ -108,13 +108,11 @@ def urls_from_page(page = nil, xpath = '//@href|//@src|//@data-src')
next
end

node_uri_string = node_uri.to_s

next unless node_uri.host

yield node_uri_string, node.parent if block_given? && !found.include?(node_uri_string)
yield node_uri, node.parent if block_given? && !found.include?(node_uri)

found << node_uri_string
found << node_uri
end

found.uniq
Expand Down
22 changes: 12 additions & 10 deletions lib/cms_scanner/target/scope.rb
Expand Up @@ -8,30 +8,32 @@ def scope
@scope ||= Scope.new
end

# @param [ String ] url An absolute URL
# @param [ String, Addressable::URI ] url_or_uri An absolute URL or URI
#
# @return [ Boolean ] true if the URL or URI given is in scope, false otherwise (including when it can not be parsed)
def in_scope?(url)
# NOTE(review): diff view without +/- markers — the def above and the line below appear to be
# the removed String-only implementation; the def that follows is its replacement. Confirm.
scope.include?(Addressable::URI.parse(url.strip).host)
def in_scope?(url_or_uri)
# Strings are stripped and parsed; an Addressable::URI instance is used as given.
url_or_uri = Addressable::URI.parse(url_or_uri.strip) unless url_or_uri.is_a?(Addressable::URI)

# Scope membership is decided on the host component only.
scope.include?(url_or_uri.host)
rescue StandardError
# Any failure while stripping/parsing or reading the host (e.g. a nil argument, which has
# no #strip) is reported as "not in scope" rather than raised to the caller.
false
end

# @param [ Typhoeus::Response ] res
# @param [ String ] xpath
#
# @yield [ String, Nokogiri::XML::Element ] The in scope url and its associated tag
# @yield [ Addressable::URI, Nokogiri::XML::Element ] The in scope URI and its associated tag
#
# @return [ Array<String> ] The in scope absolute URLs detected in the response's body
def in_scope_urls(res, xpath = '//@href|//@src|//@data-src')
# @return [ Array<Addressable::URI> ] The in scope absolute URIs detected in the response's body
def in_scope_uris(res, xpath = '//@href|//@src|//@data-src')
found = []

urls_from_page(res, xpath) do |url, tag|
next unless in_scope?(url)
uris_from_page(res, xpath) do |uri, tag|
next unless in_scope?(uri)

yield url, tag if block_given?
yield uri, tag if block_given?

found << url
found << uri
end

found
Expand Down
@@ -1,6 +1,7 @@
<a href="http://e.org/f.txt">Link</a>
Duplicates should be ignored
<a href="http://e.org/f.txt">Link</a>
<img src="http://e.org/f.txt" />

<a href="mailto:mail@g.com">eMail me!</a>
<a href="jaVaScript:alert(2)">Click me Fool !</a>
Expand Down
24 changes: 13 additions & 11 deletions spec/lib/target/scope_spec.rb
Expand Up @@ -32,13 +32,14 @@
describe '#in_scope?' do
context 'when default scope (target domain)' do
[nil, '', 'http://out-of-scope.com', '//jquery.com/j.js',
'javascript:alert(3)', 'mailto:p@g.com'].each do |url|
'javascript:alert(3)', 'mailto:p@g.com',
Addressable::URI.parse('https://out.cloudfront.net')].each do |url|
it "returns false for #{url}" do
expect(target.in_scope?(url)).to eql false
end
end

%w[https://e.org/file.txt http://e.org/ //e.org].each do |url|
['https://e.org/file.txt', 'http://e.org/', '//e.org', Addressable::URI.parse('http://e.org')].each do |url|
it "returns true for #{url}" do
expect(target.in_scope?(url)).to eql true
end
Expand All @@ -65,16 +66,16 @@
end
end

describe '#in_scope_urls' do
describe '#in_scope_uris' do
let(:res) { Typhoeus::Response.new(body: File.read(fixtures.join('index.html'))) }

context 'when block given' do
it 'yield the url' do
expect { |b| target.in_scope_urls(res, &b) }
expect { |b| target.in_scope_uris(res, &b) }
.to yield_successive_args(
['http://e.org/f.txt', Nokogiri::XML::Element],
['http://e.org/script/s.js', Nokogiri::XML::Element],
['http://e.org/feed', Nokogiri::XML::Element]
[Addressable::URI.parse('http://e.org/f.txt'), Nokogiri::XML::Element],
[Addressable::URI.parse('http://e.org/script/s.js'), Nokogiri::XML::Element],
[Addressable::URI.parse('http://e.org/feed'), Nokogiri::XML::Element]
)
end
end
Expand All @@ -83,16 +84,17 @@
it 'returns the expected array' do
xpath = '//link[@rel="alternate" and @type="application/rss+xml"]/@href'

expect(target.in_scope_urls(res, xpath)).to eql(%w[http://e.org/feed])
expect(target.in_scope_uris(res, xpath)).to eql([Addressable::URI.parse('http://e.org/feed')])
end
end

context 'when no block given' do
after { expect(target.in_scope_urls(res)).to eql @expected }
after { expect(target.in_scope_uris(res)).to eql @expected }

context 'when default scope' do
it 'returns the expected array' do
@expected = %w[http://e.org/f.txt http://e.org/script/s.js http://e.org/feed]
@expected = %w[http://e.org/f.txt http://e.org/script/s.js
http://e.org/feed].map { |url| Addressable::URI.parse(url) }
end
end

Expand All @@ -101,7 +103,7 @@

it 'returns the expected array' do
@expected = %w[http://e.org/f.txt https://a.cdn.com/f2.js http://e.org/script/s.js
http://wp-lamp/robots.txt http://e.org/feed]
http://wp-lamp/robots.txt http://e.org/feed].map { |url| Addressable::URI.parse(url) }
end
end
end
Expand Down
24 changes: 12 additions & 12 deletions spec/lib/target_spec.rb
Expand Up @@ -134,38 +134,38 @@
end
end

describe '#urls_from_page' do
let(:page) { Typhoeus::Response.new(body: File.read(fixtures.join('urls_from_page.html'))) }
describe '#uris_from_page' do
let(:page) { Typhoeus::Response.new(body: File.read(fixtures.join('uris_from_page.html'))) }

context 'when block given' do
it 'yield the url' do
expect { |b| target.urls_from_page(page, &b) }
expect { |b| target.uris_from_page(page, &b) }
.to yield_successive_args(
['http://e.org/f.txt', Nokogiri::XML::Element],
['https://cdn.e.org/f2.js', Nokogiri::XML::Element],
['http://e.org/script/s.js', Nokogiri::XML::Element],
['http://wp-lamp/feed.xml', Nokogiri::XML::Element],
['http://g.com/img.jpg', Nokogiri::XML::Element],
['http://g.org/logo.png', Nokogiri::XML::Element]
[Addressable::URI.parse('http://e.org/f.txt'), Nokogiri::XML::Element],
[Addressable::URI.parse('https://cdn.e.org/f2.js'), Nokogiri::XML::Element],
[Addressable::URI.parse('http://e.org/script/s.js'), Nokogiri::XML::Element],
[Addressable::URI.parse('http://wp-lamp/feed.xml'), Nokogiri::XML::Element],
[Addressable::URI.parse('http://g.com/img.jpg'), Nokogiri::XML::Element],
[Addressable::URI.parse('http://g.org/logo.png'), Nokogiri::XML::Element]
)
end
end

context 'when no block given' do
it 'returns the expected array' do
expect(target.urls_from_page(page)).to eql(
expect(target.uris_from_page(page)).to eql(
%w[
http://e.org/f.txt https://cdn.e.org/f2.js http://e.org/script/s.js
http://wp-lamp/feed.xml http://g.com/img.jpg http://g.org/logo.png
]
].map { |url| Addressable::URI.parse(url) }
)
end

context 'when xpath argument given' do
it 'returns the expected array' do
xpath = '//link[@rel="alternate" and @type="application/rss+xml"]/@href'

expect(target.urls_from_page(page, xpath)).to eql(%w[http://wp-lamp/feed.xml])
expect(target.uris_from_page(page, xpath)).to eql([Addressable::URI.parse('http://wp-lamp/feed.xml')])
end
end
end
Expand Down

0 comments on commit c064e13

Please sign in to comment.