diff --git a/lib/cms_scanner/target/scope.rb b/lib/cms_scanner/target/scope.rb index 75214097..e46c760a 100644 --- a/lib/cms_scanner/target/scope.rb +++ b/lib/cms_scanner/target/scope.rb @@ -43,15 +43,25 @@ def in_scope_uris(res, xpath = '//@href|//@src|//@data-src') # # @return [ Regexp ] The pattern related to the target url and in scope domains, # it also matches escaped /, such as in JSON JS data: http:\/\/t.com\/ + # rubocop:disable Metrics/AbcSize def scope_url_pattern return @scope_url_pattern if @scope_url_pattern - domains = [uri.host + uri.path] + scope.domains[1..-1]&.map(&:to_s) + scope.invalid_domains + domains = [uri.host + uri.path] + + domains += if scope.domains.empty? + [*scope.invalid_domains[1..-1]] + else + [*scope.domains[1..-1]].map(&:to_s) + scope.invalid_domains + end domains.map! { |d| Regexp.escape(d.gsub(%r{/$}, '')).gsub('\*', '.*').gsub('/', '\\\\\?/') } + domains[0].gsub!(Regexp.escape(uri.host), Regexp.escape(uri.host) + '(?::\\d+)?') if uri.port + @scope_url_pattern = %r{https?:\\?/\\?/(?:#{domains.join('|')})\\?/?}i end + # rubocop:enable Metrics/AbcSize # Scope Implementation class Scope diff --git a/lib/cms_scanner/version.rb b/lib/cms_scanner/version.rb index ca1d6a5d..3bb5b596 100644 --- a/lib/cms_scanner/version.rb +++ b/lib/cms_scanner/version.rb @@ -2,5 +2,5 @@ # Version module CMSScanner - VERSION = '0.5.0' + VERSION = '0.5.1' end diff --git a/spec/lib/target/scope_spec.rb b/spec/lib/target/scope_spec.rb index 8bf98672..a256f300 100644 --- a/spec/lib/target/scope_spec.rb +++ b/spec/lib/target/scope_spec.rb @@ -112,17 +112,32 @@ describe '#scope_url_pattern' do context 'when no scope given' do its(:scope_url_pattern) { should eql %r{https?:\\?/\\?/(?:e\.org)\\?/?}i } + + context 'when target is an invalid domain for PublicSuffix' do + let(:url) { 'http://wp-lab/' } + + its(:scope_url_pattern) { should eql %r{https?:\\?/\\?/(?:wp\-lab)\\?/?}i } + end + + context 'when a port is present in the target URL' do + let(:url) { 'http://wp.lab:82/aa' } + + its(:scope_url_pattern) { should eql %r{https?:\\?/\\?/(?:wp\.lab(?::\d+)?\\?/aa)\\?/?}i } + its(:scope_url_pattern) { should match 'https://wp.lab:82/aa' } + end end context 'when scope given' do - let(:opts) { super().merge(scope: ['*.cdn.org', 'wp-lamp']) } + let(:opts) { super().merge(scope: ['*.cdn.org', 'wp-lamp', '192.168.1.1']) } - its(:scope_url_pattern) { should eql %r{https?:\\?/\\?/(?:e\.org|.*\.cdn\.org|wp\-lamp)\\?/?}i } + its(:scope_url_pattern) { should eql %r{https?:\\?/\\?/(?:e\.org|.*\.cdn\.org|192\.168\.1\.1|wp\-lamp)\\?/?}i } context 'when target URL has a subdir' do let(:url) { 'https://e.org/blog/test' } - its(:scope_url_pattern) { should eql %r{https?:\\?/\\?/(?:e\.org\\?/blog\\?/test|.*\.cdn\.org|wp\-lamp)\\?/?}i } + its(:scope_url_pattern) do + should eql %r{https?:\\?/\\?/(?:e\.org\\?/blog\\?/test|.*\.cdn\.org|192\.168\.1\.1|wp\-lamp)\\?/?}i + end end end end