forked from rspec/rspec.github.io
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlink_checker
executable file
·144 lines (108 loc) · 4.02 KB
/
link_checker
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#!/usr/bin/env ruby
# Command-line preamble: validate the single URL argument, then load the
# third-party libraries the crawler depends on.
url = ARGV[0]
usage_message = <<-USAGE
This tool is designed to check links on cucumber documentation, usage:
#{__FILE__} http://<url>:port/features/<Version>/<Library>
USAGE
abort usage_message if url.nil? || url.empty?

require 'uri'

# A malformed argument is a usage error; show the usage text rather than a
# URI::InvalidURIError backtrace.
begin
  uri = URI.parse(url)
rescue URI::InvalidURIError
  abort usage_message
end

# The path must be exactly /features/<digits>-<digits>/rspec-<name>, with an
# optional trailing slash. \A and \z anchor the whole string (not just a line).
abort usage_message unless uri.path =~ %r[\A/features/\d+-\d+/rspec-\w+/?\z]

# These gems are only needed once the arguments are known to be valid; a
# missing gem is reported with a short hint instead of a LoadError backtrace.
begin
  require 'bundler/setup'
  require 'nokogiri'
  require 'httparty'
rescue LoadError
  abort "Could not load one of bundler / nokogiri / httparty, check your bundle and try again."
end
# Checks every <a href> on +page+, records the outcome of each request in
# +state+, and recurses into in-scope internal pages that answered 200.
#
# library - library path segment; with +version+ it defines which internal
#           paths (/features/<version>/<library>) are crawled recursively.
# version - version path segment, see +library+.
# host    - host applied to links that carry no host; links whose host
#           differs from it are treated as external.
# port    - port applied to links that carry no host.
# source  - URI of the page the links were found on; used to resolve relative
#           links and stored with each result.
# page    - parsed document responding to css('a'), whose elements respond to
#           attr('href').
# state   - Hash of results keyed by URL string; mutated in place. A URL
#           already present in it is not requested again.
#
# Entry shapes written to +state+:
#   {code:, source:, type: :external | :internal_non_crawled | :internal_crawled}
#   (:internal_crawled entries also get :links when the response code is 200)
#   {error:, source:, type: :external}  - the external request raised
#   {error:, source:, type: :invalid}   - the href could not be parsed as a URI
#
# Returns +state+.
def check(library, version, host, port, source, page, state)
  next_pages = {}
  # Carriage return + 200 spaces + carriage return: wipes the status line.
  clear_line = "\r#{' ' * 200}\r"
  crawl_scope = %r[/features/#{version}/#{library}]

  page.css('a').each do |a|
    href = a.attr('href').to_s.strip
    next if href.empty?

    begin
      to_parse = URI.parse(href)
    rescue URI::InvalidURIError => error
      # An unparseable href is recorded for manual checking instead of
      # aborting the whole crawl.
      state[href] ||= {error: error, source: source, type: :invalid}
      next
    end

    # Opaque URIs (mailto:, javascript:, tel:, ...) have no host/port/path;
    # assigning a host or port to them raises, and they cannot be fetched.
    next if to_parse.opaque

    to_parse.scheme = "http" unless to_parse.scheme
    to_parse.port = port unless to_parse.host
    to_parse.host = host unless to_parse.host

    path = to_parse.path.to_s
    if path =~ /^\.\//
      # "./x" is resolved against the source path minus its last segment.
      sections = path.split("/")
      unless "." == sections.shift
        abort "Tried to shift a '.' off #{path} but failed\n"
      end
      root = source.path.split("/")
      root.pop if root.pop == ""
      to_parse.path = (root + sections).join("/")
    elsif !path.start_with?("/") && path =~ /\.\./
      # Only relative paths are rewritten; an absolute path containing ".."
      # (e.g. on an external host) is requested as written.
      sections = path.split("/")
      climbs = sections.take_while { |section| section == ".." }.length
      sections = sections.drop(climbs)
      abort "Unable to hydrate url #{path} on #{source}\n" if sections.empty?
      root = source.path.split("/")
      # Drop the source's last segment plus one more per leading "..".
      to_parse.path = "#{root[0..(-2 - climbs)].join("/")}/#{sections.join("/")}"
    end

    key = to_parse.to_s
    next if state[key]
    $stdout.write "#{clear_line}Handling link... #{to_parse}"

    if to_parse.host != host
      next if ENV['SKIP_EXTERNAL']
      $stdout.write "#{clear_line}Requesting external link... #{to_parse}"
      begin
        response = HTTParty.get(to_parse)
        state[key] = {code: response.code, source: source, type: :external}
      rescue StandardError => error
        # StandardError only: Interrupt (Ctrl-C) and SystemExit must still
        # be able to stop the script.
        state[key] = {error: error, source: source, type: :external}
      end
      next
    end

    unless to_parse.path =~ crawl_scope
      # Internal but outside the crawled library: check it, do not follow it.
      $stdout.write "#{clear_line}Requesting internal link... #{to_parse}"
      response = HTTParty.get(to_parse)
      state[key] = {code: response.code, source: source, type: :internal_non_crawled}
      next
    end

    response = HTTParty.get(to_parse)
    state[key] = {code: response.code, source: source, type: :internal_crawled}
    next unless response.code == 200

    $stdout.write "#{clear_line}Parsing link... #{to_parse}"
    result = Nokogiri.parse(response.body)
    state[key][:links] = result.css('a').length
    next_pages[to_parse] = result
  end

  # Recurse only after every link on this page has been handled; +state+ is
  # shared, so pages reached by several routes are requested once.
  next_pages.each do |next_source, next_page|
    check(library, version, host, port, next_source, next_page, state)
  end
  state
end
# Entry point: fetch the root page, crawl it with `check`, then print a report.

# Wipes the current terminal line (carriage return, 200 spaces, carriage
# return) and writes +text+ in its place.
show_status = lambda { |text| $stdout.write "\r#{' ' * 200}\r#{text}" }

# Splitting the path on "/" yields an empty string first (the part before the
# leading slash), followed by the "features", version and library segments.
_leading, features, version, library = uri.path.split("/")

if features != "features"
  abort "Something went wrong parsing the url #{uri.path} as #{features.inspect} was not \"features\" and so we cant detect library: #{library.inspect}\n"
end

$stdout.write "Checking..."
show_status.call("Parsing... #{uri}")

# The crawl cannot start unless the root page itself loads.
root_response = HTTParty.get(uri)
if root_response.code != 200
  abort "The source page #{uri} could not be loaded succesfully, got #{root_response.code}.\n"
end

results = {}
root_page = Nokogiri.parse(root_response.body)
check(library, version, uri.host, uri.port, uri, root_page, results)
show_status.call("Done!\n")

# Report: anything that did not answer 200 is flagged for manual checking.
# DEBUG or VERBOSE adds the raw entry for flagged links; VERBOSE also prints
# the entry for every link that answered 200.
results.each_pair do |link, outcome|
  if outcome[:code] == 200
    puts "verbose: #{outcome.inspect}" if ENV['VERBOSE']
    next
  end

  puts "Warning! #{outcome[:code]} from #{link} via #{outcome[:source]} please manually check."
  puts "debug: #{outcome.inspect}" if ENV['DEBUG'] || ENV['VERBOSE']
end