Skip to content

Commit fb62266

Browse files
authored
Merge pull request #2519 from spamguy/zsh
Implement Zsh scraper
2 parents 11f1072 + 4fb53e1 commit fb62266

File tree

7 files changed

+133
-0
lines changed

7 files changed

+133
-0
lines changed

assets/javascripts/news.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
[
2+
[
3+
"2025-06-27",
4+
"New documentation: <a href=\"/zsh/\">Zsh</a>"
5+
],
26
[
37
"2025-06-04",
48
"New documentation: <a href=\"/es_toolkit/\">es-toolkit</a>"

lib/docs/filters/zsh/clean_html.rb

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
module Docs
  class Zsh
    # Cleans up a scraped Zsh manual page: removes `table.header`,
    # `table.menu` and `hr` elements, strips the leading numeric index from
    # headings, and copies anchor names onto the following element as `id`s.
    class CleanHtmlFilter < Filter
      # Matches a heading with an optional dotted numeric index followed by a
      # space (e.g. "9.3.1 Hook Functions") and captures the text after it.
      INDEXED_HEADER = /^[\d\.]* (.*)$/

      # Runs the clean-up steps in place.
      #
      # @return the filtered document (`doc`)
      def call
        css('table.header', 'table.menu', 'hr').remove

        # Remove indices from headers. A heading that does not match the
        # pattern is left untouched rather than being overwritten with nil,
        # which would wipe its text.
        css('h1', 'h2', 'h3').each do |node|
          title = node.content[INDEXED_HEADER, 1]
          node.content = title if title
        end

        # For every <a> that is a later sibling of an h2.section, give the
        # element right after it the anchor's name as its id.
        css('h2.section ~ a').each do |node|
          target = node.next_element
          # An anchor may be the last element of its parent, or carry no
          # name attribute; in either case there is nothing to copy.
          next if target.nil? || node['name'].nil?

          target['id'] = node['name']
        end

        doc
      end
    end
  end
end

lib/docs/filters/zsh/entries.rb

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
module Docs
  class Zsh
    # Builds index entries for a Zsh manual page: one entry per `h2.section`
    # heading plus one per function/operator name found in top-level <dl>
    # lists. The page's `h1.chapter` title is used both as the page name and
    # as the type of every entry.
    class EntriesFilter < Docs::EntriesFilter
      # @return [String, nil] the chapter title without its numeric index
      def get_name
        chapter_title
      end

      # @return [Array<Array>] `[name, anchor_name, type]` triples, section
      #   entries first, followed by function entries
      def additional_entries
        type = get_type
        section_entries(type) + function_entries(type)
      end

      # @return [String, nil] the chapter title without its numeric index
      def get_type
        chapter_title
      end

      private

      # Text of the page's h1.chapter heading with the index removed.
      def chapter_title
        extract_header_text(at_css('h1.chapter').content)
      end

      # Collects one entry per h2.section heading.
      def section_entries(type)
        css('h2.section').each_with_object([]) do |node, entries|
          # Linkable anchor sits above <h2>.
          anchor = node.xpath('preceding-sibling::a').last
          header_text = extract_header_text(node.content)
          # Without an anchor there is nothing to link to, and without a
          # parsable heading there is no name; skip instead of raising.
          next if anchor.nil? || header_text.nil?

          case type
          when 'Zsh Modules'
            module_name = header_text[/The (zsh\/.* Module)/, 1]
            header_text = module_name if module_name.present?
          when 'Calendar Function System'
            header_text = "#{header_text} (Calendar)"
          end

          next if header_text.start_with?('Description')

          entries << [header_text, anchor['name'], type]
        end
      end

      # Collects one entry per distinct function name documented in the
      # document's top-level <dl> elements.
      #
      # Functions are documented within <dl> elements.
      # Names are wrapped in <dt>, details within <dd>.
      # <dd> can also contain anchors for the next function.
      def function_entries(type)
        entries = []
        seen = {} # names already emitted on this page; first occurrence wins

        doc.css('> dl').each do |list|
          terms = list.css('> dt')

          # The i-th named anchor found under a <dd> is paired with the
          # i-th direct <dt> of the list.
          list.css('dd a[name]').each_with_index do |anchor, i|
            term = terms[i]
            next unless term.present? && anchor['name'].present?

            term['id'] = anchor['name']

            function_names(term, type).each do |fn|
              next if seen.key?(fn)

              seen[fn] = true
              entries << [fn, anchor['name'], type]
            end
          end
        end

        entries
      end

      # Extracts the bare function names from a <dt> node.
      #
      # Groups of functions are sometimes comma-delimited.
      # Strip arguments, flags, etc. from function name.
      # Skip flag-only headers.
      def function_names(term, type)
        term.inner_html.split(', ').each_with_object([]) do |raw, names|
          fn = raw.gsub(/<(?:tt|var)>(.+?)<\/(?:tt|var)>/, '\1').split(' ').first
          # A blank fragment has no first token; nothing to index.
          next if fn.nil?

          fn = fn.gsub(/(?:[\[\(]).*(?:[\]\)]).*$/, '')

          # Add context for operators.
          fn = "#{fn} (#{type})" if fn.length == 1

          names << fn if fn.present? && !fn.match?(/^[\-\[]/)
        end
      end

      # Extracts text from a string, dropping indices preceding it.
      # Returns nil when the string does not match the expected
      # "<index> <text>" shape.
      def extract_header_text(str)
        str[/^[\d\.]* (.*)$/, 1]
      end
    end
  end
end

lib/docs/scrapers/zsh.rb

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
module Docs
  # Scraper configuration for the Zsh manual published under
  # https://zsh.sourceforge.io/Doc/Release/.
  class Zsh < UrlScraper
    self.type = 'zsh'
    self.release = '5.9.0'
    self.base_url = 'https://zsh.sourceforge.io/Doc/Release/'
    self.root_path = 'index.html'
    self.links = {
      home: 'https://zsh.sourceforge.io/',
      code: 'https://sourceforge.net/p/zsh/web/ci/master/tree/'
    }

    html_filters.push 'zsh/entries', 'zsh/clean_html'

    # Pages handed to the :skip option, followed by a pattern for the
    # "*-Index.html" pages.
    options[:skip] = [
      'zsh_toc.html',
      'zsh_abt.html',
      'The-Z-Shell-Manual.html',
      'Introduction.html'
    ]
    options[:skip_patterns] = [/-Index.html/]

    options[:attribution] = <<-HTML
      The Z Shell is copyright &copy; 1992&ndash;2017 Paul Falstad, Richard Coleman,
      Zoltán Hidvégi, Andrew Main, Peter Stephenson, Sven Wischnowsky, and others.<br />
      Licensed under the MIT License.
    HTML

    # Reads the release number out of the manual's landing page by looking
    # for the first "Zsh version X.Y.Z" occurrence.
    #
    # @param opts options forwarded unchanged to `fetch`
    # @return [String] the first version string found on the page
    def get_latest_version(opts)
      page = fetch('https://zsh.sourceforge.io/Doc/Release', opts)
      versions = page.scan(/Zsh version ([0-9.]+)/)
      versions.first.first
    end
  end
end

public/icons/docs/zsh/16.png

687 Bytes
Loading

public/icons/docs/zsh/16@2x.png

1.18 KB
Loading

public/icons/docs/zsh/SOURCE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
https://sourceforge.net/p/zsh/web/ci/master/tree/favicon.png
2+

0 commit comments

Comments
 (0)