@@ -700,6 +700,12 @@ def template_for file, page = true, klass = ERB
700
700
template
701
701
end
702
702
703
+ # :stopdoc:
704
+ ParagraphExcerptRegexpOther = %r[\b \w [^./:]++\. ]
705
+ # Unicode variant of the pattern above: \p{letter} takes the place of \w (excerpt tries this one first and falls back to the \w form on Encoding::CompatibilityError)
706
+ ParagraphExcerptRegexpUnicode = %r[\b \p {letter}[^./:]++\. ]
707
+ # :startdoc:
708
+
703
709
# Returns an excerpt of the comment for usage in meta description tags
704
710
def excerpt ( comment )
705
711
text = case comment
@@ -711,14 +717,22 @@ def excerpt(comment)
711
717
712
718
# Match from the start of a word to the first period, discarding any links, so
713
719
# that we don't end up matching badges in the README
714
- first_paragraph_match = text . match ( /[A-Z][^\. :\/ ]+\. / )
715
- return text [ 0 ...150 ] . gsub ( /\n / , " " ) . squeeze ( " " ) unless first_paragraph_match
720
+ pattern = ParagraphExcerptRegexpUnicode
721
+ begin
722
+ first_paragraph_match = text . match ( pattern )
723
+ rescue Encoding ::CompatibilityError
724
+ # The doc is non-ASCII text in an encoding that is not Unicode-based.
725
+ raise if pattern == ParagraphExcerptRegexpOther
726
+ pattern = ParagraphExcerptRegexpOther
727
+ retry
728
+ end
729
+ return text [ 0 ...150 ] . tr_s ( "\n " , " " ) . squeeze ( " " ) unless first_paragraph_match
716
730
717
731
extracted_text = first_paragraph_match [ 0 ]
718
- second_paragraph = first_paragraph_match . post_match . match ( /[A-Z][^ \. : \/ ]+ \. / )
732
+ second_paragraph = text . match ( pattern , first_paragraph_match . end ( 0 ) )
719
733
extracted_text << " " << second_paragraph [ 0 ] if second_paragraph
720
734
721
- extracted_text [ 0 ...150 ] . gsub ( / \n / , " " ) . squeeze ( " " )
735
+ extracted_text [ 0 ...150 ] . tr_s ( " \n " , " " ) . squeeze ( " " )
722
736
end
723
737
724
738
def generate_ancestor_list ( ancestors , klass )
0 commit comments