From dca05ee0a5d840eabc95a7c4c8ff762a1ccba73f Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Fri, 5 Nov 2021 21:57:43 +0000 Subject: [PATCH] Add warning for BIDI characters in page renders and in diffs Fix #17514 Signed-off-by: Andrew Thornton --- modules/charset/charset.go | 57 ++++++++++++++++ modules/charset/charset_test.go | 93 +++++++++++++++++++++++++- options/locale/locale_en-US.ini | 3 + routers/web/repo/lfs.go | 1 + routers/web/repo/view.go | 7 ++ routers/web/repo/wiki.go | 4 ++ services/gitdiff/gitdiff.go | 21 ++++++ templates/repo/diff/section_split.tmpl | 10 +-- templates/repo/settings/lfs_file.tmpl | 9 +++ templates/repo/view_file.tmpl | 9 +++ templates/repo/wiki/view.tmpl | 33 ++++++++- web_src/js/features/common-global.js | 6 ++ web_src/js/features/diff.js | 28 ++++++++ web_src/js/index.js | 3 +- web_src/less/_base.less | 4 ++ web_src/less/_review.less | 9 +++ 16 files changed, 287 insertions(+), 10 deletions(-) diff --git a/modules/charset/charset.go b/modules/charset/charset.go index ae5cf5aa1a4e..efc7d85efa14 100644 --- a/modules/charset/charset.go +++ b/modules/charset/charset.go @@ -23,6 +23,63 @@ import ( // UTF8BOM is the utf-8 byte-order marker var UTF8BOM = []byte{'\xef', '\xbb', '\xbf'} +// BIDIRunes are runes that are explicitly mentioned in CVE-2021-42574 +var BIDIRunes = "\u202A\u202B\u202C\u202D\u202E\u2066\u2067\u2068\u2069" + +// ContainsBIDIRuneString checks some text for any bidi rune +func ContainsBIDIRuneString(text string) bool { + return strings.ContainsAny(text, BIDIRunes) +} + +// ContainsBIDIRuneBytes checks some text for any bidi rune +func ContainsBIDIRuneBytes(text []byte) bool { + return bytes.ContainsAny(text, BIDIRunes) +} + +// ContainsBIDIRuneReader checks some text for any bidi rune +func ContainsBIDIRuneReader(text io.Reader) (bool, error) { + buf := make([]byte, 4096) + readStart := 0 + var err error + var n int + for err == nil { + n, err = text.Read(buf[readStart:]) + bs := buf[:n] + i := 0 + inner: + for i < n { + r, size := utf8.DecodeRune(bs[i:]) + if r == utf8.RuneError { + // need to decide what to do here... runes can be at most 4 bytes - so... i123n + if n-i > 3 { + // this is a real broken rune + return true, fmt.Errorf("text contains bad rune: %x", bs[i]) + } + + break inner + } + if strings.ContainsRune(BIDIRunes, r) { + return true, nil + } + i += size + } + if n > 0 { + readStart = 0 + } + if i < n { + copy(buf, bs[i:n]) + readStart = n - i + } + } + if readStart > 0 { + return true, fmt.Errorf("text contains bad rune: %x", buf[0]) + } + if err == io.EOF { + return false, nil + } + return true, err +} + // ToUTF8WithFallbackReader detects the encoding of content and coverts to UTF-8 reader if possible func ToUTF8WithFallbackReader(rd io.Reader) io.Reader { var buf = make([]byte, 2048) diff --git a/modules/charset/charset_test.go b/modules/charset/charset_test.go index 33f0c10a7a20..d4b01bcd15ad 100644 --- a/modules/charset/charset_test.go +++ b/modules/charset/charset_test.go @@ -5,11 +5,11 @@ package charset import ( + "bytes" "strings" "testing" "code.gitea.io/gitea/modules/setting" - "github.com/stretchr/testify/assert" ) @@ -272,3 +272,94 @@ func stringMustEndWith(t *testing.T, expected string, value string) { func bytesMustStartWith(t *testing.T, expected []byte, value []byte) { assert.Equal(t, expected, value[:len(expected)]) } + +var bidiTestCases = []struct { + name string + text string + want bool +}{ + { + name: "Accented characters", + text: string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}), + want: false, + }, + { + name: "Program", + text: "string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})", + want: false, + }, + { + name: "CVE testcase", + text: "if access_level != \"user\u202E \u2066// Check if admin\u2069 \u2066\" {", + want: true, + }, +} + +func TestContainsBIDIRuneString(t *testing.T) { + for _, tt := range bidiTestCases { + t.Run(tt.name, func(t *testing.T) { + if got := ContainsBIDIRuneString(tt.text); got != tt.want { + t.Errorf("ContainsBIDIRuneString() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestContainsBIDIRuneBytes(t *testing.T) { + for _, tt := range bidiTestCases { + t.Run(tt.name, func(t *testing.T) { + if got := ContainsBIDIRuneBytes([]byte(tt.text)); got != tt.want { + t.Errorf("ContainsBIDIRuneBytes() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestContainsBIDIRuneReader(t *testing.T) { + for _, tt := range bidiTestCases { + t.Run(tt.name, func(t *testing.T) { + got, err := ContainsBIDIRuneReader(strings.NewReader(tt.text)) + if err != nil { + t.Errorf("ContainsBIDIRuneReader() error = %v", err) + return + } + if got != tt.want { + t.Errorf("ContainsBIDIRuneReader() = %v, want %v", got, tt.want) + } + }) + } + + // now we need some specific tests with broken runes + for _, tt := range bidiTestCases { + t.Run(tt.name+"-terminal-xc2", func(t *testing.T) { + bs := []byte(tt.text) + bs = append(bs, 0xc2) + got, err := ContainsBIDIRuneReader(bytes.NewReader(bs)) + if !tt.want { + if err == nil { + t.Errorf("ContainsBIDIRuneReader() should have errored") + return + } + } else if !got { + t.Errorf("ContainsBIDIRuneReader() should have returned true") + return + } + }) + } + + for _, tt := range bidiTestCases { + t.Run(tt.name+"-prefix-xff", func(t *testing.T) { + bs := append([]byte{0xff}, []byte(tt.text)...) + got, err := ContainsBIDIRuneReader(bytes.NewReader(bs)) + if err == nil { + t.Errorf("ContainsBIDIRuneReader() should have errored") + return + } + if !got { + t.Errorf("ContainsBIDIRuneReader() should have returned true") + return + } + }) + } + +} diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index c1762799ebcf..f01afe04b45e 100644 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -977,6 +977,8 @@ file_view_rendered = View Rendered file_view_raw = View Raw file_permalink = Permalink file_too_large = The file is too large to be shown. +bidi_header = `This file contains hidden Unicode characters!` +bidi_description = `This file contains hidden Unicode characters that may be processed differently from what appears below.
If your use case is intentional and legitimate, you can safely ignore this warning.
Consult documentation of your favorite text editor for how to open this file using `DOS (CP 437)` encoding instead of Unicode, to reveal hidden characters.` file_copy_permalink = Copy Permalink video_not_supported_in_browser = Your browser does not support the HTML5 'video' tag. audio_not_supported_in_browser = Your browser does not support the HTML5 'audio' tag. @@ -2057,6 +2059,7 @@ diff.protected = Protected diff.image.side_by_side = Side by Side diff.image.swipe = Swipe diff.image.overlay = Overlay +diff.has_bidi = This line has hidden Unicode characters releases.desc = Track project versions and downloads. release.releases = Releases diff --git a/routers/web/repo/lfs.go b/routers/web/repo/lfs.go index 5e24cfa3c0a8..ea859cf6806b 100644 --- a/routers/web/repo/lfs.go +++ b/routers/web/repo/lfs.go @@ -316,6 +316,7 @@ func LFSFileGet(ctx *context.Context) { output.WriteString(fmt.Sprintf(`
  • %s
  • `, index+1, index+1, line)) } ctx.Data["FileContent"] = gotemplate.HTML(output.String()) + ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneString(output.String()) output.Reset() for i := 0; i < len(lines); i++ { diff --git a/routers/web/repo/view.go b/routers/web/repo/view.go index 90be631c7349..68fc7ca40534 100644 --- a/routers/web/repo/view.go +++ b/routers/web/repo/view.go @@ -332,11 +332,13 @@ func renderDirectory(ctx *context.Context, treeLink string) { if err != nil { log.Error("Render failed: %v then fallback", err) bs, _ := io.ReadAll(rd) + ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneBytes(bs) ctx.Data["FileContent"] = strings.ReplaceAll( gotemplate.HTMLEscapeString(string(bs)), "\n", `
    `, ) } else { ctx.Data["FileContent"] = result.String() + ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneString(result.String()) } } else { ctx.Data["IsRenderedHTML"] = true @@ -344,6 +346,7 @@ func renderDirectory(ctx *context.Context, treeLink string) { if err != nil { log.Error("ReadAll failed: %v", err) } + ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneBytes(buf) ctx.Data["FileContent"] = strings.ReplaceAll( gotemplate.HTMLEscapeString(string(buf)), "\n", `
    `, ) @@ -490,9 +493,11 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st return } ctx.Data["FileContent"] = result.String() + ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneString(result.String()) } else if readmeExist { buf, _ := io.ReadAll(rd) ctx.Data["IsRenderedHTML"] = true + ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneBytes(buf) ctx.Data["FileContent"] = strings.ReplaceAll( gotemplate.HTMLEscapeString(string(buf)), "\n", `
    `, ) @@ -501,6 +506,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st lineNums := linesBytesCount(buf) ctx.Data["NumLines"] = strconv.Itoa(lineNums) ctx.Data["NumLinesSet"] = true + ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneBytes(buf) ctx.Data["FileContent"] = highlight.File(lineNums, blob.Name(), buf) } if !isLFSFile { @@ -550,6 +556,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st return } ctx.Data["FileContent"] = result.String() + ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneString(result.String()) } } diff --git a/routers/web/repo/wiki.go b/routers/web/repo/wiki.go index a571c46fd73e..de51e93f7381 100644 --- a/routers/web/repo/wiki.go +++ b/routers/web/repo/wiki.go @@ -16,6 +16,7 @@ import ( "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/base" + "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/context" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/log" @@ -232,6 +233,7 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) { return nil, nil } ctx.Data["content"] = buf.String() + ctx.Data["HasBIDI"] = charset.ContainsBIDIRuneString(buf.String()) buf.Reset() if err := markdown.Render(rctx, bytes.NewReader(sidebarContent), &buf); err != nil { @@ -243,6 +245,7 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) { } ctx.Data["sidebarPresent"] = sidebarContent != nil ctx.Data["sidebarContent"] = buf.String() + ctx.Data["sidebarHasBIDI"] = charset.ContainsBIDIRuneString(buf.String()) buf.Reset() if err := markdown.Render(rctx, bytes.NewReader(footerContent), &buf); err != nil { @@ -254,6 +257,7 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) { } ctx.Data["footerPresent"] = footerContent != nil ctx.Data["footerContent"] = buf.String() + ctx.Data["footerHasBIDI"] = charset.ContainsBIDIRuneString(buf.String()) // get commit count - wiki revisions commitsCount, _ := wikiRepo.FileCommitsCount("master", pageFilename) diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index f843bc4dcf9d..df99c96408c6 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -117,6 +117,27 @@ func (d *DiffLine) GetCommentSide() string { return d.Comments[0].DiffSide() } +// HasBIDI returns true if there is a BIDI rune in this line +func (d *DiffLine) HasBIDI() bool { + return charset.ContainsBIDIRuneString(d.Content) +} + +// LeftHasBIDI returns true if there is a BIDI rune in this line +func (d *DiffLine) LeftHasBIDI() bool { + if d.LeftIdx > 0 { + return charset.ContainsBIDIRuneString(d.Content) + } + return false +} + +// RightHasBIDI returns true if there is a BIDI rune in this line +func (d *DiffLine) RightHasBIDI() bool { + if d.RightIdx > 0 { + return charset.ContainsBIDIRuneString(d.Content) + } + return false +} + // GetLineTypeMarker returns the line type marker func (d *DiffLine) GetLineTypeMarker() string { if strings.IndexByte(" +-", d.Content[0]) > -1 { diff --git a/templates/repo/diff/section_split.tmpl b/templates/repo/diff/section_split.tmpl index aed6d784b307..ebc88cffe19a 100644 --- a/templates/repo/diff/section_split.tmpl +++ b/templates/repo/diff/section_split.tmpl @@ -22,22 +22,22 @@ {{end}} - {{$section.GetComputedInlineDiffFor $line}} + {{if $line.LeftHasBIDI}}️⚠{{end}}{{$section.GetComputedInlineDiffFor $line}} {{else if and (eq .GetType 3) $hasmatch}}{{/* DEL */}} {{$match := index $section.Lines $line.Match}} - {{if and $.root.SignedUserID $.root.PageIsPullFiles}}{{svg "octicon-plus"}}{{end}}{{if $line.LeftIdx}}{{$section.GetComputedInlineDiffFor $line}}{{end}} + {{if and $.root.SignedUserID $.root.PageIsPullFiles}}{{svg "octicon-plus"}}{{end}}{{if $line.LeftHasBIDI}}️⚠{{end}}{{if $line.LeftIdx}}{{$section.GetComputedInlineDiffFor $line}}{{end}} {{if $match.RightIdx}}{{end}} - {{if and $.root.SignedUserID $.root.PageIsPullFiles}}{{svg "octicon-plus"}}{{end}}{{if $match.RightIdx}}{{$section.GetComputedInlineDiffFor $match}}{{end}} + {{if and $.root.SignedUserID $.root.PageIsPullFiles}}{{svg "octicon-plus"}}{{end}}{{if $match.RightHasBIDI}}️⚠{{end}}{{if $match.RightIdx}}{{$section.GetComputedInlineDiffFor $match}}{{end}} {{else}} {{if $line.LeftIdx}}{{end}} - {{if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 2))}}{{svg "octicon-plus"}}{{end}}{{if $line.LeftIdx}}{{$section.GetComputedInlineDiffFor $line}}{{end}} + {{if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 2))}}{{svg "octicon-plus"}}{{end}}{{if $line.LeftHasBIDI}}️⚠{{end}}{{if $line.LeftIdx}}{{$section.GetComputedInlineDiffFor $line}}{{end}} {{if $line.RightIdx}}{{end}} - {{if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 3))}}{{svg "octicon-plus"}}{{end}}{{if $line.RightIdx}}{{$section.GetComputedInlineDiffFor $line}}{{end}} + {{if and $.root.SignedUserID $.root.PageIsPullFiles (not (eq .GetType 3))}}{{svg "octicon-plus"}}{{end}}{{if $line.RightHasBIDI}}️⚠{{end}}{{if $line.RightIdx}}{{$section.GetComputedInlineDiffFor $line}}{{end}} {{end}} {{if and (eq .GetType 3) $hasmatch}} diff --git a/templates/repo/settings/lfs_file.tmpl b/templates/repo/settings/lfs_file.tmpl index 478c034e1107..5719571da279 100644 --- a/templates/repo/settings/lfs_file.tmpl +++ b/templates/repo/settings/lfs_file.tmpl @@ -12,6 +12,15 @@
    + {{if .HasBIDI}} +
    + {{svg "octicon-x" 16 "close inside"}} +
    + {{.i18n.Tr "repo.bidi_header"}} +
    + {{.i18n.Tr "repo.bidi_description" | Str2html}} +
    + {{end}}
    {{if .IsMarkup}} {{if .FileContent}}{{.FileContent | Safe}}{{end}} diff --git a/templates/repo/view_file.tmpl b/templates/repo/view_file.tmpl index 0c8990a4f5c8..0d9aa99b61f5 100644 --- a/templates/repo/view_file.tmpl +++ b/templates/repo/view_file.tmpl @@ -64,6 +64,15 @@ {{end}}
    + {{if .HasBIDI}} +
    + {{svg "octicon-x" 16 "close inside"}} +
    + {{.i18n.Tr "repo.bidi_header"}} +
    + {{.i18n.Tr "repo.bidi_description" | Str2html}} +
    + {{end}}
    {{if .IsMarkup}} {{if .FileContent}}{{.FileContent | Safe}}{{end}} diff --git a/templates/repo/wiki/view.tmpl b/templates/repo/wiki/view.tmpl index a393fb20a18f..a2d6eb7b5e7b 100644 --- a/templates/repo/wiki/view.tmpl +++ b/templates/repo/wiki/view.tmpl @@ -62,6 +62,15 @@ {{end}}
    + {{if .HasBIDI}} +
    + {{svg "octicon-x" 16 "close inside"}} +
    + {{.i18n.Tr "repo.bidi_header"}} +
    + {{.i18n.Tr "repo.bidi_description" | Str2html}} +
    + {{end}} {{.content | Str2html}}
    {{if .sidebarPresent}} @@ -70,6 +79,15 @@ {{if and .CanWriteWiki (not .Repository.IsMirror)}} {{svg "octicon-pencil"}} {{end}} + {{if .sidebarHasBIDI}} +
    + {{svg "octicon-x" 16 "close inside"}} +
    + {{.i18n.Tr "repo.bidi_header"}} +
    + {{.i18n.Tr "repo.bidi_description" | Str2html}} +
    + {{end}} {{.sidebarContent | Str2html}}
    @@ -77,9 +95,18 @@
    {{if .footerPresent}}
    - {{if and .CanWriteWiki (not .Repository.IsMirror)}} - {{svg "octicon-pencil"}} - {{end}} + {{if and .CanWriteWiki (not .Repository.IsMirror)}} + {{svg "octicon-pencil"}} + {{end}} + {{if .footerHasBIDI}} +
    + {{svg "octicon-x" 16 "close inside"}} +
    + {{.i18n.Tr "repo.bidi_header"}} +
    + {{.i18n.Tr "repo.bidi_description" | Str2html}} +
    + {{end}} {{.footerContent | Str2html}}
    {{end}} diff --git a/web_src/js/features/common-global.js b/web_src/js/features/common-global.js index 04d44d8142ab..eb5216bfdc7b 100644 --- a/web_src/js/features/common-global.js +++ b/web_src/js/features/common-global.js @@ -82,6 +82,12 @@ export function initGlobalCommon() { } // Semantic UI modules. + $('.message .close').on('click', function() { + $(this) + .closest('.message') + .hide(); + }); + $('.dropdown:not(.custom)').dropdown({ fullTextSearch: 'exact' }); diff --git a/web_src/js/features/diff.js b/web_src/js/features/diff.js index ef0aaceeeb50..75797733b4ea 100644 --- a/web_src/js/features/diff.js +++ b/web_src/js/features/diff.js @@ -22,3 +22,31 @@ export function initDiffShowMore() { }); }); } + +export function initShowBidi() { + $('a.code-has-bidi').on('click', (e) => { + e.preventDefault(); + + $('a.code-has-bidi').each((_, target) => { + const inner = $(target).siblings().closest('.code-inner'); + const escaped = inner.data('escaped'); + let original = inner.data('original'); + + if (escaped) { + inner.html(original); + inner.data('escaped', ''); + } else { + if (!original) { + original = $(inner).html(); + inner.data('original', original); + } + + inner.html(original.replaceAll(/([\u202A\u202B\u202C\u202D\u202E\u2066\u2067\u2068\u2069])/g, (match) => { + const value = match.charCodeAt(0).toString(16); + return `&#${value};`; + })); + inner.data('escaped', 'escaped'); + } + }); + }); +} diff --git a/web_src/js/index.js b/web_src/js/index.js index a3bd35175e5a..ecec6683a460 100644 --- a/web_src/js/index.js +++ b/web_src/js/index.js @@ -20,7 +20,7 @@ import {initNotificationCount, initNotificationsTable} from './features/notifica import {initLastCommitLoader} from './features/lastcommitloader.js'; import {initIssueContentHistory} from './features/issue-content-history.js'; import {initStopwatch} from './features/stopwatch.js'; -import {initDiffShowMore} from './features/diff.js'; +import {initDiffShowMore, initShowBidi} from './features/diff.js'; import {initCommentContent, initMarkupContent} from './markup/content.js'; import {initUserAuthLinkAccountView, initUserAuthOauth2} from './features/user-auth.js'; @@ -135,6 +135,7 @@ $(document).ready(async () => { initRepoReleaseEditor(); initRepoRelease(); initDiffShowMore(); + initShowBidi(); initIssueContentHistory(); initAdminUserListSearchForm(); initGlobalCopyToClipboardListener(); diff --git a/web_src/less/_base.less b/web_src/less/_base.less index c09f3a2bdd8a..01b7c1770292 100644 --- a/web_src/less/_base.less +++ b/web_src/less/_base.less @@ -2118,3 +2118,7 @@ table th[data-sortt-desc] { height: 15px; } } + +.escaped-char { + background-color: red; +} diff --git a/web_src/less/_review.less b/web_src/less/_review.less index 12bd6a608a8b..eaa3ca31f75c 100644 --- a/web_src/less/_review.less +++ b/web_src/less/_review.less @@ -20,6 +20,15 @@ opacity: 1; } +.diff-file-box .code-has-bidi { + opacity: 1; + padding-left: 0 !important; + padding-right: 0 !important; + padding-top: 2px; + padding-bottom: 2px; + font-family: var(--fonts-emoji); +} + .repository .diff-file-box .code-diff .add-comment-left, .repository .diff-file-box .code-diff .add-comment-right, .repository .diff-file-box .code-diff .add-code-comment .add-comment-left,