Skip to content

Commit

Permalink
limit TitleExtractor to allow only Remark42 whitelisted domains
Browse files Browse the repository at this point in the history
Allowed domains consist of `REMARK_URL` second-level domain (or whole IP in case it's IP like 127.0.0.1) and `ALLOWED_HOSTS`. That is needed to prevent Remark42 from asking arbitrary servers and storing the page title as the comment.PostTitle.

Previous behaviour allowed the caller of the API to create a comment
with an arbitrary URL and learn the title of the page, which might be
accessible to the server Remark42 is installed on but not to the user
outside that network (CWE-918).
  • Loading branch information
paskal committed Oct 10, 2023
1 parent 7a71d47 commit 25a8d88
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 29 deletions.
24 changes: 23 additions & 1 deletion backend/app/cmd/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"embed"
"fmt"
"net"
"net/http"
"net/url"
"os"
Expand Down Expand Up @@ -495,6 +496,12 @@ func (s *ServerCommand) newServerApp(ctx context.Context) (*serverApp, error) {
}
log.Printf("[DEBUG] image service for url=%s, EditDuration=%v", imageService.ImageAPI, imageService.EditDuration)

// Extract second-level domain from s.RemarkURL. It can be an IP like http://127.0.0.1, in which case we need to use the whole IP as the domain
allowedDomains, err := s.getAllowedDomains()
if err != nil {
return nil, fmt.Errorf("failed to make allowed domains list: %w", err)
}

dataService := &service.DataStore{
Engine: storeEngine,
EditDuration: s.EditDuration,
Expand All @@ -504,7 +511,7 @@ func (s *ServerCommand) newServerApp(ctx context.Context) (*serverApp, error) {
MaxVotes: s.MaxVotes,
PositiveScore: s.PositiveScore,
ImageService: imageService,
TitleExtractor: service.NewTitleExtractor(http.Client{Timeout: time.Second * 5}),
TitleExtractor: service.NewTitleExtractor(http.Client{Timeout: time.Second * 5}, allowedDomains),
RestrictedWordsMatcher: service.NewRestrictedWordsMatcher(service.StaticRestrictedWordsLister{Words: s.RestrictedWords}),
}
dataService.RestrictSameIPVotes.Enabled = s.RestrictVoteIP
Expand Down Expand Up @@ -633,6 +640,21 @@ func (s *ServerCommand) newServerApp(ctx context.Context) (*serverApp, error) {
}, nil
}

// getAllowedDomains builds the list of domains the TitleExtractor is allowed
// to query: every entry of s.AllowedHosts plus the domain derived from
// s.RemarkURL. When the REMARK_URL host is an IP (e.g. 127.0.0.1) it is used
// as-is; otherwise a hostname with more than two labels is reduced to its
// last two labels (the second-level domain).
func (s *ServerCommand) getAllowedDomains() ([]string, error) {
	remark42URL, err := url.Parse(s.RemarkURL)
	if err != nil {
		return nil, fmt.Errorf("failed to parse REMARK_URL: %w", err)
	}
	remark42Domain := remark42URL.Hostname()
	// keep the whole host when it's an IP; otherwise keep only the last two labels
	if net.ParseIP(remark42Domain) == nil {
		if parts := strings.Split(remark42Domain, "."); len(parts) > 2 {
			remark42Domain = strings.Join(parts[len(parts)-2:], ".")
		}
	}

	// copy s.AllowedHosts into a fresh slice: appending directly to it could
	// overwrite the shared backing array if the caller's slice has spare capacity
	allowedDomains := make([]string, 0, len(s.AllowedHosts)+1)
	allowedDomains = append(allowedDomains, s.AllowedHosts...)
	allowedDomains = append(allowedDomains, remark42Domain)
	return allowedDomains, nil
}

// Run all application objects
func (a *serverApp) run(ctx context.Context) error {
if a.AdminPasswd != "" {
Expand Down
2 changes: 1 addition & 1 deletion backend/app/rest/api/admin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ func TestAdmin_Title(t *testing.T) {
ts, srv, teardown := startupT(t)
defer teardown()

srv.DataService.TitleExtractor = service.NewTitleExtractor(http.Client{Timeout: time.Second})
srv.DataService.TitleExtractor = service.NewTitleExtractor(http.Client{Timeout: time.Second}, []string{"127.0.0.1"})
tss := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.String() == "/post1" {
_, err := w.Write([]byte("<html><title>post1 blah 123</title><body> 2222</body></html>"))
Expand Down
5 changes: 3 additions & 2 deletions backend/app/rest/api/rest_public_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ func TestRest_FindUserComments(t *testing.T) {

func TestRest_FindUserComments_CWE_918(t *testing.T) {
ts, srv, teardown := startupT(t)
srv.DataService.TitleExtractor = service.NewTitleExtractor(http.Client{Timeout: time.Second}) // required for extracting the title, bad URL test
srv.DataService.TitleExtractor = service.NewTitleExtractor(http.Client{Timeout: time.Second}, []string{"radio-t.com"}) // required for extracting the title, bad URL test
defer srv.DataService.TitleExtractor.Close()
defer teardown()

Expand All @@ -496,7 +496,8 @@ func TestRest_FindUserComments_CWE_918(t *testing.T) {

assert.False(t, backendRequestedArbitraryServer)
addComment(t, arbitraryURLComment, ts)
assert.True(t, backendRequestedArbitraryServer)
assert.False(t, backendRequestedArbitraryServer,
"no request is expected to the test server as it's not in the list of the allowed domains for the title extractor")

res, code := get(t, ts.URL+"/api/v1/comments?site=remark42&user=provider1_dev")
assert.Equal(t, http.StatusOK, code)
Expand Down
10 changes: 5 additions & 5 deletions backend/app/store/service/service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ func TestService_CreateFromPartialWithTitle(t *testing.T) {
eng, teardown := prepStoreEngine(t)
defer teardown()
b := DataStore{Engine: eng, AdminStore: ks,
TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second})}
TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{"127.0.0.1"})}
defer b.Close()

postPath := "/post/42"
Expand Down Expand Up @@ -195,7 +195,7 @@ func TestService_SetTitle(t *testing.T) {
eng, teardown := prepStoreEngine(t)
defer teardown()
b := DataStore{Engine: eng, AdminStore: ks,
TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second})}
TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{"127.0.0.1"})}
defer b.Close()
comment := store.Comment{
Text: "text",
Expand Down Expand Up @@ -1658,10 +1658,10 @@ func TestService_DoubleClose_Static(t *testing.T) {
eng, teardown := prepStoreEngine(t)
defer teardown()
b := DataStore{Engine: eng, AdminStore: ks,
TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second})}
b.Close()
TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{})}
assert.NoError(t, b.Close())
// second call should not result in panic or errors
b.Close()
assert.NoError(t, b.Close())
}

// makes new boltdb, put two records
Expand Down
42 changes: 30 additions & 12 deletions backend/app/store/service/title.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"

Expand All @@ -19,14 +20,16 @@ const (

// TitleExtractor gets html title from remote page, cached
type TitleExtractor struct {
client http.Client
cache lcw.LoadingCache
client http.Client
cache lcw.LoadingCache
allowedDomains []string
}

// NewTitleExtractor makes extractor with cache. If memory cache failed, switching to no-cache
func NewTitleExtractor(client http.Client) *TitleExtractor {
func NewTitleExtractor(client http.Client, allowedDomains []string) *TitleExtractor {
res := TitleExtractor{
client: client,
client: client,
allowedDomains: allowedDomains,
}
var err error
res.cache, err = lcw.NewExpirableCache(lcw.TTL(teCacheTTL), lcw.MaxKeySize(teCacheMaxRecs))
Expand All @@ -38,33 +41,48 @@ func NewTitleExtractor(client http.Client) *TitleExtractor {
}

// Get page for url and return title
func (t *TitleExtractor) Get(url string) (string, error) {
func (t *TitleExtractor) Get(pageURL string) (string, error) {
// parse domain of the URL and check if it's in the allowed list
u, err := url.Parse(pageURL)
if err != nil {
return "", fmt.Errorf("failed to parse url %s: %w", pageURL, err)
}
allowed := false
for _, domain := range t.allowedDomains {
if strings.HasSuffix(u.Hostname(), domain) {
allowed = true
break
}
}
if !allowed {
return "", fmt.Errorf("domain %s is not allowed", u.Host)
}
client := http.Client{Timeout: t.client.Timeout, Transport: t.client.Transport}
defer client.CloseIdleConnections()
b, err := t.cache.Get(url, func() (interface{}, error) {
resp, err := client.Get(url)
if err != nil {
return nil, fmt.Errorf("failed to load page %s: %w", url, err)
b, err := t.cache.Get(pageURL, func() (interface{}, error) {
resp, e := client.Get(pageURL)
if e != nil {
return nil, fmt.Errorf("failed to load page %s: %w", pageURL, e)
}
defer func() {
if err = resp.Body.Close(); err != nil {
log.Printf("[WARN] failed to close title extractor body, %v", err)
}
}()
if resp.StatusCode != 200 {
return nil, fmt.Errorf("can't load page %s, code %d", url, resp.StatusCode)
return nil, fmt.Errorf("can't load page %s, code %d", pageURL, resp.StatusCode)
}

title, ok := t.getTitle(resp.Body)
if !ok {
return nil, fmt.Errorf("can't get title for %s", url)
return nil, fmt.Errorf("can't get title for %s", pageURL)
}
return title, nil
})

// on error save result (empty string) to cache too and return "" title
if err != nil {
_, _ = t.cache.Get(url, func() (interface{}, error) { return "", nil })
_, _ = t.cache.Get(pageURL, func() (interface{}, error) { return "", nil })
return "", err
}

Expand Down
16 changes: 8 additions & 8 deletions backend/app/store/service/title_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ func TestTitle_GetTitle(t *testing.T) {
{`<html><body> 2222</body></html>`, false, ""},
}

ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second})
ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{})
defer ex.Close()
for i, tt := range tbl {
tt := tt
Expand All @@ -41,7 +41,7 @@ func TestTitle_GetTitle(t *testing.T) {
}

func TestTitle_Get(t *testing.T) {
ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second})
ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{"127.0.0.1"})
defer ex.Close()
var hits int32
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
Expand Down Expand Up @@ -75,7 +75,7 @@ func TestTitle_GetConcurrent(t *testing.T) {
for n := 0; n < 1000; n++ {
body += "something something blah blah\n"
}
ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second})
ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{"127.0.0.1"})
defer ex.Close()
var hits int32
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
Expand Down Expand Up @@ -104,7 +104,7 @@ func TestTitle_GetConcurrent(t *testing.T) {
}

func TestTitle_GetFailed(t *testing.T) {
ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second})
ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{"127.0.0.1"})
defer ex.Close()
var hits int32
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
Expand All @@ -124,9 +124,9 @@ func TestTitle_GetFailed(t *testing.T) {
assert.Equal(t, int32(1), atomic.LoadInt32(&hits), "hit once, errors cached")
}

func TestTitle_DoubleClosed(*testing.T) {
ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second})
ex.Close()
func TestTitle_DoubleClosed(t *testing.T) {
ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{})
assert.NoError(t, ex.Close())
// second call should not result in panic
ex.Close()
assert.NoError(t, ex.Close())
}

0 comments on commit 25a8d88

Please sign in to comment.