diff --git a/README.md b/README.md index 150e6c7..c3444a9 100644 --- a/README.md +++ b/README.md @@ -135,6 +135,7 @@ htmltest uses a YAML configuration file. Put `.htmltest.yml` in the same directo | `DirectoryIndex` | The file to look for when linking to a directory. | `index.html` | | `FilePath` | Single file to test within `DirectoryPath`, omit to test all. | | | `FileExtension` | Extension of your HTML documents, includes the dot. If `FilePath` is set we use the extension from that. | `.html` | +| `BaseURL` | Publication URL of the site, including subfolder if applicable. | | | `CheckDoctype` | Enables checking the document type declaration. | `true` | | `CheckAnchors` | Enables checking ``. | `false` | | `EnforceHTTPS` | Fails when encountering an `http://` link. Useful to prevent mixed content errors when serving over HTTPS. | `false` | | `IgnoreURLs` | Array of regexs of URLs to ignore. | empty | diff --git a/htmldoc/document_store.go b/htmldoc/document_store.go index adf4386..59533c4 100644 --- a/htmldoc/document_store.go +++ b/htmldoc/document_store.go @@ -4,9 +4,11 @@ package htmldoc import ( + "net/url" "os" "path" "regexp" + "strings" "github.com/wjdp/htmltest/output" ) @@ -14,6 +16,7 @@ import ( // DocumentStore struct, store of Documents including Document discovery type DocumentStore struct { BasePath string // Path, relative to cwd, the site is located in + BaseURL *url.URL // Base URL of the site IgnorePatterns []interface{} // Regexes of directories to ignore Documents []*Document // All of the documents, used to iterate over DocumentPathMap map[string]*Document // Maps slash separated paths to documents @@ -104,7 +107,17 @@ func (dS *DocumentStore) ResolvePath(refPath string) (*Document, bool) { if refPath[0] == '/' && len(refPath) > 1 { // Is an absolute link, remove the leading slash for map lookup - refPath = refPath[1:] + if dS.BaseURL == nil { + // No base URL, so `/` means our root + refPath = refPath[1:] + } else { + // We have a Base URL, 
so need to strip off the base path if present + refPath = strings.TrimPrefix(refPath, dS.BaseURL.Path) + + // We want to end up with a relative path, so remove leading '/' if present + // (This happens if BaseURL does *not* end in '/') + refPath = strings.TrimPrefix(refPath, "/") + } } // Try path as-is, path.ext diff --git a/htmltest/check-link.go b/htmltest/check-link.go index aacdaa1..5f20122 100644 --- a/htmltest/check-link.go +++ b/htmltest/check-link.go @@ -17,8 +17,9 @@ import ( ) // ignoredRels: List of rel values to ignore, dns-prefetch and preconnect are ignored as they are not links to be -// followed rather telling browser we want something on that host, if the root of that host is not valid, -// it's likely not a problem. +// +// followed rather telling browser we want something on that host, if the root of that host is not valid, +// it's likely not a problem. var ignoredRels = [...]string{"dns-prefetch", "preconnect"} func (hT *HTMLTest) checkLink(document *htmldoc.Document, node *html.Node) { @@ -139,6 +140,26 @@ func (hT *HTMLTest) checkExternal(ref *htmldoc.Reference) { return } + // Is this an external reference to a local file? + if hT.opts.CheckSelfReferencesAsInternal && hT.documentStore.BaseURL != nil { + + if ref.URL.Host == hT.documentStore.BaseURL.Host && + hT.documentStore.BaseURL.User == nil && + strings.HasPrefix(ref.URL.Path, hT.documentStore.BaseURL.Path) { + // Convert to internal reference + internalURL := *ref.URL + internalURL.Scheme = "" + internalURL.Host = "" + + internalRef := *ref + internalRef.URL = &internalURL + internalRef.Path = internalURL.String() + + hT.checkInternal(&internalRef) + return + } + } + urlStr := ref.URLString() // Does this url match an url ignore rule? 
diff --git a/htmltest/check-link_test.go b/htmltest/check-link_test.go index 3322b02..82ce2d5 100644 --- a/htmltest/check-link_test.go +++ b/htmltest/check-link_test.go @@ -767,6 +767,53 @@ func TestAnchorBlankHTML4(t *testing.T) { tExpectIssueCount(t, hT2, 1) } +func TestAnchorInternalAbsolute(t *testing.T) { + // works for internal absolute links + hT := tTestFile("fixtures/links/absoluteLinks.html") + tExpectIssueCount(t, hT, 0) +} + +func TestAnchorInternalAbsoluteRootPublication(t *testing.T) { + // works for internal absolute links when site is published to root of domain + hT := tTestFileOpts("fixtures/links/absoluteLinksRootPublication.html", + map[string]interface{}{"BaseURL": "http://example.com"}) + tExpectIssueCount(t, hT, 0) +} + +func TestAnchorInternalAbsoluteFolderPublication(t *testing.T) { + // works for internal absolute links when site is published to a folder + hT := tTestFileOpts("fixtures/links/absoluteLinksFolderPublication.html", + map[string]interface{}{"BaseURL": "http://www.example.com/blog"}) + tExpectIssueCount(t, hT, 0) +} + +func TestAnchorInternalBrokenAbsoluteFolderPublication(t *testing.T) { + // works for missing internal absolute links when site is published to a folder + hT := tTestFileOpts("fixtures/links/absoluteBrokenLinksFolderPublication.html", + map[string]interface{}{"BaseURL": "http://www.example.com/blog"}) + tExpectIssueCount(t, hT, 2) +} + +func TestAnchorExternalLinksToInternalFiles(t *testing.T) { + // works for external links that reference internal files + hT := tTestFileOpts("fixtures/links/externalLinksToInternalFiles.html", + map[string]interface{}{ + "BaseURL": "http://www.example.com/blog", + "CheckSelfReferencesAsInternal": true, + }) + tExpectIssueCount(t, hT, 0) +} + +func TestAnchorExternalLinksToExternalPages(t *testing.T) { + // works for external links that don't reference internal files because the subfolder is different + hT := tTestFileOpts("fixtures/links/externalLinksToExternalPages.html", + 
map[string]interface{}{ + "BaseURL": "http://www.example.com/blog", + "CheckSelfReferencesAsInternal": true, + }) + tExpectIssueCount(t, hT, 2) +} + func TestSelfSignedLink(t *testing.T) { tSkipShortExternal(t) hT := tTestFileOpts("fixtures/links/selfSignedLink.html", diff --git a/htmltest/fixtures/links/absoluteBrokenLinksFolderPublication.html b/htmltest/fixtures/links/absoluteBrokenLinksFolderPublication.html new file mode 100644 index 0000000..542bb7f --- /dev/null +++ b/htmltest/fixtures/links/absoluteBrokenLinksFolderPublication.html @@ -0,0 +1,2 @@ +Missing file in root of site +Missing file in folder of site \ No newline at end of file diff --git a/htmltest/fixtures/links/absoluteLinks.html b/htmltest/fixtures/links/absoluteLinks.html new file mode 100644 index 0000000..bd32420 --- /dev/null +++ b/htmltest/fixtures/links/absoluteLinks.html @@ -0,0 +1,2 @@ +Relative to root +Also relative to root \ No newline at end of file diff --git a/htmltest/fixtures/links/absoluteLinksFolderPublication.html b/htmltest/fixtures/links/absoluteLinksFolderPublication.html new file mode 100644 index 0000000..25aba9e --- /dev/null +++ b/htmltest/fixtures/links/absoluteLinksFolderPublication.html @@ -0,0 +1,2 @@ +Relative to root +Also relative to root \ No newline at end of file diff --git a/htmltest/fixtures/links/absoluteLinksRootPublication.html b/htmltest/fixtures/links/absoluteLinksRootPublication.html new file mode 100644 index 0000000..bd32420 --- /dev/null +++ b/htmltest/fixtures/links/absoluteLinksRootPublication.html @@ -0,0 +1,2 @@ +Relative to root +Also relative to root \ No newline at end of file diff --git a/htmltest/fixtures/links/externalLinksToExternalPages.html b/htmltest/fixtures/links/externalLinksToExternalPages.html new file mode 100644 index 0000000..b1d90f5 --- /dev/null +++ b/htmltest/fixtures/links/externalLinksToExternalPages.html @@ -0,0 +1,2 @@ +Path 'folder' does not match, should not be mapped to internal file +Path 'issues' does not match, 
should not map to known file \ No newline at end of file diff --git a/htmltest/fixtures/links/externalLinksToInternalFiles.html b/htmltest/fixtures/links/externalLinksToInternalFiles.html new file mode 100644 index 0000000..abe3ea6 --- /dev/null +++ b/htmltest/fixtures/links/externalLinksToInternalFiles.html @@ -0,0 +1,2 @@ +External HTTP reference +External HTTPS reference \ No newline at end of file diff --git a/htmltest/htmltest.go b/htmltest/htmltest.go index e30bc62..3cec597 100644 --- a/htmltest/htmltest.go +++ b/htmltest/htmltest.go @@ -7,6 +7,7 @@ import ( "errors" "fmt" "net/http" + "net/url" "os" "path" "strings" @@ -149,6 +150,17 @@ func Test(optsUser map[string]interface{}) (*HTMLTest, error) { hT.documentStore.DirectoryIndex = hT.opts.DirectoryIndex hT.documentStore.IgnorePatterns = hT.opts.IgnoreDirs hT.documentStore.IgnoreTagAttribute = hT.opts.IgnoreTagAttribute + + if hT.opts.BaseURL != "" { + baseURL, err := url.Parse(hT.opts.BaseURL) + if err != nil { + err := fmt.Errorf("Could not parse BaseURL '%s': %w", hT.opts.BaseURL, err) + return &hT, err + } + + hT.documentStore.BaseURL = baseURL + } + // Discover documents hT.documentStore.Discover() diff --git a/htmltest/options.go b/htmltest/options.go index 6ed0088..3e5dd3b 100644 --- a/htmltest/options.go +++ b/htmltest/options.go @@ -19,6 +19,8 @@ type Options struct { FilePath string FileExtension string + BaseURL string + CheckDoctype bool CheckAnchors bool CheckLinks bool @@ -27,13 +29,14 @@ type Options struct { CheckMeta bool CheckGeneric bool - CheckExternal bool - CheckInternal bool - CheckInternalHash bool - CheckMailto bool - CheckTel bool - CheckFavicon bool - CheckMetaRefresh bool + CheckExternal bool + CheckInternal bool + CheckInternalHash bool + CheckMailto bool + CheckTel bool + CheckFavicon bool + CheckMetaRefresh bool + CheckSelfReferencesAsInternal bool EnforceHTML5 bool EnforceHTTPS bool