diff --git a/.github/lychee.toml b/.github/lychee.toml
new file mode 100644
index 00000000000..ce2cad4b805
--- /dev/null
+++ b/.github/lychee.toml
@@ -0,0 +1,82 @@
+# .github/lychee.toml
+
+############################# Display #############################
+# Verbose program output
+# Accepts log level: "error", "warn", "info", "debug", "trace"
+verbose = "info"
+
+# Don't show interactive progress bar while checking links.
+no_progress = true
+
+############################# Cache ###############################
+# Enable link caching. This can be helpful to avoid checking the same links on
+# multiple runs.
+cache = false
+
+############################# Runtime #############################
+# Maximum number of concurrent link checks.
+max_concurrency = 12
+
+# Maximum number of allowed redirects.
+max_redirects = 5
+
+# Maximum number of allowed retries before a link is declared dead.
+max_retries = 1
+
+############################# Requests ############################
+# Website timeout from connect to response finished.
+timeout = 10
+
+# Minimum wait time in seconds between retries of failed requests.
+retry_wait_time = 1
+
+# Accept more status codes (follow redirects automatically)
+accept = ["200..=204", "301..=308", "429"]
+
+# Avoid false fragment errors
+include_fragments = false
+
+# Only test links with the given schemes (e.g. https).
+# Omit to check links with any other scheme.
+# At the moment, we support http, https, file, and mailto.
+scheme = ["https"]
+
+# When links are available using HTTPS, treat HTTP links as errors.
+require_https = false
+
+# Fallback extensions to apply when a URL does not specify one.
+# This is common in documentation tools that cross-reference files without extensions.
+fallback_extensions = ["md", "html"]
+
+############################# Exclusions ##########################
+# Exclude URLs and mail addresses from checking (supports regex).
+# Patterns are TOML literal strings (single quotes): write a regex escape with ONE backslash.
+exclude = [
+  '^mailto:',
+  '^https?://localhost',
+  '^https?://127\.0\.0\.1',
+  '^https://www\.linkedin\.com',
+  '^https?://issues\.umbraco\.org/',
+  '^https?://web\.archive\.org/web/'
+]
+
+# Exclude these filesystem paths from getting checked.
+exclude_path = [
+  '(^|/)node_modules/',
+  '(^|/)dist/',
+  '(^|/)bin/',
+  '\.txt$', # skip .txt extensions
+  '(^|/)test/' # skip directories named "test"
+]
+
+# URLs to check (supports regex). Has preference over all excludes.
+include = ['gist\.github\.com.*']
+
+# Do not check mail addresses (mailto: links are also excluded above).
+include_mail = false
+
+############################# Content Checks ######################
+# Mark pages as broken if the body contains "page not found" or "404"
+# NOTE(review): confirm the lychee release in use recognises a [content] table.
+[content]
+deny = ["(?i)page not found", "(?i)404"]
diff --git a/.github/workflows/check-broken-pr-links.yml b/.github/workflows/check-broken-pr-links.yml
new file mode 100644
index 00000000000..5b73b241603
--- /dev/null
+++ b/.github/workflows/check-broken-pr-links.yml
@@ -0,0 +1,125 @@
+name: Check Links in Pull Requests
+
+on:
+  pull_request:
+    branches:
+      - main
+    paths:
+      - '**/*.md'
+
+jobs:
+  check-links:
+    runs-on: ubuntu-latest
+
+    steps:
+      # 1️⃣ Checkout repository (full history, so the PR base commit is available to git diff)
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      # 2️⃣ Get changed Markdown files in the PR
+      # Deleted files are filtered out (--diff-filter=d): they no longer exist on disk.
+      - name: Get changed Markdown files
+        id: changed-files
+        run: |
+          CHANGED_FILES=$(git diff --name-only --diff-filter=d ${{ github.event.pull_request.base.sha }} ${{ github.sha }} | grep '\.md$' || true)
+          CHANGED_FILES="${CHANGED_FILES//$'\n'/ }"
+          echo "CHANGED_FILES=$CHANGED_FILES" >> "$GITHUB_ENV"
+          echo "Changed Markdown files: $CHANGED_FILES"
+
+      # 3️⃣ Skip if no Markdown files changed
+      - name: Skip if no Markdown files changed
+        if: env.CHANGED_FILES == ''
+        run: |
+          echo "No Markdown files changed. Skipping link check."
+          exit 0
+
+      # 4️⃣ Run Lychee on changed files
+      # NOTE(review): no --config is passed here; confirm whether .github/lychee.toml
+      # is expected to apply to this run.
+      - name: Run Lychee
+        id: run-lychee
+        if: env.CHANGED_FILES != ''
+        uses: lycheeverse/lychee-action@v2
+        with:
+          args: |
+            --no-progress
+            --include-fragments
+            --format detailed
+            ${{ env.CHANGED_FILES }}
+          output: lychee/out_raw.md
+          fail: false # ✅ don't fail yet, let us capture output
+
+      # 5️⃣ Format Lychee output (user-friendly, relative paths)
+      - name: Format Lychee report
+        id: format-report
+        if: always()
+        run: |
+          mkdir -p lychee
+          : > lychee/comment.md # start with empty file
+          touch lychee/out_raw.md # make sure awk has an input even when Lychee did not run
+
+          awk '
+            /^Errors in / {
+              file=$3
+              gsub("^/home/runner/work/UmbracoDocs/UmbracoDocs/", "", file)
+              print "\nBroken links found in:\n" file >> "lychee/comment.md"
+              next
+            }
+
+            /\[ERROR\]/ {
+              msg = $0
+              sub(/^- \[ \] /, "", msg)
+              sub(/^\[ERROR\] /, "", msg)
+              gsub("^file:///home/runner/work/UmbracoDocs/UmbracoDocs/", "", msg)
+              print "\n⚓ Anchor not found → " msg >> "lychee/comment.md"
+              next
+            }
+
+            /\[404\]/ {
+              msg = $0
+              sub(/^- \[ \] /, "", msg)
+              sub(/^\[404\] /, "", msg)
+              print "\n❌ 404 Not Found → " msg >> "lychee/comment.md"
+              next
+            }
+
+            /\[301\]|\[302\]/ {
+              msg = $0
+              sub(/^- \[ \] /, "", msg)
+              sub(/^\[(301|302)\] /, "", msg)
+              print "\n🔀 Redirect → " msg >> "lychee/comment.md"
+              next
+            }
+
+            /Timeout/ && !/Timeouts/ {
+              msg = $0
+              sub(/^- \[ \] /, "", msg)
+              print "\n⏳ Timeout → " msg >> "lychee/comment.md"
+              next
+            }
+          ' lychee/out_raw.md
+
+          # Add header only if we found content
+          if [ -s lychee/comment.md ]; then
+            sed -i '1i **The Link Checker found broken links in your PR**.\n Please review the following list:\n' lychee/comment.md
+            echo "has_content=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "has_content=false" >> "$GITHUB_OUTPUT"
+          fi
+
+      # 6️⃣ Comment broken links on PR (if present)
+      - name: Comment broken links
+        if: always() && (env.CHANGED_FILES != '') && (steps.format-report.outputs.has_content == 'true')
+        uses: marocchino/sticky-pull-request-comment@v2
+        with:
+          path: lychee/comment.md
+          recreate: true
+
+      # 7️⃣ Fail workflow if broken links exist
+      - name: Fail workflow if broken links
+        if: steps.format-report.outputs.has_content == 'true'
+        run: |
+          echo "❌ Broken links detected. Please review the PR comment for details."
+          exit 1
diff --git a/.gitignore b/.gitignore
index 655ba93439d..946cb65a602 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@
 *.orig
 .vscode
 .idea
+.lycheecache
diff --git a/.lycheeignore b/.lycheeignore
new file mode 100644
index 00000000000..c149333be85
--- /dev/null
+++ b/.lycheeignore
@@ -0,0 +1,31 @@
+# These links are ignored by lychee link checker: https://github.com/lycheeverse/lychee
+# The file allows you to list multiple regular expressions for exclusion (one pattern per line).
+# The `.lycheeignore` file is only used for excluding URLs, not paths. Use the `exclude_path` key in the `lychee.toml` file. ref: https://lychee.cli.rs/recipes/excluding-paths/
+
+# GitHub blob/tree fragment links
+^https://github\.com/umbraco/Umbraco-CMS/blob/.*/.*#L.*
+^https://github\.com/umbraco/Umbraco-CMS/tree/.*
+^https://github\.com/Shazwazza/Articulate/blob/.*/.*#L.*
+^https://github\.com/umbraco/Umbraco-CMS/blob/.*
+
+# Anchor/fragment links causing false positives
+^https://apidocs\.umbraco\.com/.*/#.*
+^https://tinymce\.github\.io/.*/#.*
+^https://openid\.net/.*/#.*
+^https://docs\.microsoft\.com/.*#.*
+^https://learn\.microsoft\.com/.*#.*
+^https://developer\.mozilla\.org/.*/#.*
+^https://learning\.postman\.com/docs/.*/#.*
+^https://nginx\.org/.*/#.*
+^https://azure\.microsoft\.com/en-gb/services/media-services/.*
+^https://www\.tiny\.cloud/docs/.*
+
+# TinyMCE anchors
+^https://github\.com/tinymce/tinymce/issues/.*#.*
+
+# NIST FIPS and other static docs
+^https://csrc\.nist\.gov/publications/PubsFIPS\.html#.*
+
+# Timeout-prone Umbraco issue links
+^https://issues\.umbraco\.org/issue/.*
+^https://issues\.umbraco\.org/issues/.*