-
Notifications
You must be signed in to change notification settings - Fork 341
Allow gitlab URL link shortening from non-gitlab/github.com domains #2068
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
e786e0d
0e43585
9f9df68
617b977
277730f
86dd418
96a1812
2e03182
82339e3
8158d88
0924968
13535e3
394bf57
5a7cda5
6d58054
a5d7086
b55a80e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,9 @@ | ||
"""A custom Transform object to shorten github and gitlab links.""" | ||
|
||
import re | ||
|
||
from typing import ClassVar | ||
from urllib.parse import ParseResult, urlparse, urlunparse | ||
from urllib.parse import urlparse | ||
|
||
from docutils import nodes | ||
from sphinx.transforms.post_transforms import SphinxPostTransform | ||
|
@@ -12,8 +14,8 @@ | |
|
||
class ShortenLinkTransform(SphinxPostTransform): | ||
""" | ||
Shorten link when they are coming from github or gitlab and add an extra class to | ||
the tag for further styling. | ||
Shorten link when they are coming from github, gitlab, or bitbucket and add | ||
an extra class to the tag for further styling. | ||
|
||
Before: | ||
.. code-block:: html | ||
|
@@ -37,8 +39,13 @@ class ShortenLinkTransform(SphinxPostTransform): | |
supported_platform: ClassVar[dict[str, str]] = { | ||
"github.com": "github", | ||
"gitlab.com": "gitlab", | ||
"bitbucket.org": "bitbucket", | ||
} | ||
platform = None | ||
|
||
@classmethod
def add_platform_mapping(cls, platform: str, netloc: str) -> None:
    """Add a domain->platform mapping to the class at run-time.

    Args:
        platform: platform identifier to associate with the domain
            (for example "gitlab").
        netloc: network location (domain) of the links that should be
            treated as belonging to ``platform``.
    """
    # ``supported_platform`` is a class-level dict, so the new entry is
    # visible to every instance that looks it up through this class.
    cls.supported_platform[netloc] = platform
|
||
def run(self, **kwargs): | ||
"""Run the Transform object.""" | ||
|
@@ -50,74 +57,146 @@ def run(self, **kwargs): | |
# only act if the uri and text are the same | ||
# if not the user has already customized the display of the link | ||
if uri is not None and text is not None and text == uri: | ||
uri = urlparse(uri) | ||
parsed_uri = urlparse(uri) | ||
# only do something if the platform is identified | ||
self.platform = self.supported_platform.get(uri.netloc) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not 100% sure about this part of the class refactor. I got rid of the `self.platform` attribute. If I am reading the code correctly, it seems odd to me that each time this class encounters a node, it changes `self.platform` to whatever is matched in that moment. I felt like it would be cleaner and easier to test if I just passed `platform` as an argument to the shortening function. |
||
if self.platform is not None: | ||
node.attributes["classes"].append(self.platform) | ||
node.children[0] = nodes.Text(self.parse_url(uri)) | ||
|
||
def parse_url(self, uri: ParseResult) -> str: | ||
"""Parse the content of the url with respect to the selected platform. | ||
|
||
Args: | ||
uri: the link to the platform content | ||
|
||
Returns: | ||
the reformated url title | ||
""" | ||
path = uri.path | ||
if path == "": | ||
# plain url passed, return platform only | ||
return self.platform | ||
|
||
# if the path is not empty it contains a leading "/", which we don't want to | ||
# include in the parsed content | ||
path = path.lstrip("/") | ||
|
||
# check the platform name and read the information accordingly | ||
# as "<organisation>/<repository>#<element number>" | ||
# or "<group>/<subgroup 1>/…/<subgroup N>/<repository>#<element number>" | ||
if self.platform == "github": | ||
# split the url content | ||
parts = path.split("/") | ||
|
||
if parts[0] == "orgs" and "/projects" in path: | ||
# We have a projects board link | ||
# ref: `orgs/{org}/projects/{project-id}` | ||
text = f"{parts[1]}/projects#{parts[3]}" | ||
else: | ||
# We have an issues, PRs, or repository link | ||
if len(parts) > 0: | ||
text = parts[0] # organisation | ||
if len(parts) > 1: | ||
text += f"/{parts[1]}" # repository | ||
if len(parts) > 2: | ||
if parts[2] in ["issues", "pull", "discussions"]: | ||
text += f"#{parts[-1]}" # element number | ||
|
||
elif self.platform == "gitlab": | ||
# cp. https://docs.gitlab.com/ee/user/markdown.html#gitlab-specific-references | ||
if "/-/" in path and any( | ||
map(uri.path.__contains__, ["issues", "merge_requests"]) | ||
): | ||
group_and_subgroups, parts, *_ = path.split("/-/") | ||
parts = parts.rstrip("/") | ||
if "/" not in parts: | ||
text = f"{group_and_subgroups}/{parts}" | ||
else: | ||
parts = parts.split("/") | ||
url_type, element_number, *_ = parts | ||
if not element_number: | ||
text = group_and_subgroups | ||
elif url_type == "issues": | ||
text = f"{group_and_subgroups}#{element_number}" | ||
elif url_type == "merge_requests": | ||
text = f"{group_and_subgroups}!{element_number}" | ||
else: | ||
# display the whole uri (after "gitlab.com/") including parameters | ||
# for example "<group>/<subgroup1>/<subgroup2>/<repository>" | ||
text = uri._replace(netloc="", scheme="") # remove platform | ||
text = urlunparse(text)[1:] # combine to string and strip leading "/" | ||
|
||
return text | ||
platform = self.supported_platform.get(parsed_uri.netloc) | ||
if platform is not None: | ||
short = shorten_url(platform, uri) | ||
if short != uri: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is the code change that prevents adding the platform class (and thereby the icon) unless the link is actually shortened. |
||
node.attributes["classes"].append(platform) | ||
node.children[0] = nodes.Text(short) | ||
|
||
|
||
def shorten_url(platform: str, url: str) -> str:
    """Shorten ``url`` using the rules of the given hosting platform.

    Args:
        platform: platform identifier; "github", "gitlab" and "bitbucket"
            are dispatched to their dedicated shortener.
        url: the full url to the platform content, beginning with https://

    Returns:
        The short form of the url, or ``url`` unchanged when the platform is
        not recognised or its shortener could not shorten it.
    """
    if platform == "github":
        return shorten_github(url)
    if platform == "bitbucket":
        return shorten_bitbucket(url)
    if platform == "gitlab":
        return shorten_gitlab(url)

    # Unknown platform: hand the url back untouched.
    return url
|
||
|
||
def shorten_github(url: str) -> str:
    """Convert a GitHub URL to a short form.

    Pull requests and issues become ``owner/repo#123``; commits become
    ``owner/repo@abc1234`` (hash truncated to 7 characters). Only the path of
    the URL is inspected, so any query string or fragment is ignored.

    Args:
        url: the full GitHub url.

    Returns:
        The short form, or ``url`` unchanged when no pattern matches.
    """
    path = urlparse(url).path

    # Pull request or issue URL (both are rendered with "#")
    # - Examples:
    #   - https://github.com/pydata/pydata-sphinx-theme/pull/2068
    #   - pydata/pydata-sphinx-theme#2068
    #   - https://github.com/pydata/pydata-sphinx-theme/issues/2176
    #   - pydata/pydata-sphinx-theme#2176
    #
    # The lookahead makes the number fill its whole path segment while still
    # accepting sub-pages such as ".../pull/2068/files".
    if match := re.match(
        r"/([^/]+)/([^/]+)/(?:pull|issues)/(\d+)(?=/|$)", path
    ):
        owner, repo, number = match.groups()
        return f"{owner}/{repo}#{number}"

    # Commit URL
    # - Example:
    #   - https://github.com/pydata/pydata-sphinx-theme/commit/51af2a27e8a008d0b44ed9ea9b45311e686d12f7
    #   - pydata/pydata-sphinx-theme@51af2a2
    #
    # The hash must fill its path segment (optionally followed by ".patch" or
    # ".diff"); otherwise a ref that merely starts with hex characters, such
    # as "dev" or "1.2.3", would be truncated into a bogus hash.
    if match := re.match(
        r"/([^/]+)/([^/]+)/commit/([a-f0-9]+)(?:\.(?:patch|diff))?(?=/|$)",
        path,
    ):
        owner, repo, commit_hash = match.groups()
        return f"{owner}/{repo}@{commit_hash[:7]}"

    # No match — return the original URL
    return url
|
||
|
||
def shorten_gitlab(url: str) -> str:
    """Convert a GitLab URL to a short form.

    Merge requests become ``group/project!123``, issues become
    ``group/project#123`` and commits become ``group/project@abcdef7`` (hash
    truncated to 7 characters). Nested groups are kept in the short form.

    Only supports canonical ('/-/') GitLab URLs. Only the path of the URL is
    inspected, so a fragment such as ``#note_2543533261`` is dropped and the
    URL shortens to the plain issue or merge request reference.

    Args:
        url: the full GitLab url.

    Returns:
        The short form, or ``url`` unchanged when no pattern matches.
    """
    path = urlparse(url).path

    # Every pattern tolerates a single trailing slash ("/?$").

    # Merge requests
    # - Example:
    #   - https://gitlab.com/gitlab-org/gitlab/-/merge_requests/195598
    #   - gitlab-org/gitlab!195598
    if match := re.match(r"^/(.+)/([^/]+)/-/merge_requests/(\d+)/?$", path):
        namespace, project, mr_id = match.groups()
        return f"{namespace}/{project}!{mr_id}"

    # Issues
    # - Example:
    #   - https://gitlab.com/gitlab-org/gitlab/-/issues/551885
    #   - gitlab-org/gitlab#551885
    if match := re.match(r"^/(.+)/([^/]+)/-/issues/(\d+)/?$", path):
        namespace, project, issue_id = match.groups()
        return f"{namespace}/{project}#{issue_id}"

    # Commits
    # - Example:
    #   - https://gitlab.com/gitlab-org/gitlab/-/commit/81872624c4c58425a040e158fd228d8f0c2bda07
    #   - gitlab-org/gitlab@8187262
    if match := re.match(r"^/(.+)/([^/]+)/-/commit/([a-f0-9]+)/?$", path):
        namespace, project, commit_hash = match.groups()
        return f"{namespace}/{project}@{commit_hash[:7]}"

    # No match — return the original URL
    return url
|
||
|
||
def shorten_bitbucket(url: str) -> str:
    """Convert a Bitbucket URL to a short form.

    Pull requests become ``team/repo#123``, issues become ``team/repo!123``
    and commits become ``team/repo@d9b5197`` (hash truncated to 7
    characters). Only the path of the URL is inspected.

    Args:
        url: the full Bitbucket url.

    Returns:
        The short form, or ``url`` unchanged when no pattern matches.
    """
    path = urlparse(url).path

    # Pull request URL
    # - Example:
    #   - https://bitbucket.org/atlassian/atlassian-jwt-js/pull-requests/23
    #   - atlassian/atlassian-jwt-js#23
    #
    # "(?:/.*)?$" lets the id be followed by a trailing slash or further path
    # segments, but not by extra characters glued to the number.
    if match := re.match(
        r"^/([^/]+)/([^/]+)/pull-requests/(\d+)(?:/.*)?$", path
    ):
        workspace, repo, pr_id = match.groups()
        return f"{workspace}/{repo}#{pr_id}"

    # Issue URL.
    # - Example:
    #   - https://bitbucket.org/atlassian/atlassian-jwt-js/issues/11/
    #   - atlassian/atlassian-jwt-js!11
    #
    # Deliberately accepts anything after the issue number's path segment
    # because sometimes Bitbucket issue URLs include a slug at the end, for
    # example:
    # https://bitbucket.org/atlassian/atlassian-jwt-js/issues/11/nested-object-properties-are-represented
    #
    # NOTE(review): a reviewer questioned the sigil used for issues here, and
    # the author was unsure of Bitbucket's convention — confirm that "!" for
    # issues and "#" for pull requests is the intended mapping.
    if match := re.match(r"^/([^/]+)/([^/]+)/issues/(\d+)(?:/.*)?$", path):
        workspace, repo, issue_id = match.groups()
        return f"{workspace}/{repo}!{issue_id}"

    # Commit URL
    # - Example:
    #   - https://bitbucket.org/atlassian/atlassian-jwt-js/commits/d9b5197f0aeedeabf9d0f8d0953a80be65743d8a
    #   - atlassian/atlassian-jwt-js@d9b5197
    if match := re.match(r"^/([^/]+)/([^/]+)/commits/([a-f0-9]+)/?$", path):
        workspace, repo, commit_hash = match.groups()
        return f"{workspace}/{repo}@{commit_hash[:7]}"

    # No match — return the original URL
    return url
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Linter was complaining about http link in a test fixture so I excluded it here.