From 733839c56c9b2bc3ab310426ef727f2b5c1ed3a3 Mon Sep 17 00:00:00 2001 From: Albert Wu Date: Wed, 27 May 2026 22:05:16 -0700 Subject: [PATCH 1/3] feat(phabricator): add ChangeID entity for parsing phab:// URIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirrors entity/github.ChangeID with a Phabricator-shaped identifier: phab://D{revision_id}/{diff_id}. Parser validates scheme, the "D" prefix on the revision segment, positive integer revision/diff IDs, and exact segment count. Adds a Revision() accessor that returns the canonical "D{n}" form used everywhere in the Phabricator UI and CLI. No downstream consumers yet — changeprovider/mergechecker/pusher wiring will come in follow-up PRs. Co-Authored-By: Claude Opus 4.7 (1M context) --- entity/phabricator/BUILD.bazel | 18 ++++ entity/phabricator/change_id.go | 128 +++++++++++++++++++++++++++ entity/phabricator/change_id_test.go | 75 ++++++++++++++++ 3 files changed, 221 insertions(+) create mode 100644 entity/phabricator/BUILD.bazel create mode 100644 entity/phabricator/change_id.go create mode 100644 entity/phabricator/change_id_test.go diff --git a/entity/phabricator/BUILD.bazel b/entity/phabricator/BUILD.bazel new file mode 100644 index 00000000..e561b32e --- /dev/null +++ b/entity/phabricator/BUILD.bazel @@ -0,0 +1,18 @@ +load("@rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "phabricator", + srcs = ["change_id.go"], + importpath = "github.com/uber/submitqueue/entity/phabricator", + visibility = ["//visibility:public"], +) + +go_test( + name = "phabricator_test", + srcs = ["change_id_test.go"], + embed = [":phabricator"], + deps = [ + "@com_github_stretchr_testify//assert", + "@com_github_stretchr_testify//require", + ], +) diff --git a/entity/phabricator/change_id.go b/entity/phabricator/change_id.go new file mode 100644 index 00000000..aadf0df7 --- /dev/null +++ b/entity/phabricator/change_id.go @@ -0,0 +1,128 @@ +// Copyright (c) 2026 Uber Technologies, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package phabricator + +import ( + "fmt" + "strconv" + "strings" +) + +// changeIDFormat is the expected format for change IDs, included in error messages. +const changeIDFormat = "{scheme}://D{revision_id}/{diff_id}" + +// scheme is the canonical URI scheme for Phabricator change identifiers. +// Unlike GitHub (which has multiple variants: github / ghe / ghes), Phabricator +// has a single scheme; the host is encoded out-of-band via queue config. +const scheme = "phab" + +// revisionPrefix is the literal "D" character that prefixes Phabricator revision +// IDs (e.g., D12345). Mirrors how revisions are referenced everywhere in the +// Phabricator UI and CLI ("arc diff", "arc patch D12345"), so URIs can be +// constructed by trivial substitution rather than reshaping. +const revisionPrefix = "D" + +// ChangeID represents a parsed Phabricator change identifier. +// Format: phab://D{revision_id}/{diff_id} +// +// Revision and diff are Phabricator's two-level identifier model: +// - RevisionID names the logical review (stable across updates, e.g., D12345). +// - DiffID names a specific uploaded patch version of that revision +// (increments on every "arc diff" update). It pins the change to an exact +// code state, analogous to GitHub's head commit SHA on a pull request. +type ChangeID struct { + // Scheme captures the URI scheme. Always "phab" today; kept as a field so + // the parsed form mirrors entity/github.ChangeID and so future variants + // (e.g., a separate scheme per Phabricator install) are a non-breaking add. + Scheme string + // RevisionID is the numeric portion of the Phabricator revision identifier + // (the digits after the leading "D"). For example, "D12345" parses to 12345. + RevisionID int + // DiffID is the numeric Phabricator diff identifier that pins a specific + // uploaded patch version of the revision. + DiffID int +} + +// ParseChangeID parses a raw change ID string into a ChangeID. +// Expected format: phab://D{revision_id}/{diff_id} +// The parser splits on "://" to separate scheme from path, then the path into +// exactly two segments: the D-prefixed revision and the diff ID. +func ParseChangeID(raw string) (ChangeID, error) { + schemeSplit := strings.SplitN(raw, "://", 2) + if len(schemeSplit) != 2 { + return ChangeID{}, fmt.Errorf("invalid change ID %q: missing '://' separator (expected format: %s)", raw, changeIDFormat) + } + + gotScheme := schemeSplit[0] + if gotScheme != scheme { + return ChangeID{}, fmt.Errorf("invalid change ID %q: scheme must be %q, got %q (expected format: %s)", raw, scheme, gotScheme, changeIDFormat) + } + + path := schemeSplit[1] + + segments := strings.Split(path, "/") + if len(segments) != 2 { + return ChangeID{}, fmt.Errorf("invalid change ID %q: path must have exactly 2 segments (revision/diff), got %d (expected format: %s)", raw, len(segments), changeIDFormat) + } + + revisionSegment := segments[0] + diffSegment := segments[1] + + if revisionSegment == "" { + return ChangeID{}, fmt.Errorf("invalid change ID %q: empty revision segment (expected format: %s)", raw, changeIDFormat) + } + if !strings.HasPrefix(revisionSegment, revisionPrefix) { + return ChangeID{}, fmt.Errorf("invalid change ID %q: revision %q must start with %q (expected format: %s)", raw, revisionSegment, revisionPrefix, changeIDFormat) + } + revisionDigits := revisionSegment[len(revisionPrefix):] + if revisionDigits == "" { + return ChangeID{}, fmt.Errorf("invalid change ID %q: revision %q has no digits after %q (expected format: %s)", raw, revisionSegment, revisionPrefix, changeIDFormat) + } + revisionID, err := strconv.Atoi(revisionDigits) + if err != nil { + return ChangeID{}, fmt.Errorf("invalid change ID %q: revision digits %q is not a valid integer (expected format: %s)", raw, revisionDigits, changeIDFormat) + } + if revisionID <= 0 { + return ChangeID{}, fmt.Errorf("invalid change ID %q: revision ID must be positive, got %d (expected format: %s)", raw, revisionID, changeIDFormat) + } + + if diffSegment == "" { + return ChangeID{}, fmt.Errorf("invalid change ID %q: empty diff ID (expected format: %s)", raw, changeIDFormat) + } + diffID, err := strconv.Atoi(diffSegment) + if err != nil { + return ChangeID{}, fmt.Errorf("invalid change ID %q: diff ID %q is not a valid integer (expected format: %s)", raw, diffSegment, changeIDFormat) + } + if diffID <= 0 { + return ChangeID{}, fmt.Errorf("invalid change ID %q: diff ID must be positive, got %d (expected format: %s)", raw, diffID, changeIDFormat) + } + + return ChangeID{ + Scheme: gotScheme, + RevisionID: revisionID, + DiffID: diffID, + }, nil +} + +// String returns the string representation of the change ID. +func (c ChangeID) String() string { + return fmt.Sprintf("%s://%s%d/%d", c.Scheme, revisionPrefix, c.RevisionID, c.DiffID) +} + +// Revision returns the Phabricator revision identifier in its canonical +// D-prefixed string form (e.g., "D12345"). +func (c ChangeID) Revision() string { + return fmt.Sprintf("%s%d", revisionPrefix, c.RevisionID) +} diff --git a/entity/phabricator/change_id_test.go b/entity/phabricator/change_id_test.go new file mode 100644 index 00000000..ab4db6a7 --- /dev/null +++ b/entity/phabricator/change_id_test.go @@ -0,0 +1,75 @@ +// Copyright (c) 2026 Uber Technologies, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package phabricator + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestParseChangeID covers one case per distinct branch in the parser. +// The happy path is also the round-trip subject (see TestParseChangeID_RoundTrip), +// which proves String() inverts ParseChangeID — so there is no separate +// TestChangeID_String. +func TestParseChangeID(t *testing.T) { + tests := []struct { + name string + raw string + want ChangeID + wantErr bool + }{ + { + name: "valid", + raw: "phab://D123/456", + want: ChangeID{Scheme: "phab", RevisionID: 123, DiffID: 456}, + }, + {name: "missing separator", raw: "phab:D123/456", wantErr: true}, + {name: "wrong scheme", raw: "github://D123/456", wantErr: true}, + {name: "wrong segment count", raw: "phab://D123", wantErr: true}, + {name: "missing revision prefix", raw: "phab://123/456", wantErr: true}, + {name: "revision prefix without digits", raw: "phab://D/456", wantErr: true}, + {name: "non-numeric revision", raw: "phab://Dabc/456", wantErr: true}, + {name: "non-positive revision", raw: "phab://D0/456", wantErr: true}, + {name: "empty diff", raw: "phab://D123/", wantErr: true}, + {name: "non-numeric diff", raw: "phab://D123/abc", wantErr: true}, + {name: "non-positive diff", raw: "phab://D123/0", wantErr: true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := ParseChangeID(tt.raw) + if tt.wantErr { + require.Error(t, err) + return + } + require.NoError(t, err) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestChangeID_Revision(t *testing.T) { + id := ChangeID{Scheme: "phab", RevisionID: 12345, DiffID: 67890} + assert.Equal(t, "D12345", id.Revision()) +} + +func TestParseChangeID_RoundTrip(t *testing.T) { + raw := "phab://D123/456" + parsed, err := ParseChangeID(raw) + require.NoError(t, err) + assert.Equal(t, raw, parsed.String()) +} From 9f2d6c8578d08f099900d5fcdef95d559e864729 Mon Sep 17 00:00:00 2001 From: Albert Wu Date: Wed, 27 May 2026 22:25:19 -0700 Subject: [PATCH 2/3] refactor(phabricator): collapse ChangeID segment validation into regex Replace ~25 lines of per-branch checks (empty / prefix / digits-after-D / non-numeric / non-positive) with two precompiled patterns: - revisionPattern: ^D([1-9]\d*)$ - diffPattern: ^[1-9]\d*$ One error per segment instead of four/three. Also rejects leading-zero forms like phab://D01/02 which previously parsed silently and broke the round-trip with String(). Adds two test cases covering the new strictness. Co-Authored-By: Claude Opus 4.7 (1M context) --- entity/phabricator/change_id.go | 42 +++++++++++----------------- entity/phabricator/change_id_test.go | 2 ++ 2 files changed, 19 insertions(+), 25 deletions(-) diff --git a/entity/phabricator/change_id.go b/entity/phabricator/change_id.go index aadf0df7..25ce32ed 100644 --- a/entity/phabricator/change_id.go +++ b/entity/phabricator/change_id.go @@ -16,6 +16,7 @@ package phabricator import ( "fmt" + "regexp" "strconv" "strings" ) @@ -34,6 +35,15 @@ const scheme = "phab" // constructed by trivial substitution rather than reshaping. const revisionPrefix = "D" +// revisionPattern matches a revision segment: the literal "D" followed by a +// positive integer with no leading zero (matches the strict round-trip form +// produced by String()). +var revisionPattern = regexp.MustCompile(`^D([1-9]\d*)$`) + +// diffPattern matches a diff segment: a positive integer with no leading zero +// (matches the strict round-trip form produced by String()). +var diffPattern = regexp.MustCompile(`^[1-9]\d*$`) + // ChangeID represents a parsed Phabricator change identifier. // Format: phab://D{revision_id}/{diff_id} // @@ -80,34 +90,16 @@ func ParseChangeID(raw string) (ChangeID, error) { revisionSegment := segments[0] diffSegment := segments[1] - if revisionSegment == "" { - return ChangeID{}, fmt.Errorf("invalid change ID %q: empty revision segment (expected format: %s)", raw, changeIDFormat) - } - if !strings.HasPrefix(revisionSegment, revisionPrefix) { - return ChangeID{}, fmt.Errorf("invalid change ID %q: revision %q must start with %q (expected format: %s)", raw, revisionSegment, revisionPrefix, changeIDFormat) - } - revisionDigits := revisionSegment[len(revisionPrefix):] - if revisionDigits == "" { - return ChangeID{}, fmt.Errorf("invalid change ID %q: revision %q has no digits after %q (expected format: %s)", raw, revisionSegment, revisionPrefix, changeIDFormat) - } - revisionID, err := strconv.Atoi(revisionDigits) - if err != nil { - return ChangeID{}, fmt.Errorf("invalid change ID %q: revision digits %q is not a valid integer (expected format: %s)", raw, revisionDigits, changeIDFormat) - } - if revisionID <= 0 { - return ChangeID{}, fmt.Errorf("invalid change ID %q: revision ID must be positive, got %d (expected format: %s)", raw, revisionID, changeIDFormat) + revisionMatch := revisionPattern.FindStringSubmatch(revisionSegment) + if revisionMatch == nil { + return ChangeID{}, fmt.Errorf("invalid change ID %q: revision %q must match D{positive_int} (expected format: %s)", raw, revisionSegment, changeIDFormat) } + revisionID, _ := strconv.Atoi(revisionMatch[1]) - if diffSegment == "" { - return ChangeID{}, fmt.Errorf("invalid change ID %q: empty diff ID (expected format: %s)", raw, changeIDFormat) - } - diffID, err := strconv.Atoi(diffSegment) - if err != nil { - return ChangeID{}, fmt.Errorf("invalid change ID %q: diff ID %q is not a valid integer (expected format: %s)", raw, diffSegment, changeIDFormat) - } - if diffID <= 0 { - return ChangeID{}, fmt.Errorf("invalid change ID %q: diff ID must be positive, got %d (expected format: %s)", raw, diffID, changeIDFormat) + if !diffPattern.MatchString(diffSegment) { + return ChangeID{}, fmt.Errorf("invalid change ID %q: diff %q must be a positive integer (expected format: %s)", raw, diffSegment, changeIDFormat) } + diffID, _ := strconv.Atoi(diffSegment) return ChangeID{ Scheme: gotScheme, diff --git a/entity/phabricator/change_id_test.go b/entity/phabricator/change_id_test.go index ab4db6a7..eb57aa50 100644 --- a/entity/phabricator/change_id_test.go +++ b/entity/phabricator/change_id_test.go @@ -47,6 +47,8 @@ func TestParseChangeID(t *testing.T) { {name: "empty diff", raw: "phab://D123/", wantErr: true}, {name: "non-numeric diff", raw: "phab://D123/abc", wantErr: true}, {name: "non-positive diff", raw: "phab://D123/0", wantErr: true}, + {name: "leading zero revision", raw: "phab://D01/2", wantErr: true}, + {name: "leading zero diff", raw: "phab://D1/02", wantErr: true}, } for _, tt := range tests { From 6b7cd85eece7c4fe43c71352392e6baccef3d5b8 Mon Sep 17 00:00:00 2001 From: Albert Wu Date: Thu, 28 May 2026 07:11:12 -0700 Subject: [PATCH 3/3] fix(phabricator): surface int overflow errors when parsing ChangeID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The regex pre-filter guarantees only [1-9]\d* reaches strconv.Atoi, but unbounded digit runs can still overflow int64 — and silently discarding that error landed RevisionID/DiffID at 0, producing a "valid" ChangeID that the regex was specifically designed to forbid. Check the Atoi error and return it. Adds test cases covering 20-digit overflow inputs for both segments. Co-Authored-By: Claude Opus 4.7 (1M context) --- entity/phabricator/change_id.go | 10 ++++++++-- entity/phabricator/change_id_test.go | 2 ++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/entity/phabricator/change_id.go b/entity/phabricator/change_id.go index 25ce32ed..fa9cee68 100644 --- a/entity/phabricator/change_id.go +++ b/entity/phabricator/change_id.go @@ -94,12 +94,18 @@ func ParseChangeID(raw string) (ChangeID, error) { if revisionMatch == nil { return ChangeID{}, fmt.Errorf("invalid change ID %q: revision %q must match D{positive_int} (expected format: %s)", raw, revisionSegment, changeIDFormat) } - revisionID, _ := strconv.Atoi(revisionMatch[1]) + revisionID, err := strconv.Atoi(revisionMatch[1]) + if err != nil { + return ChangeID{}, fmt.Errorf("invalid change ID %q: revision %q overflows int: %w (expected format: %s)", raw, revisionSegment, err, changeIDFormat) + } if !diffPattern.MatchString(diffSegment) { return ChangeID{}, fmt.Errorf("invalid change ID %q: diff %q must be a positive integer (expected format: %s)", raw, diffSegment, changeIDFormat) } - diffID, _ := strconv.Atoi(diffSegment) + diffID, err := strconv.Atoi(diffSegment) + if err != nil { + return ChangeID{}, fmt.Errorf("invalid change ID %q: diff %q overflows int: %w (expected format: %s)", raw, diffSegment, err, changeIDFormat) + } return ChangeID{ Scheme: gotScheme, diff --git a/entity/phabricator/change_id_test.go b/entity/phabricator/change_id_test.go index eb57aa50..61c53856 100644 --- a/entity/phabricator/change_id_test.go +++ b/entity/phabricator/change_id_test.go @@ -49,6 +49,8 @@ func TestParseChangeID(t *testing.T) { {name: "non-positive diff", raw: "phab://D123/0", wantErr: true}, {name: "leading zero revision", raw: "phab://D01/2", wantErr: true}, {name: "leading zero diff", raw: "phab://D1/02", wantErr: true}, + {name: "overflow revision", raw: "phab://D99999999999999999999/1", wantErr: true}, + {name: "overflow diff", raw: "phab://D1/99999999999999999999", wantErr: true}, } for _, tt := range tests {