From 0a1ce96bee867b3129e336b5b8620a2eaf8ec514 Mon Sep 17 00:00:00 2001
From: "Chrome Release Bot (LUCI)"
 <chrome-official-brancher@chops-service-accounts.iam.gserviceaccount.com>
Date: Thu, 21 Mar 2024 17:19:53 +0000
Subject: [PATCH] Publish DEPS for 114.0.5735.358

git-subtree-dir: url
git-subtree-split: 1759c6ae9316996b9f150c0ce9d0ca78a3d15c02
---
 BUILD.gn                                      |  405 +++
 DEPS                                          |   18 +
 DIR_METADATA                                  |   11 +
 OWNERS                                        |    9 +
 README.md                                     |   75 +
 android/OWNERS                                |    1 +
 android/gurl_android.cc                       |  160 +
 android/gurl_android.h                        |   36 +
 android/gurl_java_test_helper.cc              |   72 +
 android/java/src/org/chromium/url/GURL.java   |  413 +++
 .../src/org/chromium/url/IDNStringUtil.java   |   33 +
 android/java/src/org/chromium/url/Origin.java |  114 +
 android/java/src/org/chromium/url/Parsed.java |  141 +
 android/java/src/org/chromium/url/URI.java    |   61 +
 android/javatests/DEPS                        |    3 +
 .../src/org/chromium/url/GURLJavaTest.java    |  314 ++
 .../org/chromium/url/GURLJavaTestHelper.java  |   34 +
 .../org/chromium/url/JUnitTestGURLsTest.java  |   73 +
 .../src/org/chromium/url/OriginJavaTest.java  |   99 +
 .../chromium/url/OriginJavaTestHelper.java    |   23 +
 .../src/org/chromium/url/ShadowGURLTest.java  |   70 +
 android/origin_android.cc                     |   87 +
 android/origin_java_test_helper.cc            |   37 +
 android/parsed_android.cc                     |   96 +
 android/parsed_android.h                      |   22 +
 android/robolectric_test_main.cc              |   15 +
 .../src/org/chromium/url/JUnitTestGURLs.java  |  174 ++
 .../java/src/org/chromium/url/ShadowGURL.java |   62 +
 features.gni                                  |   16 +
 gurl.cc                                       |  578 ++++
 gurl.h                                        |  534 ++++
 gurl_abstract_tests.h                         |  119 +
 gurl_fuzzer.cc                                |   89 +
 gurl_fuzzer.dict                              |  432 +++
 gurl_unittest.cc                              | 1180 +++++++
 ipc/BUILD.gn                                  |   38 +
 ipc/OWNERS                                    |    2 +
 ipc/url_ipc_export.h                          |   29 +
 ipc/url_param_traits.cc                       |   56 +
 ipc/url_param_traits.h                        |   33 +
 ipc/url_param_traits_unittest.cc              |  159 +
 mojom/BUILD.gn                                |  141 +
 mojom/DEPS                                    |    3 +
 mojom/DIR_METADATA                            |   11 +
 mojom/OWNERS                                  |    4 +
 mojom/origin.mojom                            |   19 +
 mojom/origin_mojom_traits.cc                  |   34 +
 mojom/origin_mojom_traits.h                   |   39 +
 mojom/scheme_host_port.mojom                  |   13 +
 mojom/scheme_host_port_mojom_traits.cc        |   27 +
 mojom/scheme_host_port_mojom_traits.h         |   30 +
 .../scheme_host_port_mojom_traits_unittest.cc |   36 +
 mojom/url.mojom                               |   13 +
 mojom/url_gurl_mojom_traits.cc                |   40 +
 mojom/url_gurl_mojom_traits.h                 |   25 +
 mojom/url_gurl_mojom_traits_unittest.cc       |  209 ++
 mojom/url_test.mojom                          |   16 +
 origin.cc                                     |  482 +++
 origin.h                                      |  496 +++
 origin_abstract_tests.cc                      |  104 +
 origin_abstract_tests.h                       |  527 ++++
 origin_unittest.cc                            |  777 +++++
 run_all_perftests.cc                          |   14 +
 run_all_unittests.cc                          |   27 +
 scheme_host_port.cc                           |  278 ++
 scheme_host_port.h                            |  173 ++
 scheme_host_port_unittest.cc                  |  294 ++
 third_party/mozilla/LICENSE.txt               |   65 +
 third_party/mozilla/README.chromium           |    8 +
 third_party/mozilla/url_parse.cc              |  963 ++++++
 third_party/mozilla/url_parse.h               |  377 +++
 url_canon.cc                                  |   15 +
 url_canon.h                                   | 1037 +++++++
 url_canon_etc.cc                              |  428 +++
 url_canon_filesystemurl.cc                    |  135 +
 url_canon_fileurl.cc                          |  247 ++
 url_canon_host.cc                             |  442 +++
 url_canon_icu.cc                              |  114 +
 url_canon_icu.h                               |   41 +
 url_canon_icu_unittest.cc                     |  168 +
 url_canon_internal.cc                         |  502 +++
 url_canon_internal.h                          |  471 +++
 url_canon_internal_file.h                     |  135 +
 url_canon_ip.cc                               |  690 +++++
 url_canon_ip.h                                |   60 +
 url_canon_mailtourl.cc                        |  127 +
 url_canon_path.cc                             |  474 +++
 url_canon_pathurl.cc                          |  144 +
 url_canon_query.cc                            |  149 +
 url_canon_relative.cc                         |  623 ++++
 url_canon_stdstring.cc                        |   30 +
 url_canon_stdstring.h                         |  132 +
 url_canon_stdurl.cc                           |  209 ++
 url_canon_unittest.cc                         | 2748 +++++++++++++++++
 url_constants.cc                              |   61 +
 url_constants.h                               |   70 +
 url_features.cc                               |   35 +
 url_features.h                                |   33 +
 url_file.h                                    |  101 +
 url_idna_icu.cc                               |  144 +
 url_idna_icu_alternatives_android.cc          |   40 +
 url_idna_icu_alternatives_ios.mm              |   28 +
 url_parse_file.cc                             |  198 ++
 url_parse_internal.h                          |   96 +
 url_parse_perftest.cc                         |  135 +
 url_parse_unittest.cc                         |  687 +++++
 url_test_utils.h                              |   39 +
 url_util.cc                                   |  933 ++++++
 url_util.h                                    |  314 ++
 url_util_internal.h                           |   23 +
 url_util_unittest.cc                          |  631 ++++
 111 files changed, 23857 insertions(+)
 create mode 100644 BUILD.gn
 create mode 100644 DEPS
 create mode 100644 DIR_METADATA
 create mode 100644 OWNERS
 create mode 100644 README.md
 create mode 100644 android/OWNERS
 create mode 100644 android/gurl_android.cc
 create mode 100644 android/gurl_android.h
 create mode 100644 android/gurl_java_test_helper.cc
 create mode 100644 android/java/src/org/chromium/url/GURL.java
 create mode 100644 android/java/src/org/chromium/url/IDNStringUtil.java
 create mode 100644 android/java/src/org/chromium/url/Origin.java
 create mode 100644 android/java/src/org/chromium/url/Parsed.java
 create mode 100644 android/java/src/org/chromium/url/URI.java
 create mode 100644 android/javatests/DEPS
 create mode 100644 android/javatests/src/org/chromium/url/GURLJavaTest.java
 create mode 100644 android/javatests/src/org/chromium/url/GURLJavaTestHelper.java
 create mode 100644 android/javatests/src/org/chromium/url/JUnitTestGURLsTest.java
 create mode 100644 android/javatests/src/org/chromium/url/OriginJavaTest.java
 create mode 100644 android/javatests/src/org/chromium/url/OriginJavaTestHelper.java
 create mode 100644 android/junit/src/org/chromium/url/ShadowGURLTest.java
 create mode 100644 android/origin_android.cc
 create mode 100644 android/origin_java_test_helper.cc
 create mode 100644 android/parsed_android.cc
 create mode 100644 android/parsed_android.h
 create mode 100644 android/robolectric_test_main.cc
 create mode 100644 android/test/java/src/org/chromium/url/JUnitTestGURLs.java
 create mode 100644 android/test/java/src/org/chromium/url/ShadowGURL.java
 create mode 100644 features.gni
 create mode 100644 gurl.cc
 create mode 100644 gurl.h
 create mode 100644 gurl_abstract_tests.h
 create mode 100644 gurl_fuzzer.cc
 create mode 100644 gurl_fuzzer.dict
 create mode 100644 gurl_unittest.cc
 create mode 100644 ipc/BUILD.gn
 create mode 100644 ipc/OWNERS
 create mode 100644 ipc/url_ipc_export.h
 create mode 100644 ipc/url_param_traits.cc
 create mode 100644 ipc/url_param_traits.h
 create mode 100644 ipc/url_param_traits_unittest.cc
 create mode 100644 mojom/BUILD.gn
 create mode 100644 mojom/DEPS
 create mode 100644 mojom/DIR_METADATA
 create mode 100644 mojom/OWNERS
 create mode 100644 mojom/origin.mojom
 create mode 100644 mojom/origin_mojom_traits.cc
 create mode 100644 mojom/origin_mojom_traits.h
 create mode 100644 mojom/scheme_host_port.mojom
 create mode 100644 mojom/scheme_host_port_mojom_traits.cc
 create mode 100644 mojom/scheme_host_port_mojom_traits.h
 create mode 100644 mojom/scheme_host_port_mojom_traits_unittest.cc
 create mode 100644 mojom/url.mojom
 create mode 100644 mojom/url_gurl_mojom_traits.cc
 create mode 100644 mojom/url_gurl_mojom_traits.h
 create mode 100644 mojom/url_gurl_mojom_traits_unittest.cc
 create mode 100644 mojom/url_test.mojom
 create mode 100644 origin.cc
 create mode 100644 origin.h
 create mode 100644 origin_abstract_tests.cc
 create mode 100644 origin_abstract_tests.h
 create mode 100644 origin_unittest.cc
 create mode 100644 run_all_perftests.cc
 create mode 100644 run_all_unittests.cc
 create mode 100644 scheme_host_port.cc
 create mode 100644 scheme_host_port.h
 create mode 100644 scheme_host_port_unittest.cc
 create mode 100644 third_party/mozilla/LICENSE.txt
 create mode 100644 third_party/mozilla/README.chromium
 create mode 100644 third_party/mozilla/url_parse.cc
 create mode 100644 third_party/mozilla/url_parse.h
 create mode 100644 url_canon.cc
 create mode 100644 url_canon.h
 create mode 100644 url_canon_etc.cc
 create mode 100644 url_canon_filesystemurl.cc
 create mode 100644 url_canon_fileurl.cc
 create mode 100644 url_canon_host.cc
 create mode 100644 url_canon_icu.cc
 create mode 100644 url_canon_icu.h
 create mode 100644 url_canon_icu_unittest.cc
 create mode 100644 url_canon_internal.cc
 create mode 100644 url_canon_internal.h
 create mode 100644 url_canon_internal_file.h
 create mode 100644 url_canon_ip.cc
 create mode 100644 url_canon_ip.h
 create mode 100644 url_canon_mailtourl.cc
 create mode 100644 url_canon_path.cc
 create mode 100644 url_canon_pathurl.cc
 create mode 100644 url_canon_query.cc
 create mode 100644 url_canon_relative.cc
 create mode 100644 url_canon_stdstring.cc
 create mode 100644 url_canon_stdstring.h
 create mode 100644 url_canon_stdurl.cc
 create mode 100644 url_canon_unittest.cc
 create mode 100644 url_constants.cc
 create mode 100644 url_constants.h
 create mode 100644 url_features.cc
 create mode 100644 url_features.h
 create mode 100644 url_file.h
 create mode 100644 url_idna_icu.cc
 create mode 100644 url_idna_icu_alternatives_android.cc
 create mode 100644 url_idna_icu_alternatives_ios.mm
 create mode 100644 url_parse_file.cc
 create mode 100644 url_parse_internal.h
 create mode 100644 url_parse_perftest.cc
 create mode 100644 url_parse_unittest.cc
 create mode 100644 url_test_utils.h
 create mode 100644 url_util.cc
 create mode 100644 url_util.h
 create mode 100644 url_util_internal.h
 create mode 100644 url_util_unittest.cc

diff --git a/BUILD.gn b/BUILD.gn
new file mode 100644
index 00000000000..b5d6f606382
--- /dev/null
+++ b/BUILD.gn
@@ -0,0 +1,405 @@
+# Copyright 2013 The Chromium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import("//build/buildflag_header.gni")
+import("//testing/libfuzzer/fuzzer_test.gni")
+import("//testing/test.gni")
+import("features.gni")
+
+import("//build/config/cronet/config.gni")
+
+if (is_android || is_robolectric) {
+  import("//build/config/android/rules.gni")
+}
+
+buildflag_header("buildflags") {
+  header = "buildflags.h"
+  flags = [ "USE_PLATFORM_ICU_ALTERNATIVES=$use_platform_icu_alternatives" ]
+}
+
+component("url") {
+  sources = [
+    "gurl.cc",
+    "gurl.h",
+    "origin.cc",
+    "origin.h",
+    "scheme_host_port.cc",
+    "scheme_host_port.h",
+    "third_party/mozilla/url_parse.cc",
+    "third_party/mozilla/url_parse.h",
+    "url_canon.cc",
+    "url_canon.h",
+    "url_canon_etc.cc",
+    "url_canon_filesystemurl.cc",
+    "url_canon_fileurl.cc",
+    "url_canon_host.cc",
+    "url_canon_internal.cc",
+    "url_canon_internal.h",
+    "url_canon_internal_file.h",
+    "url_canon_ip.cc",
+    "url_canon_ip.h",
+    "url_canon_mailtourl.cc",
+    "url_canon_path.cc",
+    "url_canon_pathurl.cc",
+    "url_canon_query.cc",
+    "url_canon_relative.cc",
+    "url_canon_stdstring.cc",
+    "url_canon_stdstring.h",
+    "url_canon_stdurl.cc",
+    "url_constants.cc",
+    "url_constants.h",
+    "url_features.cc",
+    "url_features.h",
+    "url_file.h",
+    "url_parse_file.cc",
+    "url_parse_internal.h",
+    "url_util.cc",
+    "url_util.h",
+    "url_util_internal.h",
+  ]
+
+  defines = [ "IS_URL_IMPL" ]
+
+  public_deps = [ "//base" ]
+
+  deps = [ "//base/third_party/dynamic_annotations" ]
+
+  if (is_win) {
+    # Don't conflict with Windows' "url.dll".
+    output_name = "url_lib"
+  }
+
+  # ICU support.
+  if (use_platform_icu_alternatives) {
+    if (is_android) {
+      sources += [ "url_idna_icu_alternatives_android.cc" ]
+      deps += [
+        ":buildflags",
+        ":url_java",
+        ":url_jni_headers",
+        "//base",
+        "//base/third_party/dynamic_annotations",
+      ]
+    } else if (is_ios) {
+      sources += [ "url_idna_icu_alternatives_ios.mm" ]
+    } else {
+      assert(false,
+             "ICU alternative is not implemented for platform: " + target_os)
+    }
+  } else {
+    # Use ICU.
+    sources += [
+      "url_canon_icu.cc",
+      "url_canon_icu.h",
+      "url_idna_icu.cc",
+    ]
+    deps += [
+      "//base:i18n",
+      "//third_party/icu",
+    ]
+  }
+}
+
+if (is_android || is_robolectric) {
+  generate_jni("url_jni_headers") {
+    sources = [
+      "android/java/src/org/chromium/url/IDNStringUtil.java",
+      "android/java/src/org/chromium/url/Origin.java",
+    ]
+  }
+
+  generate_jni("gurl_jni_headers") {
+    sources = [
+      "android/java/src/org/chromium/url/GURL.java",
+      "android/java/src/org/chromium/url/Parsed.java",
+    ]
+  }
+
+  source_set("gurl_android") {
+    sources = [
+      "android/gurl_android.cc",
+      "android/gurl_android.h",
+      "android/parsed_android.cc",
+      "android/parsed_android.h",
+    ]
+
+    deps = [
+      ":gurl_jni_headers",
+      ":url",
+      "//base:base",
+    ]
+
+    if (is_robolectric) {
+      # Make jni.h available.
+      configs += [ "//third_party/jdk" ]
+    }
+  }
+
+  static_library("origin_android") {
+    sources = [ "android/origin_android.cc" ]
+
+    deps = [
+      ":gurl_android",
+      ":url",
+      ":url_jni_headers",
+      "//base",
+    ]
+  }
+}
+
+if (is_android) {
+  android_library("url_java") {
+    sources = [ "android/java/src/org/chromium/url/IDNStringUtil.java" ]
+    deps = [ "//base:jni_java" ]
+  }
+}
+
+if (is_android && !is_cronet_build) {
+  android_library("gurl_java") {
+    sources = [
+      "android/java/src/org/chromium/url/GURL.java",
+      "android/java/src/org/chromium/url/Parsed.java",
+      "android/java/src/org/chromium/url/URI.java",
+    ]
+    deps = [
+      "//base:base_java",
+      "//base:jni_java",
+      "//build/android:build_java",
+      "//third_party/android_deps:com_google_errorprone_error_prone_annotations_java",
+      "//third_party/androidx:androidx_annotation_annotation_java",
+      "//url/mojom:url_mojom_gurl_java",
+    ]
+    annotation_processor_deps = [ "//base/android/jni_generator:jni_processor" ]
+  }
+
+  android_library("origin_java") {
+    sources = [ "android/java/src/org/chromium/url/Origin.java" ]
+    deps = [
+      ":gurl_java",
+      "//base:jni_java",
+      "//build/android:build_java",
+      "//mojo/public/java:bindings_java",
+      "//mojo/public/mojom/base:base_java",
+      "//url/mojom:url_mojom_origin_java",
+    ]
+    annotation_processor_deps = [ "//base/android/jni_generator:jni_processor" ]
+  }
+}
+
+source_set("url_test_support") {
+  testonly = true
+
+  sources = [
+    "gurl_abstract_tests.h",
+    "origin_abstract_tests.cc",
+    "origin_abstract_tests.h",
+  ]
+
+  public_deps = [
+    ":url",
+    "//base",
+    "//base/test:test_support",
+    "//testing/gmock",
+    "//testing/gtest",
+  ]
+}
+
+test("url_unittests") {
+  sources = [
+    "gurl_unittest.cc",
+    "origin_unittest.cc",
+    "run_all_unittests.cc",
+    "scheme_host_port_unittest.cc",
+    "url_canon_icu_unittest.cc",
+    "url_canon_unittest.cc",
+    "url_parse_unittest.cc",
+    "url_test_utils.h",
+    "url_util_unittest.cc",
+  ]
+
+  deps = [
+    ":url",
+    ":url_test_support",
+    "//base",
+    "//base/test:test_support",
+    "//testing/gmock",
+    "//testing/gtest",
+  ]
+
+  if (use_platform_icu_alternatives) {
+    # Unit tests that are not supported by the current ICU alternatives on Android.
+    if (is_android) {
+      sources -= [
+        "url_canon_icu_unittest.cc",
+        "url_canon_unittest.cc",
+      ]
+      deps += [ ":url_java" ]
+    }
+
+    # Unit tests that are not supported by the current ICU alternatives on iOS.
+    if (is_ios) {
+      sources -= [
+        "origin_unittest.cc",
+        "scheme_host_port_unittest.cc",
+        "url_canon_icu_unittest.cc",
+        "url_canon_unittest.cc",
+      ]
+    }
+  } else {  # !use_platform_icu_alternatives
+    deps += [ "//third_party/icu:icuuc" ]
+  }
+
+  if (!is_ios && !is_cronet_build) {
+    sources += [
+      "mojom/scheme_host_port_mojom_traits_unittest.cc",
+      "mojom/url_gurl_mojom_traits_unittest.cc",
+    ]
+    deps += [
+      "//mojo/core/embedder",
+      "//mojo/public/cpp/test_support:test_utils",
+      "//url/ipc:url_ipc_unittests",
+      "//url/mojom:mojom_traits",
+      "//url/mojom:test_url_mojom_gurl",
+    ]
+  }
+}
+
+test("url_perftests") {
+  sources = [
+    "run_all_perftests.cc",
+    "url_parse_perftest.cc",
+  ]
+
+  deps = [
+    ":url",
+    "//base",
+    "//base/test:test_support",
+    "//testing/gtest",
+  ]
+}
+
+fuzzer_test("gurl_fuzzer") {
+  sources = [ "gurl_fuzzer.cc" ]
+  deps = [
+    ":url",
+    "//base",
+    "//base:i18n",
+  ]
+  dict = "gurl_fuzzer.dict"
+}
+
+if (is_android && !is_cronet_build) {
+  source_set("android_test_helper") {
+    testonly = true
+    sources = [
+      "android/gurl_java_test_helper.cc",
+      "android/origin_java_test_helper.cc",
+    ]
+    deps = [
+      ":gurl_android",
+      ":j_test_jni_headers",
+      ":origin_android",
+      ":url",
+      "//base/test:test_support",
+    ]
+  }
+
+  android_library("android_test_helper_java") {
+    testonly = true
+    annotation_processor_deps = [ "//base/android/jni_generator:jni_processor" ]
+    sources = [
+      "android/javatests/src/org/chromium/url/GURLJavaTestHelper.java",
+      "android/javatests/src/org/chromium/url/OriginJavaTestHelper.java",
+    ]
+    deps = [
+      ":gurl_java",
+      ":origin_java",
+      "//base:base_java_test_support",
+      "//base:jni_java",
+    ]
+  }
+
+  # Targets depending on gurl_junit_test_support do not need to bypass platform
+  # checks.
+  android_library("gurl_junit_test_support") {
+    testonly = true
+    sources = [ "android/test/java/src/org/chromium/url/JUnitTestGURLs.java" ]
+    deps = [ ":gurl_java" ]
+  }
+
+  # Unlike gurl_junit_test_support, targets depending on gurl_junit_shadows must
+  # bypass platform checks.
+  robolectric_library("gurl_junit_shadows") {
+    sources = [ "android/test/java/src/org/chromium/url/ShadowGURL.java" ]
+    deps = [
+      ":gurl_java",
+      ":gurl_junit_test_support",
+    ]
+  }
+
+  android_library("url_java_unit_tests") {
+    testonly = true
+    sources = [
+      "android/javatests/src/org/chromium/url/GURLJavaTest.java",
+      "android/javatests/src/org/chromium/url/JUnitTestGURLsTest.java",
+      "android/javatests/src/org/chromium/url/OriginJavaTest.java",
+    ]
+    deps = [
+      ":android_test_helper_java",
+      ":gurl_java",
+      ":gurl_junit_test_support",
+      ":origin_java",
+      "//base:base_java",
+      "//base:base_java_test_support",
+      "//base:jni_java",
+      "//content/public/test/android:content_java_test_support",
+      "//mojo/public/mojom/base:base_java",
+      "//third_party/androidx:androidx_core_core_java",
+      "//third_party/androidx:androidx_test_runner_java",
+      "//third_party/junit",
+      "//third_party/mockito:mockito_java",
+      "//url/mojom:url_mojom_gurl_java",
+      "//url/mojom:url_mojom_origin_java",
+    ]
+    annotation_processor_deps = [ "//base/android/jni_generator:jni_processor" ]
+  }
+
+  # See https://bugs.chromium.org/p/chromium/issues/detail?id=908819 for why we
+  # can't put 'java' in the name here.
+  generate_jni("j_test_jni_headers") {
+    testonly = true
+    sources = [
+      "android/javatests/src/org/chromium/url/GURLJavaTestHelper.java",
+      "android/javatests/src/org/chromium/url/OriginJavaTestHelper.java",
+    ]
+  }
+
+  robolectric_library("gurl_junit_tests") {
+    sources = [ "android/junit/src/org/chromium/url/ShadowGURLTest.java" ]
+    deps = [
+      ":gurl_java",
+      ":gurl_junit_shadows",
+      ":gurl_junit_test_support",
+      "//base:base_java_test_support",
+      "//base:base_junit_test_support",
+      "//base/test:test_support_java",
+      "//third_party/junit",
+    ]
+  }
+}
+
+if (is_robolectric) {
+  # Use this in robolectric_binary() targets if you just need GURL and //base
+  # functionality. Otherwise, define a custom shared_library().
+  shared_library("libgurl_robolectric") {
+    sources = [ "android/robolectric_test_main.cc" ]
+    deps = [
+      "//base",
+      "//url:gurl_android",
+    ]
+
+    # Make jni.h available.
+    configs += [ "//third_party/jdk" ]
+  }
+}
diff --git a/DEPS b/DEPS
new file mode 100644
index 00000000000..166f6a26c1e
--- /dev/null
+++ b/DEPS
@@ -0,0 +1,18 @@
+include_rules = [
+  # Limit files that can depend on icu.
+  "-base/i18n",
+  "-third_party/icu",
+]
+
+specific_include_rules = {
+  "gurl_fuzzer.cc": [
+    "+base/i18n",
+  ],
+  "url_(canon|idna)_icu(\.cc|_unittest\.cc)": [
+    "+base/i18n",
+    "+third_party/icu",
+  ],
+  "run_all_unittests\.cc": [
+    "+mojo/core/embedder",
+  ],
+}
diff --git a/DIR_METADATA b/DIR_METADATA
new file mode 100644
index 00000000000..16c80be74ea
--- /dev/null
+++ b/DIR_METADATA
@@ -0,0 +1,11 @@
+# Metadata information for this directory.
+#
+# For more information on DIR_METADATA files, see:
+#   https://source.chromium.org/chromium/infra/infra/+/main:go/src/infra/tools/dirmd/README.md
+#
+# For the schema of this file, see Metadata message:
+#   https://source.chromium.org/chromium/infra/infra/+/main:go/src/infra/tools/dirmd/proto/dir_metadata.proto
+
+monorail {
+  component: "Blink>Network"
+}
\ No newline at end of file
diff --git a/OWNERS b/OWNERS
new file mode 100644
index 00000000000..58f2e5fcef3
--- /dev/null
+++ b/OWNERS
@@ -0,0 +1,9 @@
+set noparent
+# NOTE: keep this in sync with lsc-owners-override@chromium.org owners
+# by emailing lsc-policy@chromium.org when this list changes.
+csharrison@chromium.org
+dcheng@chromium.org
+mkwst@chromium.org
+timothygu@chromium.org
+# NOTE: keep this in sync with lsc-owners-override@chromium.org owners
+# by emailing lsc-policy@chromium.org when this list changes.
diff --git a/README.md b/README.md
new file mode 100644
index 00000000000..8d63fda613d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,75 @@
+# Chrome's URL library
+
+## Layers
+
+There are several conceptual layers in this directory. Going from the lowest
+level up, they are:
+
+### Parsing
+
+The `url_parse.*` files are the parser. This code does no string
+transformations. Its only job is to take an input string and split out the
+components of the URL as best as it can deduce them, for a given type of URL.
+Parsing can never fail; it will take its best guess. This layer does not
+have logic for determining the type of URL parsing to apply; that needs to
+be decided at a higher layer (the "util" layer below).
+
+Because the parser code is derived (_very_ distantly) from some code in
+Mozilla, some of the parser files are in `url/third_party/mozilla/`.
+
+The main header to include for calling the parser is
+`url/third_party/mozilla/url_parse.h`.
+
+### Canonicalization
+
+The `url_canon*` files are the canonicalizer. This code will transform specific
+URL components or specific types of URLs into a standard form. For some
+dangerous or invalid data, the canonicalizer will report that a URL is invalid,
+although it will always try its best to produce output (so the calling code
+can, for example, show the user an error that the URL is invalid). The
+canonicalizer attempts to provide as consistent a representation as possible
+without changing the meaning of a URL.
+
+The canonicalizer layer is designed to be independent of the string type of
+the embedder, so all string output is done through a `CanonOutput` wrapper
+object. An implementation for `std::string` output is provided in
+`url_canon_stdstring.h`.
+
+The main header to include for calling the canonicalizer is
+`url/url_canon.h`.
+
+### Utility
+
+The `url_util*` files provide a higher-level wrapper around the parser and
+canonicalizer. While it can be called directly, it is designed to be the
+foundation for writing URL wrapper objects (The GURL layer and Blink's KURL
+object use the Utility layer to implement the low-level logic).
+
+The Utility code makes decisions about URL types and calls the correct parsing
+and canonicalization functions for those types. It provides an interface to
+register application-specific schemes that have specific requirements.
+Sharing this logic between KURL and GURL is important so that URLs are
+handled consistently across the application.
+
+The main header to include is `url/url_util.h`.
+
+### Google URL (GURL) and Origin
+
+At the highest layer, a C++ object for representing URLs is provided. This
+object uses STL. Most uses need only this layer. Include `url/gurl.h`.
+
+Also at this layer is the Origin object, which exists to make security
+decisions on the web. Include `url/origin.h`.
+
+## Historical background
+
+This code was originally a separate library that was designed to be embedded
+into both Chrome (which uses STL) and WebKit (which didn't use any STL at the
+time). As a result, the parsing, canonicalization, and utility code could
+not use STL, or any other common code in Chromium like base.
+
+When WebKit was forked into the Chromium repo and renamed Blink, this
+restriction was relaxed somewhat. Blink still provides its own URL object
+using its own string type, so the insulation that the Utility layer provides is
+still useful. But some STL strings and calls to base functions have gradually
+been added in places where doing so is possible.
diff --git a/android/OWNERS b/android/OWNERS
new file mode 100644
index 00000000000..c19374d6fb8
--- /dev/null
+++ b/android/OWNERS
@@ -0,0 +1 @@
+mthiesse@chromium.org
diff --git a/android/gurl_android.cc b/android/gurl_android.cc
new file mode 100644
index 00000000000..bf398a13218
--- /dev/null
+++ b/android/gurl_android.cc
@@ -0,0 +1,160 @@
+// Copyright 2019 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/android/gurl_android.h"
+
+#include <jni.h>
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+#include "base/android/jni_android.h"
+#include "base/android/jni_string.h"
+#include "base/functional/bind.h"
+#include "base/functional/callback.h"
+#include "base/memory/ptr_util.h"
+#include "url/android/parsed_android.h"
+#include "url/gurl_jni_headers/GURL_jni.h"
+#include "url/third_party/mozilla/url_parse.h"
+
+using base::android::AttachCurrentThread;
+using base::android::JavaParamRef;
+using base::android::JavaRef;
+using base::android::ScopedJavaLocalRef;
+
+namespace url {
+
+namespace {
+
+static GURL FromJString(JNIEnv* env, const JavaRef<jstring>& uri) {
+  if (uri)
+    return GURL(base::android::ConvertJavaStringToUTF16(env, uri));
+  return GURL();  // Null Java string: fall back to a default GURL.
+}
+
+static std::unique_ptr<GURL> FromJavaGURL(JNIEnv* env,
+                                          const JavaRef<jstring>& j_spec,
+                                          bool is_valid,
+                                          jlong parsed_ptr) {
+  // Takes ownership of the Parsed behind |parsed_ptr|; freed on return.
+  std::unique_ptr<Parsed> parsed =
+      base::WrapUnique(reinterpret_cast<Parsed*>(parsed_ptr));
+  const std::string spec = ConvertJavaStringToUTF8(env, j_spec);
+  return std::make_unique<GURL>(spec.data(), parsed->Length(), *parsed,
+                                is_valid);
+}
+
+static void InitFromGURL(JNIEnv* env,
+                         const GURL& gurl,
+                         const JavaRef<jobject>& target) {
+  // Marshal the spec and parsed components, then pass them to Java_GURL_init.
+  auto j_spec =
+      base::android::ConvertUTF8ToJavaString(env, gurl.possibly_invalid_spec());
+  auto j_parsed = ParsedAndroid::InitFromParsed(
+      env, gurl.parsed_for_possibly_invalid_spec());
+  Java_GURL_init(env, target, j_spec, gurl.is_valid(), j_parsed);
+}
+
+// |GetArrayLength| gives no guarantee about its result (e.g. it may return
+// -1 when |array| is not a valid Java array), so clamp negatives to zero and
+// always hand back a valid, non-negative size.
+template <typename JavaArrayType>
+size_t SafeGetArrayLength(JNIEnv* env, const JavaRef<JavaArrayType>& jarray) {
+  DCHECK(jarray);
+  const jsize length = env->GetArrayLength(jarray.obj());
+  DCHECK_GE(length, 0) << "Invalid array length: " << length;
+  return length < 0 ? size_t{0} : static_cast<size_t>(length);
+}
+
+}  // namespace
+
+// static
+std::unique_ptr<GURL> GURLAndroid::ToNativeGURL(
+    JNIEnv* env,
+    const base::android::JavaRef<jobject>& j_gurl) {
+  auto* native = reinterpret_cast<GURL*>(Java_GURL_toNativeGURL(env, j_gurl));
+  return base::WrapUnique(native);  // Caller now owns the native GURL.
+}
+
+// Copies each Java GURL in |array| into |out|; a null |array| is a no-op.
+void GURLAndroid::JavaGURLArrayToGURLVector(
+    JNIEnv* env,
+    const base::android::JavaRef<jobjectArray>& array,
+    std::vector<GURL>* out) {
+  DCHECK(out);
+  DCHECK(out->empty());
+  if (!array)
+    return;
+  for (size_t i = 0, len = SafeGetArrayLength(env, array); i < len; ++i) {
+    ScopedJavaLocalRef<jobject> j_gurl(
+        env, static_cast<jobject>(env->GetObjectArrayElement(array.obj(), i)));
+    // Route through ToNativeGURL() so the native GURL is freed after the copy.
+    out->emplace_back(*ToNativeGURL(env, j_gurl));
+  }
+}
+
+// static
+ScopedJavaLocalRef<jobject> GURLAndroid::FromNativeGURL(JNIEnv* env,
+                                                        const GURL& gurl) {
+  ScopedJavaLocalRef<jobject> result = Java_GURL_Constructor(env);
+  InitFromGURL(env, gurl, result);  // Fill the new Java GURL from |gurl|.
+  return result;
+}
+
+// static
+ScopedJavaLocalRef<jobject> GURLAndroid::EmptyGURL(JNIEnv* env) {
+  return Java_GURL_emptyGURL(env);  // Forwards to the Java-side emptyGURL().
+}
+
+// static
+ScopedJavaLocalRef<jobjectArray> GURLAndroid::ToJavaArrayOfGURLs(
+    JNIEnv* env,
+    base::span<ScopedJavaLocalRef<jobject>> v) {
+  // Allocate a GURL[] of matching length, initially filled with nulls.
+  jclass gurl_class = org_chromium_url_GURL_clazz(env);
+  DCHECK(gurl_class);
+  jobjectArray j_array = env->NewObjectArray(v.size(), gurl_class, nullptr);
+  base::android::CheckException(env);
+  jsize index = 0;
+  for (const ScopedJavaLocalRef<jobject>& j_gurl : v)
+    env->SetObjectArrayElement(j_array, index++, j_gurl.obj());
+  return ScopedJavaLocalRef<jobjectArray>(env, j_array);
+}
+
+static void JNI_GURL_GetOrigin(JNIEnv* env,
+                               const JavaParamRef<jstring>& j_spec,
+                               jboolean is_valid,
+                               jlong parsed_ptr,
+                               const JavaParamRef<jobject>& target) {
+  const auto native = FromJavaGURL(env, j_spec, is_valid, parsed_ptr);
+  InitFromGURL(env, native->DeprecatedGetOriginAsURL(), target);
+}
+
+static jboolean JNI_GURL_DomainIs(JNIEnv* env,
+                                  const JavaParamRef<jstring>& j_spec,
+                                  jboolean is_valid,
+                                  jlong parsed_ptr,
+                                  const JavaParamRef<jstring>& j_domain) {
+  const auto native = FromJavaGURL(env, j_spec, is_valid, parsed_ptr);
+  // The UTF-8 copy of |j_domain| only needs to live for the DomainIs() call.
+  return native->DomainIs(ConvertJavaStringToUTF8(env, j_domain));
+}
+
+static void JNI_GURL_Init(JNIEnv* env,
+                          const base::android::JavaParamRef<jstring>& uri,
+                          const base::android::JavaParamRef<jobject>& target) {
+  // Parse |uri| natively, then copy the result into the Java |target|.
+  InitFromGURL(env, FromJString(env, uri), target);
+}
+
+static jlong JNI_GURL_CreateNative(JNIEnv* env,
+                                   const JavaParamRef<jstring>& j_spec,
+                                   jboolean is_valid,
+                                   jlong parsed_ptr) {
+  GURL* raw = FromJavaGURL(env, j_spec, is_valid, parsed_ptr).release();
+  return reinterpret_cast<intptr_t>(raw);  // Receiver of the handle owns it.
+}
+
+}  // namespace url
diff --git a/android/gurl_android.h b/android/gurl_android.h
new file mode 100644
index 00000000000..8b356070da6
--- /dev/null
+++ b/android/gurl_android.h
@@ -0,0 +1,49 @@
+// Copyright 2019 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_ANDROID_GURL_ANDROID_H_
+#define URL_ANDROID_GURL_ANDROID_H_
+
+#include <memory>
+#include <vector>
+
+#include "base/android/scoped_java_ref.h"
+#include "base/containers/span.h"
+#include "url/gurl.h"
+
+namespace url {
+
+// Static helpers for passing GURLs between native code and Java over JNI.
+class GURLAndroid {
+ public:
+  // Returns a native GURL for the Java GURL |j_gurl|.
+  static std::unique_ptr<GURL> ToNativeGURL(
+      JNIEnv* env,
+      const base::android::JavaRef<jobject>& j_gurl);
+
+  // Returns a local reference to a Java GURL for the native |gurl|.
+  static base::android::ScopedJavaLocalRef<jobject> FromNativeGURL(
+      JNIEnv* env,
+      const GURL& gurl);
+
+  // Returns a local reference to an empty Java GURL.
+  static base::android::ScopedJavaLocalRef<jobject> EmptyGURL(JNIEnv* env);
+
+  // Returns a new Java array with one element per entry of |v|, in order.
+  static base::android::ScopedJavaLocalRef<jobjectArray> ToJavaArrayOfGURLs(
+      JNIEnv* env,
+      base::span<base::android::ScopedJavaLocalRef<jobject>> v);
+
+  // Converts the Java GURLs in |gurl_array| to native GURLs stored in |*out|.
+  // NOTE(review): whether |*out| is cleared or appended to is not visible
+  // from this header; confirm against the definition.
+  static void JavaGURLArrayToGURLVector(
+      JNIEnv* env,
+      const base::android::JavaRef<jobjectArray>& gurl_array,
+      std::vector<GURL>* out);
+};
+
+}  // namespace url
+
+#endif  // URL_ANDROID_GURL_ANDROID_H_
diff --git a/android/gurl_java_test_helper.cc b/android/gurl_java_test_helper.cc
new file mode 100644
index 00000000000..5ad1b44a6ac
--- /dev/null
+++ b/android/gurl_java_test_helper.cc
@@ -0,0 +1,72 @@
+// Copyright 2019 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stddef.h>
+
+#include "base/android/jni_android.h"
+#include "base/android/jni_string.h"
+#include "base/test/icu_test_util.h"
+#include "url/android/gurl_android.h"
+#include "url/gurl.h"
+#include "url/j_test_jni_headers/GURLJavaTestHelper_jni.h"
+
+using base::android::AttachCurrentThread;
+
+namespace url {
+
+static void JNI_GURLJavaTestHelper_InitializeICU(JNIEnv* env) {
+  base::test::InitializeICUForTesting();  // |env| is unused.
+}
+
+static void JNI_GURLJavaTestHelper_TestGURLEquivalence(JNIEnv* env) {
+  static constexpr const char* kSpecs[] = {
+      // Common Standard URLs.
+      "https://www.google.com",
+      "https://www.google.com/",
+      "https://www.google.com/maps.htm",
+      "https://www.google.com/maps/",
+      "https://www.google.com/index.html",
+      "https://www.google.com/index.html?q=maps",
+      "https://www.google.com/index.html#maps/",
+      "https://foo:bar@www.google.com/maps.htm",
+      "https://www.google.com/maps/au/index.html",
+      "https://www.google.com/maps/au/north",
+      "https://www.google.com/maps/au/north/",
+      "https://www.google.com/maps/au/index.html?q=maps#fragment/",
+      "http://www.google.com:8000/maps/au/index.html?q=maps#fragment/",
+      "https://www.google.com/maps/au/north/?q=maps#fragment",
+      "https://www.google.com/maps/au/north?q=maps#fragment",
+      // Less common standard URLs.
+      "filesystem:http://www.google.com/temporary/bar.html?baz=22",
+      "file:///temporary/bar.html?baz=22",
+      "ftp://foo/test/index.html",
+      "gopher://foo/test/index.html",
+      "ws://foo/test/index.html",
+      // Non-standard URLs.
+      "chrome://foo/bar.html",
+      "httpa://foo/test/index.html",
+      "blob:https://foo.bar/test/index.html",
+      "about:blank",
+      "data:foobar",
+      "scheme:opaque_data",
+      // Invalid URLs.
+      "foobar",
+  };
+  for (const char* spec : kSpecs) {
+    const GURL expected(spec);
+    // Round trip: the Java helper parses |spec|, then we convert back.
+    const auto j_gurl = Java_GURLJavaTestHelper_createGURL(
+        env, base::android::ConvertUTF8ToJavaString(env, spec));
+    const auto actual = GURLAndroid::ToNativeGURL(env, j_gurl);
+    if (expected == *actual)
+      continue;
+    std::stringstream failure;
+    failure << "GURL not equivalent: " << expected << ", " << *actual;
+    env->ThrowNew(env->FindClass("java/lang/AssertionError"),
+                  failure.str().data());
+    return;
+  }
+}
+
+}  // namespace url
diff --git a/android/java/src/org/chromium/url/GURL.java b/android/java/src/org/chromium/url/GURL.java
new file mode 100644
index 00000000000..34bd924951a
--- /dev/null
+++ b/android/java/src/org/chromium/url/GURL.java
@@ -0,0 +1,413 @@
+// Copyright 2019 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package org.chromium.url;
+
+import android.os.SystemClock;
+import android.text.TextUtils;
+
+import androidx.annotation.Nullable;
+import androidx.annotation.VisibleForTesting;
+
+import com.google.errorprone.annotations.DoNotMock;
+
+import org.chromium.base.Log;
+import org.chromium.base.ThreadUtils;
+import org.chromium.base.annotations.CalledByNative;
+import org.chromium.base.annotations.JNINamespace;
+import org.chromium.base.annotations.NativeMethods;
+import org.chromium.base.library_loader.LibraryLoader;
+import org.chromium.base.metrics.RecordHistogram;
+import org.chromium.base.task.PostTask;
+import org.chromium.base.task.TaskTraits;
+import org.chromium.build.annotations.MainDex;
+import org.chromium.url.mojom.Url;
+import org.chromium.url.mojom.UrlConstants;
+
+import java.util.Random;
+
+/**
+ * An immutable Java wrapper for GURL, Chromium's URL parsing library.
+ *
+ * This class is safe to use during startup, but will block on the native library being sufficiently
+ * loaded to use native GURL (and will not wait for content initialization). In practice it's very
+ * unlikely that this will actually block startup unless used extremely early, in which case you
+ * should probably seek an alternative solution to using GURL.
+ *
+ * The design of this class avoids destruction/finalization by caching all values necessary to
+ * reconstruct a GURL in Java, allowing it to be much faster in the common case and easier to use.
+ */
+@JNINamespace("url")
+@MainDex
+@DoNotMock("Create a real instance instead. For Robolectric, see JUnitTestGURLs.java")
+public class GURL {
+    private static final String TAG = "GURL";
+    /* package */ static final int SERIALIZER_VERSION = 1;
+    /* package */ static final char SERIALIZER_DELIMITER = '\0';
+
+    @FunctionalInterface
+    public interface ReportDebugThrowableCallback {
+        void run(Throwable throwable);
+    }
+
+    /**
+     * Exception signalling that a GURL failed to parse due to an unexpected version marker in the
+     * serialized input.
+     */
+    public static class BadSerializerVersionException extends RuntimeException {}
+
+    // Right now this is only collecting reports on Canary which has a relatively small population.
+    private static final int DEBUG_REPORT_PERCENTAGE = 10;
+    private static ReportDebugThrowableCallback sReportCallback;
+
+    // TODO(https://crbug.com/1039841): Right now we return a new String with each request for a
+    //      GURL component other than the spec itself. Should we cache return Strings (as
+    //      WeakReference?) so that callers can share String memory?
+    private String mSpec;
+    private boolean mIsValid;
+    private Parsed mParsed;
+
+    private static class Holder { private static final GURL sEmptyGURL = new GURL(""); }
+
+    @CalledByNative
+    public static GURL emptyGURL() {
+        return Holder.sEmptyGURL;
+    }
+
+    /**
+     * Create a new GURL.
+     *
+     * @param uri The string URI representation to parse into a GURL.
+     */
+    public GURL(String uri) {
+        // Avoid a jni hop (and initializing the native library) for empty GURLs.
+        if (TextUtils.isEmpty(uri)) {
+            mSpec = "";
+            mParsed = Parsed.createEmpty();
+            return;
+        }
+        ensureNativeInitializedForGURL();
+        getNatives().init(uri, this);
+    }
+
+    @CalledByNative
+    protected GURL() {}
+
+    /** Enables debug stack trace gathering for GURL. */
+    public static void setReportDebugThrowableCallback(ReportDebugThrowableCallback callback) {
+        sReportCallback = callback;
+    }
+
+    /**
+     * Ensures that the native library is sufficiently loaded for GURL usage.
+     *
+     * This function is public so that GURL-related usage like the UrlFormatter also counts towards
+     * the "Startup.Android.GURLEnsureMainDexInitialized" histogram.
+     */
+    public static void ensureNativeInitializedForGURL() {
+        if (LibraryLoader.getInstance().isInitialized()) return;
+        long time = SystemClock.elapsedRealtime();
+        LibraryLoader.getInstance().ensureMainDexInitialized();
+        // Record metrics only for the UI thread where the delay in loading the library is relevant.
+        if (ThreadUtils.runningOnUiThread()) {
+            // "MainDex" in name of histogram is a dated reference to when we used to have 2
+            // sections of the native library, main dex and non-main dex. Maintaining name for
+            // consistency in metrics.
+            RecordHistogram.recordTimesHistogram("Startup.Android.GURLEnsureMainDexInitialized",
+                    SystemClock.elapsedRealtime() - time);
+            // Read the static once: the posted task must not observe a later reset to null.
+            final ReportDebugThrowableCallback callback = sReportCallback;
+            if (callback != null && new Random().nextInt(100) < DEBUG_REPORT_PERCENTAGE) {
+                final Throwable throwable =
+                        new Throwable("This is not a crash, please ignore. See crbug.com/1065377.");
+                // This isn't an assert, because by design this is possible, but we would prefer
+                // this path does not get hit more than necessary and getting stack traces from the
+                // wild will help find issues.
+                PostTask.postTask(TaskTraits.BEST_EFFORT_MAY_BLOCK, () -> callback.run(throwable));
+            }
+        }
+    }
+
+    /** @return true if the GURL is null, empty, or invalid. */
+    public static boolean isEmptyOrInvalid(@Nullable GURL gurl) {
+        return gurl == null || gurl.isEmpty() || !gurl.isValid();
+    }
+
+    /** Sets this instance's spec, validity flag and Parsed. */
+    @CalledByNative
+    private void init(String spec, boolean isValid, Parsed parsed) {
+        mSpec = spec;
+        // Ensure that the spec only contains US-ASCII or the parsed indices will be wrong.
+        assert mSpec.matches("\\A\\p{ASCII}*\\z");
+        mIsValid = isValid;
+        mParsed = parsed;
+    }
+
+    /** @return The native pointer of a native GURL reconstructed from this object's fields. */
+    @CalledByNative
+    private long toNativeGURL() {
+        return getNatives().createNative(mSpec, mIsValid, mParsed.toNativeParsed());
+    }
+
+    /**
+     * See native GURL::is_valid().
+     */
+    public boolean isValid() {
+        return mIsValid;
+    }
+
+    /**
+     * See native GURL::spec().
+     */
+    public String getSpec() {
+        if (isValid() || mSpec.isEmpty()) return mSpec;
+        assert false : "Trying to get the spec of an invalid URL!";
+        return "";
+    }
+
+    /** @return Either a valid Spec (see {@link #getSpec}), or an empty string. */
+    public String getValidSpecOrEmpty() {
+        if (isValid()) return mSpec;
+        return "";
+    }
+
+    /**
+     * See native GURL::possibly_invalid_spec().
+     */
+    public String getPossiblyInvalidSpec() {
+        return mSpec;
+    }
+
+    /** @return The spec substring [begin, begin + length), or "" when length <= 0. */
+    private String getComponent(int begin, int length) {
+        if (length <= 0) return "";
+        return mSpec.substring(begin, begin + length);
+    }
+
+    /**
+     * See native GURL::scheme().
+     */
+    public String getScheme() {
+        return getComponent(mParsed.mSchemeBegin, mParsed.mSchemeLength);
+    }
+
+    /**
+     * See native GURL::username().
+     */
+    public String getUsername() {
+        return getComponent(mParsed.mUsernameBegin, mParsed.mUsernameLength);
+    }
+
+    /**
+     * See native GURL::password().
+     */
+    public String getPassword() {
+        return getComponent(mParsed.mPasswordBegin, mParsed.mPasswordLength);
+    }
+
+    /**
+     * See native GURL::host().
+     */
+    public String getHost() {
+        return getComponent(mParsed.mHostBegin, mParsed.mHostLength);
+    }
+
+    /**
+     * See native GURL::port().
+     *
+     * Note: Do not convert this to an integer yourself. See native GURL::IntPort().
+     */
+    public String getPort() {
+        return getComponent(mParsed.mPortBegin, mParsed.mPortLength);
+    }
+
+    /**
+     * See native GURL::path().
+     */
+    public String getPath() {
+        return getComponent(mParsed.mPathBegin, mParsed.mPathLength);
+    }
+
+    /**
+     * See native GURL::query().
+     */
+    public String getQuery() {
+        return getComponent(mParsed.mQueryBegin, mParsed.mQueryLength);
+    }
+
+    /**
+     * See native GURL::ref().
+     */
+    public String getRef() {
+        return getComponent(mParsed.mRefBegin, mParsed.mRefLength);
+    }
+
+    /** @return Whether the GURL is the empty String. */
+    public boolean isEmpty() {
+        return mSpec.isEmpty();
+    }
+
+    /**
+     * See native GURL::DeprecatedGetOriginAsURL().
+     */
+    public GURL getOrigin() {
+        GURL target = new GURL();
+        getOriginInternal(target);
+        return target;
+    }
+
+    /** Fills |target| with this URL's origin via native; subclasses pass their own type. */
+    protected void getOriginInternal(GURL target) {
+        getNatives().getOrigin(mSpec, mIsValid, mParsed.toNativeParsed(), target);
+    }
+
+    /**
+     * See native GURL::DomainIs().
+     */
+    public boolean domainIs(String domain) {
+        return getNatives().domainIs(mSpec, mIsValid, mParsed.toNativeParsed(), domain);
+    }
+
+    /** Hashes only the spec, matching {@link #equals}, which compares specs. */
+    @Override
+    public final int hashCode() {
+        return mSpec.hashCode();
+    }
+
+    @Override
+    public final boolean equals(Object other) {
+        if (other == this) return true;
+        if (!(other instanceof GURL)) return false;
+        return mSpec.equals(((GURL) other).mSpec);
+    }
+
+    /**
+     * Serialize a GURL to a String, to be used with {@link GURL#deserialize(String)}.
+     *
+     * Note that a serialized GURL should only be used internally to Chrome, and should *never* be
+     * used if coming from an untrusted source.
+     *
+     * @return A serialized GURL.
+     */
+    public final String serialize() {
+        StringBuilder builder = new StringBuilder();
+        builder.append(SERIALIZER_VERSION).append(SERIALIZER_DELIMITER);
+        builder.append(mIsValid).append(SERIALIZER_DELIMITER);
+        builder.append(mParsed.serialize()).append(SERIALIZER_DELIMITER);
+        builder.append(mSpec);
+        String serialization = builder.toString();
+        return Integer.toString(serialization.length()) + SERIALIZER_DELIMITER + serialization;
+    }
+
+    /**
+     * Deserialize a GURL serialized with {@link GURL#serialize()}. This will re-parse in case of
+     * version mismatch, which may trigger undesired native loading. See {@link
+     * #deserializeLatestVersionOnly} if you want to fail in case of version mismatch.
+     *
+     * This function should *never* be used on a String coming from an untrusted source.
+     *
+     * @return The deserialized GURL (or null if the input is empty).
+     */
+    public static GURL deserialize(@Nullable String gurl) {
+        try {
+            return deserializeLatestVersionOnly(gurl);
+        } catch (BadSerializerVersionException be) {
+            // Just re-parse the GURL on version changes.
+            String[] tokens = gurl.split(Character.toString(SERIALIZER_DELIMITER));
+            return new GURL(getSpecFromTokens(gurl, tokens));
+        } catch (Exception e) {
+            // This is unexpected, maybe the storage got corrupted somehow?
+            Log.w(TAG, "Exception while deserializing a GURL: " + gurl, e);
+            return emptyGURL();
+        }
+    }
+
+    /**
+     * Deserialize a GURL serialized with {@link #serialize()}, throwing {@code
+     * BadSerializerVersionException} if the serialized input has a version other than the latest.
+     * This function should never be used on a String coming from an untrusted source.
+     */
+    public static GURL deserializeLatestVersionOnly(@Nullable String gurl) {
+        if (TextUtils.isEmpty(gurl)) return emptyGURL();
+        String[] tokens = gurl.split(Character.toString(SERIALIZER_DELIMITER));
+
+        // First token MUST always be the length of the serialized data.
+        String length = tokens[0];
+        if (gurl.length() != Integer.parseInt(length) + length.length() + 1) {
+            throw new IllegalArgumentException("Serialized GURL had the wrong length.");
+        }
+
+        String spec = getSpecFromTokens(gurl, tokens);
+        // Second token MUST always be the version number.
+        int version = Integer.parseInt(tokens[1]);
+        if (version != SERIALIZER_VERSION) {
+            throw new BadSerializerVersionException();
+        }
+
+        boolean isValid = Boolean.parseBoolean(tokens[2]);
+        Parsed parsed = Parsed.deserialize(tokens, 3);
+        GURL result = new GURL();
+        result.init(spec, isValid, parsed);
+        return result;
+    }
+
+    private static String getSpecFromTokens(String gurl, String[] tokens) {
+        // Last token MUST always be the original spec.
+        // Special case for empty spec - it won't get its own token.
+        return gurl.endsWith(Character.toString(SERIALIZER_DELIMITER)) ? ""
+                                                                       : tokens[tokens.length - 1];
+    }
+
+    /**
+     * Returns the instance of {@link Natives}. The Robolectric Shadow intercepts invocations of
+     * this method.
+     *
+     * <p>Unlike {@code GURLJni.TEST_HOOKS.setInstanceForTesting}, shadowing this method doesn't
+     * rely on tests correctly cleaning up global state.
+     */
+    private static Natives getNatives() {
+        return GURLJni.get();
+    }
+
+    /** Inits this GURL with the internal state of another GURL. */
+    @VisibleForTesting
+    /* package */ void initForTesting(GURL gurl) {
+        init(gurl.mSpec, gurl.mIsValid, gurl.mParsed);
+    }
+
+    /** @return A Mojom representation of this URL. */
+    public Url toMojom() {
+        Url url = new Url();
+        // See url/mojom/url_gurl_mojom_traits.cc.
+        url.url = TextUtils.isEmpty(getPossiblyInvalidSpec())
+                        || getPossiblyInvalidSpec().length() > UrlConstants.MAX_URL_CHARS
+                        || !isValid()
+                ? ""
+                : getPossiblyInvalidSpec();
+        return url;
+    }
+
+    @NativeMethods
+    interface Natives {
+        /**
+         * Initializes the provided |target| by parsing the provided |uri|.
+         */
+        void init(String uri, GURL target);
+
+        /**
+         * Reconstructs the native GURL for this Java GURL and initializes |target| with its Origin.
+         */
+        void getOrigin(String spec, boolean isValid, long nativeParsed, GURL target);
+
+        /**
+         * Reconstructs the native GURL for this Java GURL, and calls GURL.DomainIs.
+         */
+        boolean domainIs(String spec, boolean isValid, long nativeParsed, String domain);
+
+        /**
+         * Reconstructs the native GURL for this Java GURL, returning its native pointer.
+         */
+        long createNative(String spec, boolean isValid, long nativeParsed);
+    }
+}
diff --git a/android/java/src/org/chromium/url/IDNStringUtil.java b/android/java/src/org/chromium/url/IDNStringUtil.java
new file mode 100644
index 00000000000..10957b673f3
--- /dev/null
+++ b/android/java/src/org/chromium/url/IDNStringUtil.java
@@ -0,0 +1,33 @@
+// Copyright 2014 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package org.chromium.url;
+
+import org.chromium.base.annotations.CalledByNative;
+import org.chromium.base.annotations.JNINamespace;
+
+import java.net.IDN;
+
+/**
+ * This class is used to convert unicode IDN domain names to ASCII, when not
+ * building with ICU.
+ */
+@JNINamespace("url::android")
+public class IDNStringUtil {
+    /**
+     * Attempts to convert a Unicode string to an ASCII string using IDN rules.
+     * As of May 2014, the underlying Java function implements IDNA2003.
+     * @param src String to convert.
+     * @return String containing only ASCII characters on success, null on
+     *         failure.
+     */
+    @CalledByNative
+    private static String idnToASCII(String src) {
+        try {
+            return IDN.toASCII(src, IDN.USE_STD3_ASCII_RULES);
+        } catch (Exception e) {
+            return null;
+        }
+    }
+}
\ No newline at end of file
diff --git a/android/java/src/org/chromium/url/Origin.java b/android/java/src/org/chromium/url/Origin.java
new file mode 100644
index 00000000000..87ce87066e1
--- /dev/null
+++ b/android/java/src/org/chromium/url/Origin.java
@@ -0,0 +1,114 @@
+// Copyright 2019 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package org.chromium.url;
+
+import org.chromium.base.annotations.CalledByNative;
+import org.chromium.base.annotations.JNINamespace;
+import org.chromium.base.annotations.NativeMethods;
+
+/** An origin is either a (scheme, host, port) tuple or is opaque. */
+@JNINamespace("url")
+public class Origin {
+    private final String mScheme;
+    private final String mHost;
+    // Kept as a short; getPort() widens it with Short.toUnsignedInt().
+    private final short mPort;
+
+    private final boolean mIsOpaque;
+
+    // Serialization of the Unguessable Token. Do not use directly.
+    private final long mTokenHighBits;
+    private final long mTokenLowBits;
+
+    /**
+     * Constructs an opaque origin.
+     */
+    public static Origin createOpaqueOrigin() {
+        return OriginJni.get().createOpaque();
+    }
+
+    /**
+     * Constructs an Origin from a GURL.
+     *
+     * See origin.h for many warnings about this method.
+     */
+    public static Origin create(GURL gurl) {
+        return OriginJni.get().createFromGURL(gurl);
+    }
+
+    /**
+     * Builds a Java analogue of the c++ Origin class from a mojo Origin.
+     *
+     * `org.chromium.url.internal.mojom.Origin`s are provided by Mojo-generated code but are not
+     * intended for direct use (see crbug.com/1156866).
+     *
+     * @param mojoOrigin The mojo Origin whose fields are copied into the new object.
+     */
+    public Origin(org.chromium.url.internal.mojom.Origin mojoOrigin) {
+        mScheme = mojoOrigin.scheme;
+        mHost = mojoOrigin.host;
+        mPort = mojoOrigin.port;
+        // A mojo Origin is treated as opaque exactly when it carries a nonce.
+        mIsOpaque = mojoOrigin.nonceIfOpaque != null;
+        mTokenHighBits = mIsOpaque ? mojoOrigin.nonceIfOpaque.high : 0;
+        mTokenLowBits = mIsOpaque ? mojoOrigin.nonceIfOpaque.low : 0;
+    }
+
+    /**
+     * Field-by-field constructor; annotated so that native code can create instances.
+     */
+    @CalledByNative
+    private Origin(String scheme, String host, short port, boolean isOpaque, long tokenHighBits,
+            long tokenLowBits) {
+        mScheme = scheme;
+        mHost = host;
+        mPort = port;
+        mIsOpaque = isOpaque;
+        mTokenHighBits = tokenHighBits;
+        mTokenLowBits = tokenLowBits;
+    }
+
+    /** @return The scheme of the origin. Returns an empty string for an opaque origin. */
+    public String getScheme() {
+        return isOpaque() ? "" : mScheme;
+    }
+
+    /** @return The host of the origin. Returns an empty string for an opaque origin. */
+    public String getHost() {
+        return isOpaque() ? "" : mHost;
+    }
+
+    /** @return The port of the origin. Returns 0 for an opaque origin. */
+    public int getPort() {
+        return isOpaque() ? 0 : Short.toUnsignedInt(mPort);
+    }
+
+    /** @return Whether the origin is opaque. */
+    public boolean isOpaque() {
+        return mIsOpaque;
+    }
+
+    /** @return The native pointer obtained by passing every field to {@code createNative}. */
+    @CalledByNative
+    private long toNativeOrigin() {
+        return OriginJni.get().createNative(
+                mScheme, mHost, mPort, mIsOpaque, mTokenHighBits, mTokenLowBits);
+    }
+
+    @NativeMethods
+    interface Natives {
+        /** Constructs a new Opaque origin. */
+        Origin createOpaque();
+
+        /** Constructs an Origin from a GURL. */
+        Origin createFromGURL(GURL gurl);
+
+        /**
+         * Reconstructs the native Origin for this Java Origin, returning its native pointer.
+         */
+        long createNative(String scheme, String host, short port, boolean isOpaque,
+                long tokenHighBits, long tokenLowBits);
+    }
+}
diff --git a/android/java/src/org/chromium/url/Parsed.java b/android/java/src/org/chromium/url/Parsed.java
new file mode 100644
index 00000000000..ca41cfb1f4f
--- /dev/null
+++ b/android/java/src/org/chromium/url/Parsed.java
@@ -0,0 +1,141 @@
+// Copyright 2019 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package org.chromium.url;
+
+import org.chromium.base.annotations.CalledByNative;
+import org.chromium.base.annotations.JNINamespace;
+import org.chromium.base.annotations.NativeMethods;
+import org.chromium.build.annotations.MainDex;
+
+/**
+ * A java wrapper for Parsed, GURL's internal parsed URI representation.
+ */
+@MainDex
+@JNINamespace("url")
+/* package */ class Parsed {
+    // Number of int offsets: a (begin, length) pair for each of the eight components below.
+    private static final int OFFSET_COUNT = 16;
+
+    // Offsets into the spec string; GURL uses them to slice out the individual components.
+    /* package */ final int mSchemeBegin;
+    /* package */ final int mSchemeLength;
+    /* package */ final int mUsernameBegin;
+    /* package */ final int mUsernameLength;
+    /* package */ final int mPasswordBegin;
+    /* package */ final int mPasswordLength;
+    /* package */ final int mHostBegin;
+    /* package */ final int mHostLength;
+    /* package */ final int mPortBegin;
+    /* package */ final int mPortLength;
+    /* package */ final int mPathBegin;
+    /* package */ final int mPathLength;
+    /* package */ final int mQueryBegin;
+    /* package */ final int mQueryLength;
+    /* package */ final int mRefBegin;
+    /* package */ final int mRefLength;
+    // Parsed of the inner URL, or null when there is none.
+    private final Parsed mInnerUrl;
+    private final boolean mPotentiallyDanglingMarkup;
+
+    /** @return A Parsed whose components all have begin 0 and length -1, with no inner Parsed. */
+    /* package */ static Parsed createEmpty() {
+        return new Parsed(0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, false, null);
+    }
+
+    /**
+     * Stores each argument in the field of the same name. Annotated {@code @CalledByNative}; also
+     * used from Java by {@link #createEmpty()} and {@link #deserialize}.
+     */
+    @CalledByNative
+    private Parsed(int schemeBegin, int schemeLength, int usernameBegin, int usernameLength,
+            int passwordBegin, int passwordLength, int hostBegin, int hostLength, int portBegin,
+            int portLength, int pathBegin, int pathLength, int queryBegin, int queryLength,
+            int refBegin, int refLength, boolean potentiallyDanglingMarkup, Parsed innerUrl) {
+        mSchemeBegin = schemeBegin;
+        mSchemeLength = schemeLength;
+        mUsernameBegin = usernameBegin;
+        mUsernameLength = usernameLength;
+        mPasswordBegin = passwordBegin;
+        mPasswordLength = passwordLength;
+        mHostBegin = hostBegin;
+        mHostLength = hostLength;
+        mPortBegin = portBegin;
+        mPortLength = portLength;
+        mPathBegin = pathBegin;
+        mPathLength = pathLength;
+        mQueryBegin = queryBegin;
+        mQueryLength = queryLength;
+        mRefBegin = refBegin;
+        mRefLength = refLength;
+        mPotentiallyDanglingMarkup = potentiallyDanglingMarkup;
+        mInnerUrl = innerUrl;
+    }
+
+    /**
+     * Creates a native Parsed from these fields, converting the inner Parsed first when present.
+     *
+     * @return The value returned by the native {@code createNative} call.
+     */
+    /* package */ long toNativeParsed() {
+        final long inner = (mInnerUrl == null) ? 0 : mInnerUrl.toNativeParsed();
+        return ParsedJni.get().createNative(mSchemeBegin, mSchemeLength, mUsernameBegin,
+                mUsernameLength, mPasswordBegin, mPasswordLength, mHostBegin, mHostLength,
+                mPortBegin, mPortLength, mPathBegin, mPathLength, mQueryBegin, mQueryLength,
+                mRefBegin, mRefLength, mPotentiallyDanglingMarkup, inner);
+    }
+
+    /**
+     * Writes the sixteen offsets, the dangling-markup flag and a has-inner flag, each separated by
+     * {@link GURL#SERIALIZER_DELIMITER}; when an inner Parsed exists its serialization is appended
+     * after one more delimiter.
+     */
+    /* package */ String serialize() {
+        final int[] offsets = {mSchemeBegin, mSchemeLength, mUsernameBegin, mUsernameLength,
+                mPasswordBegin, mPasswordLength, mHostBegin, mHostLength, mPortBegin, mPortLength,
+                mPathBegin, mPathLength, mQueryBegin, mQueryLength, mRefBegin, mRefLength};
+        final StringBuilder out = new StringBuilder();
+        for (int offset : offsets) {
+            out.append(offset).append(GURL.SERIALIZER_DELIMITER);
+        }
+        out.append(mPotentiallyDanglingMarkup).append(GURL.SERIALIZER_DELIMITER);
+        // The inner Parsed is optional; this flag tells deserialize() whether to recurse.
+        final boolean hasInner = mInnerUrl != null;
+        out.append(hasInner);
+        if (hasInner) {
+            out.append(GURL.SERIALIZER_DELIMITER).append(mInnerUrl.serialize());
+        }
+        return out.toString();
+    }
+
+    /**
+     * Reads a Parsed from {@code tokens} starting at {@code startIndex}, consuming fields in the
+     * order {@link #serialize()} writes them and recursing when the has-inner flag is true.
+     */
+    /* package */ static Parsed deserialize(String[] tokens, int startIndex) {
+        final int[] offsets = new int[OFFSET_COUNT];
+        int cursor = startIndex;
+        for (int i = 0; i < offsets.length; i++) {
+            offsets[i] = Integer.parseInt(tokens[cursor++]);
+        }
+        final boolean potentiallyDanglingMarkup = Boolean.parseBoolean(tokens[cursor++]);
+        final boolean hasInner = Boolean.parseBoolean(tokens[cursor++]);
+        final Parsed innerParsed = hasInner ? Parsed.deserialize(tokens, cursor) : null;
+        return new Parsed(offsets[0], offsets[1], offsets[2], offsets[3], offsets[4], offsets[5],
+                offsets[6], offsets[7], offsets[8], offsets[9], offsets[10], offsets[11],
+                offsets[12], offsets[13], offsets[14], offsets[15], potentiallyDanglingMarkup,
+                innerParsed);
+    }
+
+    @NativeMethods
+    interface Natives {
+        /**
+         * Create and return the pointer to a native Parsed.
+         */
+        long createNative(int schemeBegin, int schemeLength, int usernameBegin, int usernameLength,
+                int passwordBegin, int passwordLength, int hostBegin, int hostLength, int portBegin,
+                int portLength, int pathBegin, int pathLength, int queryBegin, int queryLength,
+                int refBegin, int refLength, boolean potentiallyDanglingMarkup, long innerUrl);
+    }
+}
diff --git a/android/java/src/org/chromium/url/URI.java b/android/java/src/org/chromium/url/URI.java
new file mode 100644
index 00000000000..e83d6157791
--- /dev/null
+++ b/android/java/src/org/chromium/url/URI.java
@@ -0,0 +1,61 @@
+// Copyright 2019 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package org.chromium.url;
+
+import java.net.URISyntaxException;
+
+/**
+ * Compatibility shim: a {@link GURL} whose API mostly mirrors that of java.net.URI.
+ *
+ * @deprecated Please use GURL directly in new code.
+ */
+@Deprecated
+public class URI extends GURL {
+    /**
+     * Parses {@code uri} as a GURL; throws URISyntaxException if the result is not valid.
+     */
+    public URI(String uri) throws URISyntaxException {
+        super(uri);
+        if (isValid()) return;
+        throw new URISyntaxException(uri, "Uri could not be parsed as a valid GURL");
+    }
+
+    /** Only used by getOrigin(), which hands the new instance to getOriginInternal(). */
+    private URI() {}
+
+    /**
+     * Convenience wrapper around {@link URI#URI(String)} that rethrows its URISyntaxException
+     * wrapped in an unchecked IllegalArgumentException.
+     */
+    public static URI create(String str) {
+        try {
+            return new URI(str);
+        } catch (URISyntaxException cause) {
+            throw new IllegalArgumentException(cause);
+        }
+    }
+
+    @Override
+    public URI getOrigin() {
+        URI origin = new URI();
+        getOriginInternal(origin);
+        return origin;
+    }
+
+    /** Alias for {@link GURL#getRef()}, named after the java.net.URI accessor. */
+    public String getFragment() {
+        return getRef();
+    }
+
+    /** Mirrors {@link java.net.URI#isAbsolute()}: true when the scheme is non-empty. */
+    public boolean isAbsolute() {
+        return !getScheme().isEmpty();
+    }
+
+    @Override
+    public String toString() {
+        return getPossiblyInvalidSpec();
+    }
+}
diff --git a/android/javatests/DEPS b/android/javatests/DEPS
new file mode 100644
index 00000000000..aa935913119
--- /dev/null
+++ b/android/javatests/DEPS
@@ -0,0 +1,3 @@
+include_rules = [
+  "+content/public/test/android",
+]
diff --git a/android/javatests/src/org/chromium/url/GURLJavaTest.java b/android/javatests/src/org/chromium/url/GURLJavaTest.java
new file mode 100644
index 00000000000..e684e510338
--- /dev/null
+++ b/android/javatests/src/org/chromium/url/GURLJavaTest.java
@@ -0,0 +1,314 @@
+// Copyright 2019 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package org.chromium.url;
+
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.doThrow;
+
+import androidx.test.filters.SmallTest;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+
+import org.chromium.base.test.BaseJUnit4ClassRunner;
+import org.chromium.base.test.util.Batch;
+import org.chromium.content_public.browser.test.NativeLibraryTestUtils;
+
+import java.net.URISyntaxException;
+
+/**
+ * Tests for {@link GURL}. GURL relies heavily on the native implementation, and the lion's share of
+ * the logic is tested there. This test is primarily to make sure everything is plumbed through
+ * correctly.
+ */
+@RunWith(BaseJUnit4ClassRunner.class)
+@Batch(Batch.UNIT_TESTS)
+public class GURLJavaTest {
+    @Mock
+    GURL.Natives mGURLMocks;
+
+    @Before
+    public void setUp() {
+        // Load the native library and initialize ICU before any test constructs a GURL.
+        NativeLibraryTestUtils.loadNativeLibraryNoBrowserProcess();
+        GURLJavaTestHelper.nativeInitializeICU();
+        MockitoAnnotations.initMocks(this);
+    }
+
+    /* package */ static void deepAssertEquals(GURL expected, GURL actual) {
+        Assert.assertEquals(expected, actual);
+        Assert.assertEquals(expected.getScheme(), actual.getScheme());
+        Assert.assertEquals(expected.getUsername(), actual.getUsername());
+        Assert.assertEquals(expected.getPassword(), actual.getPassword());
+        Assert.assertEquals(expected.getHost(), actual.getHost());
+        Assert.assertEquals(expected.getPort(), actual.getPort());
+        Assert.assertEquals(expected.getPath(), actual.getPath());
+        Assert.assertEquals(expected.getQuery(), actual.getQuery());
+        Assert.assertEquals(expected.getRef(), actual.getRef());
+    }
+
+    private String prependLengthToSerialization(String serialization) {
+        return Integer.toString(serialization.length()) + GURL.SERIALIZER_DELIMITER + serialization;
+    }
+
+    @SmallTest
+    @Test
+    public void testGURLEquivalence() {
+        GURLJavaTestHelper.nativeTestGURLEquivalence();
+    }
+
+    // Equivalent of GURLTest.Components
+    @SmallTest
+    @Test
+    @SuppressWarnings(value = "AuthLeak")
+    public void testComponents() {
+        GURL empty = new GURL("");
+        Assert.assertTrue(empty.isEmpty());
+        Assert.assertFalse(empty.isValid());
+
+        GURL url = new GURL("http://user:pass@google.com:99/foo;bar?q=a#ref");
+        Assert.assertFalse(url.isEmpty());
+        Assert.assertTrue(url.isValid());
+        Assert.assertTrue(url.getScheme().equals("http"));
+
+        Assert.assertEquals("http://user:pass@google.com:99/foo;bar?q=a#ref", url.getSpec());
+
+        Assert.assertEquals("http", url.getScheme());
+        Assert.assertEquals("user", url.getUsername());
+        Assert.assertEquals("pass", url.getPassword());
+        Assert.assertEquals("google.com", url.getHost());
+        Assert.assertEquals("99", url.getPort());
+        Assert.assertEquals("/foo;bar", url.getPath());
+        Assert.assertEquals("q=a", url.getQuery());
+        Assert.assertEquals("ref", url.getRef());
+
+        // Test parsing userinfo with special characters.
+        GURL urlSpecialPass = new GURL("http://user:%40!$&'()*+,;=:@google.com:12345");
+        Assert.assertTrue(urlSpecialPass.isValid());
+        // GURL canonicalizes some delimiters.
+        Assert.assertEquals("%40!$&%27()*+,%3B%3D%3A", urlSpecialPass.getPassword());
+        Assert.assertEquals("google.com", urlSpecialPass.getHost());
+        Assert.assertEquals("12345", urlSpecialPass.getPort());
+    }
+
+    // Equivalent of GURLTest.Empty
+    @SmallTest
+    @Test
+    public void testEmpty() {
+        GURLJni.TEST_HOOKS.setInstanceForTesting(mGURLMocks);
+        doThrow(new RuntimeException("Should not need to parse empty URL"))
+                .when(mGURLMocks)
+                .init(any(), any());
+        GURL url = new GURL("");
+        Assert.assertFalse(url.isValid());
+        Assert.assertEquals("", url.getSpec());
+
+        Assert.assertEquals("", url.getScheme());
+        Assert.assertEquals("", url.getUsername());
+        Assert.assertEquals("", url.getPassword());
+        Assert.assertEquals("", url.getHost());
+        Assert.assertEquals("", url.getPort());
+        Assert.assertEquals("", url.getPath());
+        Assert.assertEquals("", url.getQuery());
+        Assert.assertEquals("", url.getRef());
+        GURLJni.TEST_HOOKS.setInstanceForTesting(null);
+    }
+
+    // Test that GURL and URI return the correct Origin.
+    @SmallTest
+    @Test
+    @SuppressWarnings(value = "AuthLeak")
+    public void testOrigin() throws URISyntaxException {
+        final String filesystemSpec = "filesystem:http://user:pass@google.com:21/blah#baz";
+        final String innerOrigin = "http://google.com:21/";
+        // A filesystem: URL reports the origin of the URL nested inside it.
+        Assert.assertEquals(innerOrigin, new GURL(filesystemSpec).getOrigin().getSpec());
+        // The URI shim overrides getOrigin() to return a URI; the spec must be the same.
+        URI uriOrigin = new URI(filesystemSpec).getOrigin();
+        Assert.assertEquals(innerOrigin, uriOrigin.getSpec());
+        // The origin of a javascript: URL has an empty spec.
+        GURL script = new GURL("javascript:window.alert(\"hello,world\");");
+        Assert.assertEquals("", script.getOrigin().getSpec());
+    }
+
+    @SmallTest
+    @Test
+    public void testWideInput() throws URISyntaxException {
+        final String kExpectedSpec = "http://xn--1xa.com/";
+
+        GURL url = new GURL("http://\u03C0.com");
+        Assert.assertEquals(kExpectedSpec, url.getSpec());
+        Assert.assertEquals("http", url.getScheme());
+        Assert.assertEquals("", url.getUsername());
+        Assert.assertEquals("", url.getPassword());
+        Assert.assertEquals("xn--1xa.com", url.getHost());
+        Assert.assertEquals("", url.getPort());
+        Assert.assertEquals("/", url.getPath());
+        Assert.assertEquals("", url.getQuery());
+        Assert.assertEquals("", url.getRef());
+    }
+
+    @SmallTest
+    @Test
+    @SuppressWarnings(value = "AuthLeak")
+    public void testSerialization() {
+        GURL cases[] = {
+                // Common Standard URLs.
+                new GURL("https://www.google.com"),
+                new GURL("https://www.google.com/"),
+                new GURL("https://www.google.com/maps.htm"),
+                new GURL("https://www.google.com/maps/"),
+                new GURL("https://www.google.com/index.html"),
+                new GURL("https://www.google.com/index.html?q=maps"),
+                new GURL("https://www.google.com/index.html#maps/"),
+                new GURL("https://foo:bar@www.google.com/maps.htm"),
+                new GURL("https://www.google.com/maps/au/index.html"),
+                new GURL("https://www.google.com/maps/au/north"),
+                new GURL("https://www.google.com/maps/au/north/"),
+                new GURL("https://www.google.com/maps/au/index.html?q=maps#fragment/"),
+                new GURL("http://www.google.com:8000/maps/au/index.html?q=maps#fragment/"),
+                new GURL("https://www.google.com/maps/au/north/?q=maps#fragment"),
+                new GURL("https://www.google.com/maps/au/north?q=maps#fragment"),
+                // Less common standard URLs.
+                new GURL("filesystem:http://www.google.com/temporary/bar.html?baz=22"),
+                new GURL("file:///temporary/bar.html?baz=22"),
+                new GURL("ftp://foo/test/index.html"),
+                new GURL("gopher://foo/test/index.html"),
+                new GURL("ws://foo/test/index.html"),
+                // Non-standard,
+                new GURL("chrome://foo/bar.html"),
+                new GURL("httpa://foo/test/index.html"),
+                new GURL("blob:https://foo.bar/test/index.html"),
+                new GURL("about:blank"),
+                new GURL("data:foobar"),
+                new GURL("scheme:opaque_data"),
+                // Invalid URLs.
+                new GURL("foobar"),
+                // URLs containing the delimiter
+                new GURL("https://www.google.ca/" + GURL.SERIALIZER_DELIMITER + ",foo"),
+                new GURL("https://www.foo" + GURL.SERIALIZER_DELIMITER + "bar.com"),
+        };
+
+        GURLJni.TEST_HOOKS.setInstanceForTesting(mGURLMocks);
+        doThrow(new RuntimeException("Should not re-initialize for deserialization when the "
+                        + "version hasn't changed."))
+                .when(mGURLMocks)
+                .init(any(), any());
+        for (GURL url : cases) {
+            GURL out = GURL.deserialize(url.serialize());
+            deepAssertEquals(url, out);
+        }
+        GURLJni.TEST_HOOKS.setInstanceForTesting(null);
+    }
+
+    /**
+     * Tests that we re-parse the URL from the spec, which must always be the last token in the
+     * serialization, if the serialization version differs.
+     */
+    @SmallTest
+    @Test
+    public void testSerializationWithVersionSkew() {
+        GURL url = new GURL("https://www.google.com");
+        String serialization = (GURL.SERIALIZER_VERSION + 1)
+                + ",0,0,0,0,foo,https://url.bad,blah,0,".replace(',', GURL.SERIALIZER_DELIMITER)
+                + url.getSpec();
+        serialization = prependLengthToSerialization(serialization);
+        GURL out = GURL.deserialize(serialization);
+        deepAssertEquals(url, out);
+    }
+
+    /**
+     * Tests that fields that aren't visible to java code (e.g. the inner Parsed) survive a
+     * deserialize/serialize round trip unchanged.
+     */
+    @SmallTest
+    @Test
+    public void testSerializationOfPrivateFields() {
+        String serialization = GURL.SERIALIZER_VERSION + ",true,"
+                // Outer Parsed.
+                + "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,false,true,"
+                // Inner Parsed.
+                + "17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,true,false,"
+                + "chrome://foo/bar.html";
+        serialization = serialization.replace(',', GURL.SERIALIZER_DELIMITER);
+        serialization = prependLengthToSerialization(serialization);
+        GURL url = GURL.deserialize(serialization);
+        Assert.assertEquals(serialization, url.serialize());
+    }
+
+    /**
+     * Tests that a serialized GURL truncated by storage deserializes to the empty GURL.
+     */
+    @SmallTest
+    @Test
+    public void testTruncatedDeserialization() {
+        String serialization = "123,1,true,1,2,3,4,5,6,7,8,9,10";
+        serialization = serialization.replace(',', GURL.SERIALIZER_DELIMITER);
+        GURL url = GURL.deserialize(serialization);
+        Assert.assertEquals(GURL.emptyGURL(), url);
+    }
+
+    /**
+     * Tests that corrupted serializations deserialize to the empty GURL.
+     */
+    @SmallTest
+    @Test
+    public void testCorruptedSerializations() {
+        String serialization = new GURL("https://www.google.ca").serialize();
+        // Replace each '5' (e.g. the scheme length of "https") with an extra delimiter.
+        String corruptedParsed = serialization.replace('5', GURL.SERIALIZER_DELIMITER);
+        GURL url = GURL.deserialize(corruptedParsed);
+        Assert.assertEquals(GURL.emptyGURL(), url);
+        // Replace the first match of the version digits with a non-numeric token.
+        String corruptedVersion =
+                serialization.replaceFirst(Integer.toString(GURL.SERIALIZER_VERSION), "x");
+        url = GURL.deserialize(corruptedVersion);
+        Assert.assertEquals(GURL.emptyGURL(), url);
+    }
+
+    // Test that domainIs is hooked up correctly.
+    @SmallTest
+    @Test
+    public void testDomainIs() {
+        GURL google = new GURL("https://www.google.com");
+        GURL notGoogle = new GURL("https://www.notgoogle.com");
+        // Both hosts are under "com", but only www.google.com is under "google.com".
+        Assert.assertTrue(google.domainIs("com"));
+        Assert.assertTrue(notGoogle.domainIs("com"));
+        Assert.assertTrue(google.domainIs("google.com"));
+        Assert.assertFalse(notGoogle.domainIs("google.com"));
+        // The full host matches itself, but a sibling subdomain does not.
+        Assert.assertTrue(google.domainIs("www.google.com"));
+        Assert.assertFalse(google.domainIs("images.google.com"));
+    }
+
+    // Tests Mojom conversion.
+    @SmallTest
+    @Test
+    public void testMojomConvertion() {
+        // Valid:
+        Assert.assertEquals(
+                "https://www.google.com/", new GURL("https://www.google.com/").toMojom().url);
+
+        // Null:
+        Assert.assertEquals("", new GURL(null).toMojom().url);
+
+        // Empty:
+        Assert.assertEquals("", new GURL("").toMojom().url);
+
+        // Invalid:
+        Assert.assertEquals("", new GURL(new String(new byte[] {1, 1, 1})).toMojom().url);
+
+        // Too long.
+        Assert.assertEquals("",
+                new GURL("https://www.google.com/".concat("a".repeat(2 * 1024 * 1024)))
+                        .toMojom()
+                        .url);
+    }
+}
diff --git a/android/javatests/src/org/chromium/url/GURLJavaTestHelper.java b/android/javatests/src/org/chromium/url/GURLJavaTestHelper.java
new file mode 100644
index 00000000000..975b009dc33
--- /dev/null
+++ b/android/javatests/src/org/chromium/url/GURLJavaTestHelper.java
@@ -0,0 +1,34 @@
+// Copyright 2020 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package org.chromium.url;
+
+import org.chromium.base.annotations.CalledByNative;
+import org.chromium.base.annotations.JNINamespace;
+import org.chromium.base.annotations.NativeMethods;
+
+/**
+ * Helpers for GURLJavaTest that need to call into native code.
+ */
+@JNINamespace("url")
+public class GURLJavaTestHelper {
+    @CalledByNative
+    public static GURL createGURL(String uri) {
+        return new GURL(uri);
+    }
+
+    public static void nativeInitializeICU() {
+        GURLJavaTestHelperJni.get().initializeICU();
+    }
+
+    public static void nativeTestGURLEquivalence() {
+        GURLJavaTestHelperJni.get().testGURLEquivalence();
+    }
+
+    @NativeMethods
+    interface Natives {
+        void initializeICU();
+        void testGURLEquivalence();
+    }
+}
diff --git a/android/javatests/src/org/chromium/url/JUnitTestGURLsTest.java b/android/javatests/src/org/chromium/url/JUnitTestGURLsTest.java
new file mode 100644
index 00000000000..a23967c2496
--- /dev/null
+++ b/android/javatests/src/org/chromium/url/JUnitTestGURLsTest.java
@@ -0,0 +1,73 @@
+// Copyright 2020 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package org.chromium.url;
+
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.doThrow;
+
+import androidx.test.filters.SmallTest;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+
+import org.chromium.base.Log;
+import org.chromium.base.test.BaseJUnit4ClassRunner;
+import org.chromium.base.test.util.Batch;
+
+import java.util.Map;
+
+/**
+ * Tests for JUnitTestGURLs.
+ */
+@RunWith(BaseJUnit4ClassRunner.class)
+@Batch(Batch.UNIT_TESTS)
+public class JUnitTestGURLsTest {
+    private static final String TAG = "JUnitTestGURLs";
+
+    @Mock
+    GURL.Natives mGURLMocks;
+
+    @Before
+    public void setUp() {
+        MockitoAnnotations.initMocks(this);
+    }
+
+    private RuntimeException getErrorForGURL(GURL gurl) {
+        String serialized = gurl.serialize();
+        Assert.assertEquals(-1, serialized.indexOf(","));
+        serialized = serialized.replace(GURL.SERIALIZER_DELIMITER, ',');
+
+        return new RuntimeException("Please update the serialization in JUnitTestGURLs.java for "
+                + gurl.getPossiblyInvalidSpec() + " to: '" + serialized + "'");
+    }
+
+    @SmallTest
+    @Test
+    public void testGURLEquivalence() throws Throwable {
+        doThrow(new RuntimeException("Deserialization required re-initialization."))
+                .when(mGURLMocks).init(any(), any());
+        // Check every entry before failing, so that one run logs every stale serialization.
+        Throwable exception = null;
+        for (Map.Entry<String, String> entry : JUnitTestGURLs.sGURLMap.entrySet()) {
+            GURL gurl = new GURL(entry.getKey());
+            try {
+                GURLJni.TEST_HOOKS.setInstanceForTesting(mGURLMocks);
+                GURL deserialized = JUnitTestGURLs.getGURL(entry.getKey());
+                GURLJni.TEST_HOOKS.setInstanceForTesting(null);
+                GURLJavaTest.deepAssertEquals(gurl, deserialized);
+            } catch (Throwable e) {
+                GURLJni.TEST_HOOKS.setInstanceForTesting(null);
+                // Keep the underlying failure attached as the cause of the reported error.
+                exception = getErrorForGURL(gurl).initCause(e);
+                Log.e(TAG, "Error: ", exception);
+            }
+        }
+        if (exception != null) throw exception;
+    }
+}
diff --git a/android/javatests/src/org/chromium/url/OriginJavaTest.java b/android/javatests/src/org/chromium/url/OriginJavaTest.java
new file mode 100644
index 00000000000..3a4665af7b5
--- /dev/null
+++ b/android/javatests/src/org/chromium/url/OriginJavaTest.java
@@ -0,0 +1,99 @@
+// Copyright 2022 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package org.chromium.url;
+
+import androidx.test.filters.SmallTest;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+
+import org.chromium.base.test.BaseJUnit4ClassRunner;
+import org.chromium.base.test.util.Batch;
+import org.chromium.content_public.browser.test.NativeLibraryTestUtils;
+import org.chromium.mojo_base.mojom.UnguessableToken;
+
+/**
+ * Tests for {@link Origin}. Origin relies heavily on the native implementation, and the lion's
+ * share of the logic is tested there. This test is primarily to make sure everything is plumbed
+ * through correctly.
+ */
+@RunWith(BaseJUnit4ClassRunner.class)
+@Batch(Batch.UNIT_TESTS)
+public class OriginJavaTest {
+    @Before
+    public void setUp() {
+        NativeLibraryTestUtils.loadNativeLibraryNoBrowserProcess();
+    }
+
+    @SmallTest
+    @Test
+    public void testOriginEquivalence() {
+        OriginJavaTestHelper.testOriginEquivalence();
+    }
+
+    @SmallTest
+    @Test
+    public void testCreateOpaqueOrigin() {
+        Origin opaque = Origin.createOpaqueOrigin();
+        Assert.assertTrue(opaque.isOpaque());
+        Assert.assertEquals("", opaque.getScheme());
+        Assert.assertEquals("", opaque.getHost());
+        Assert.assertEquals(0, opaque.getPort());
+    }
+
+    @SmallTest
+    @Test
+    public void testNonOpaqueMojomConstructor() {
+        String scheme = "http";
+        String host = "host.name";
+        short port = 42;
+        org.chromium.url.internal.mojom.Origin mojom = new org.chromium.url.internal.mojom.Origin();
+        mojom.scheme = scheme;
+        mojom.host = host;
+        mojom.port = port;
+        Origin origin = new Origin(mojom);
+
+        Assert.assertEquals(scheme, origin.getScheme());
+        Assert.assertEquals(host, origin.getHost());
+        Assert.assertEquals(port, origin.getPort());
+        Assert.assertFalse(origin.isOpaque());
+    }
+
+    @SmallTest
+    @Test
+    public void testOpaqueMojomConstructor() {
+        String scheme = "http";
+        String host = "host.name";
+        short port = 42;
+        org.chromium.url.internal.mojom.Origin mojom = new org.chromium.url.internal.mojom.Origin();
+        mojom.scheme = scheme;
+        mojom.host = host;
+        mojom.port = port;
+        UnguessableToken token = new UnguessableToken();
+        token.high = 3;
+        token.low = 4;
+        mojom.nonceIfOpaque = token;
+
+        Origin origin = new Origin(mojom);
+
+        Assert.assertEquals("", origin.getScheme());
+        Assert.assertEquals("", origin.getHost());
+        Assert.assertEquals(0, origin.getPort());
+        Assert.assertTrue(origin.isOpaque());
+    }
+
+    @SmallTest
+    @Test
+    public void testCreateFromGURL() {
+        // The origin of this https URL is not opaque and mirrors its scheme, host and port.
+        Origin origin = Origin.create(new GURL("https://host.name:61234/path"));
+        Assert.assertFalse(origin.isOpaque());
+        Assert.assertEquals("https", origin.getScheme());
+        Assert.assertEquals("host.name", origin.getHost());
+        Assert.assertEquals(61234, origin.getPort());
+    }
+}
diff --git a/android/javatests/src/org/chromium/url/OriginJavaTestHelper.java b/android/javatests/src/org/chromium/url/OriginJavaTestHelper.java
new file mode 100644
index 00000000000..2eb9550ba7a
--- /dev/null
+++ b/android/javatests/src/org/chromium/url/OriginJavaTestHelper.java
@@ -0,0 +1,23 @@
+// Copyright 2022 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package org.chromium.url;
+
+import org.chromium.base.annotations.JNINamespace;
+import org.chromium.base.annotations.NativeMethods;
+
+/**
+ * Helpers for OriginJavaTest that need to call into native code.
+ */
+@JNINamespace("url")
+public class OriginJavaTestHelper {
+    public static void testOriginEquivalence() {
+        OriginJavaTestHelperJni.get().testOriginEquivalence();
+    }
+
+    @NativeMethods
+    interface Natives {
+        void testOriginEquivalence();
+    }
+}
diff --git a/android/junit/src/org/chromium/url/ShadowGURLTest.java b/android/junit/src/org/chromium/url/ShadowGURLTest.java
new file mode 100644
index 00000000000..a491de1a396
--- /dev/null
+++ b/android/junit/src/org/chromium/url/ShadowGURLTest.java
@@ -0,0 +1,70 @@
+// Copyright 2021 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package org.chromium.url;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.robolectric.annotation.Config;
+
+import org.chromium.base.test.BaseRobolectricTestRunner;
+
+/** Tests of {@link ShadowGURL}. */
+@RunWith(BaseRobolectricTestRunner.class)
+@Config(shadows = {ShadowGURL.class})
+public class ShadowGURLTest {
+    /* package */ static void deepAssertEquals(GURL expected, GURL actual) {
+        Assert.assertEquals(expected, actual);
+        Assert.assertEquals(expected.getScheme(), actual.getScheme());
+        Assert.assertEquals(expected.getUsername(), actual.getUsername());
+        Assert.assertEquals(expected.getPassword(), actual.getPassword());
+        Assert.assertEquals(expected.getHost(), actual.getHost());
+        Assert.assertEquals(expected.getPort(), actual.getPort());
+        Assert.assertEquals(expected.getPath(), actual.getPath());
+        Assert.assertEquals(expected.getQuery(), actual.getQuery());
+        Assert.assertEquals(expected.getRef(), actual.getRef());
+    }
+
+    @Test
+    public void testComponents() {
+        GURL url = new GURL(JUnitTestGURLs.SEARCH_URL);
+        Assert.assertFalse(url.isEmpty());
+        Assert.assertTrue(url.isValid());
+
+        Assert.assertEquals(JUnitTestGURLs.SEARCH_URL, url.getSpec());
+        Assert.assertEquals("https", url.getScheme());
+        Assert.assertEquals("", url.getUsername());
+        Assert.assertEquals("", url.getPassword());
+        Assert.assertEquals("www.google.com", url.getHost());
+        Assert.assertEquals("", url.getPort());
+        Assert.assertEquals("/search", url.getPath());
+        Assert.assertEquals("q=test", url.getQuery());
+        Assert.assertEquals("", url.getRef());
+    }
+
+    @Test
+    public void testEmpty() {
+        GURL url = new GURL("");
+        Assert.assertFalse(url.isValid());
+
+        Assert.assertEquals("", url.getSpec());
+        Assert.assertEquals("", url.getScheme());
+        Assert.assertEquals("", url.getUsername());
+        Assert.assertEquals("", url.getPassword());
+        Assert.assertEquals("", url.getHost());
+        Assert.assertEquals("", url.getPort());
+        Assert.assertEquals("", url.getPath());
+        Assert.assertEquals("", url.getQuery());
+        Assert.assertEquals("", url.getRef());
+    }
+
+    @Test
+    public void testSerialization() {
+        GURL gurl = new GURL(JUnitTestGURLs.URL_1_WITH_PATH);
+        GURL deserialized = GURL.deserialize(gurl.serialize());
+        // The original URL is the expectation; the round-tripped copy is the value under test.
+        deepAssertEquals(gurl, deserialized);
+    }
+}
diff --git a/android/origin_android.cc b/android/origin_android.cc
new file mode 100644
index 00000000000..a0dd271b5ad
--- /dev/null
+++ b/android/origin_android.cc
@@ -0,0 +1,87 @@
+// Copyright 2019 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/origin.h"
+
+#include <cstdint>
+
+#include "base/android/jni_android.h"
+#include "base/android/jni_string.h"
+#include "base/android/scoped_java_ref.h"
+#include "base/memory/ptr_util.h"
+#include "url/android/gurl_android.h"
+#include "url/url_jni_headers/Origin_jni.h"
+
+namespace url {
+
+base::android::ScopedJavaLocalRef<jobject> Origin::CreateJavaObject() const {
+  JNIEnv* env = base::android::AttachCurrentThread();
+  const base::UnguessableToken* token = Origin::GetNonceForSerialization();
+  return Java_Origin_Constructor(
+      env, base::android::ConvertUTF8ToJavaString(env, tuple_.scheme()),
+      base::android::ConvertUTF8ToJavaString(env, tuple_.host()), tuple_.port(),
+      opaque(), token ? token->GetHighForSerialization() : 0,
+      token ? token->GetLowForSerialization() : 0);
+}
+
+// static
+Origin Origin::FromJavaObject(
+    const base::android::JavaRef<jobject>& java_origin) {
+  JNIEnv* env = base::android::AttachCurrentThread();
+  std::unique_ptr<Origin> origin = base::WrapUnique<Origin>(
+      reinterpret_cast<Origin*>(Java_Origin_toNativeOrigin(env, java_origin)));
+  return std::move(*origin);
+}
+
+// static
+jlong Origin::CreateNative(JNIEnv* env,
+                           const base::android::JavaRef<jstring>& java_scheme,
+                           const base::android::JavaRef<jstring>& java_host,
+                           uint16_t port,
+                           bool is_opaque,
+                           uint64_t token_high_bits,
+                           uint64_t token_low_bits) {
+  const std::string scheme = ConvertJavaStringToUTF8(env, java_scheme);
+  const std::string host = ConvertJavaStringToUTF8(env, java_host);
+
+  // A nonce must be present exactly when the origin is opaque.
+  absl::optional<base::UnguessableToken> token =
+      base::UnguessableToken::Deserialize(token_high_bits, token_low_bits);
+  CHECK(token.has_value() == is_opaque);
+  Origin::Nonce nonce;
+  if (token)
+    nonce = Origin::Nonce(*token);
+
+  Origin origin = is_opaque
+                      ? Origin::CreateOpaqueFromNormalizedPrecursorTuple(
+                            scheme, host, port, nonce)
+                      : Origin::CreateFromNormalizedTuple(scheme, host, port);
+  return reinterpret_cast<intptr_t>(new Origin(origin));
+}
+
+static base::android::ScopedJavaLocalRef<jobject> JNI_Origin_CreateOpaque(
+    JNIEnv* env) {
+  return Origin().CreateJavaObject();
+}
+
+static base::android::ScopedJavaLocalRef<jobject> JNI_Origin_CreateFromGURL(
+    JNIEnv* env,
+    const base::android::JavaParamRef<jobject>& j_gurl) {
+  return Origin::Create(*GURLAndroid::ToNativeGURL(env, j_gurl))
+      .CreateJavaObject();
+}
+
+static jlong JNI_Origin_CreateNative(
+    JNIEnv* env,
+    const base::android::JavaParamRef<jstring>& java_scheme,
+    const base::android::JavaParamRef<jstring>& java_host,
+    jshort port,
+    jboolean is_opaque,
+    jlong token_high_bits,
+    jlong token_low_bits) {
+  return Origin::CreateNative(env, java_scheme, java_host, port, is_opaque,
+                              token_high_bits, token_low_bits);
+}
+
+}  // namespace url
diff --git a/android/origin_java_test_helper.cc b/android/origin_java_test_helper.cc
new file mode 100644
index 00000000000..62554d87de1
--- /dev/null
+++ b/android/origin_java_test_helper.cc
@@ -0,0 +1,37 @@
+// Copyright 2022 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stddef.h>
+
+#include "base/android/jni_android.h"
+#include "base/android/jni_string.h"
+#include "url/gurl.h"
+#include "url/j_test_jni_headers/OriginJavaTestHelper_jni.h"
+#include "url/origin.h"
+
+namespace url {
+
+// Round-trips several origins through Java; raises AssertionError on mismatch.
+static void JNI_OriginJavaTestHelper_TestOriginEquivalence(JNIEnv* env) {
+  const Origin kCases[] = {
+      Origin(),
+      Origin::Create(GURL("http://a.com")),
+      Origin::Create(GURL("http://a.com:8000")),
+      Origin::Create(GURL("scheme:host")),
+      Origin::Create(GURL("http://a.com:8000")).DeriveNewOpaqueOrigin(),
+  };
+  for (const Origin& expected : kCases) {
+    const Origin restored =
+        Origin::FromJavaObject(expected.CreateJavaObject());
+    if (expected != restored) {
+      std::stringstream message;
+      message << "Origin not equivalent: " << expected << ", " << restored;
+      env->ThrowNew(env->FindClass("java/lang/AssertionError"),
+                    message.str().c_str());
+      return;
+    }
+  }
+}
+
+}  // namespace url
diff --git a/android/parsed_android.cc b/android/parsed_android.cc
new file mode 100644
index 00000000000..36d8aa255ef
--- /dev/null
+++ b/android/parsed_android.cc
@@ -0,0 +1,96 @@
+// Copyright 2019 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/android/parsed_android.h"
+
+#include <jni.h>
+
+#include "base/android/jni_android.h"
+#include "url/gurl_jni_headers/Parsed_jni.h"
+
+using base::android::AttachCurrentThread;
+using base::android::JavaRef;
+using base::android::ScopedJavaLocalRef;
+
+namespace url {
+
+namespace {
+
+ScopedJavaLocalRef<jobject> CreateJavaParsed(JNIEnv* env,
+                                             const Parsed& parsed,
+                                             const JavaRef<jobject>& inner) {
+  // A jint must be able to represent every value of the C++ offset type.
+  using Offset = decltype(parsed.scheme.begin);
+  static_assert(std::is_signed<Offset>::value
+                    ? sizeof(jint) >= sizeof(Offset)
+                    : sizeof(jint) > sizeof(Offset),
+                "Java size offsets for Parsed Components must be large enough "
+                "to store the full C++ offset.");
+  return Java_Parsed_Constructor(
+      env, parsed.scheme.begin, parsed.scheme.len, parsed.username.begin,
+      parsed.username.len, parsed.password.begin, parsed.password.len,
+      parsed.host.begin, parsed.host.len, parsed.port.begin, parsed.port.len,
+      parsed.path.begin, parsed.path.len, parsed.query.begin, parsed.query.len,
+      parsed.ref.begin, parsed.ref.len, parsed.potentially_dangling_markup,
+      inner);
+}
+
+}  // namespace
+
+// static
+ScopedJavaLocalRef<jobject> ParsedAndroid::InitFromParsed(
+    JNIEnv* env,
+    const Parsed& parsed) {
+  ScopedJavaLocalRef<jobject> j_inner;  // Only filled when an inner exists.
+  if (const Parsed* nested = parsed.inner_parsed())
+    j_inner = CreateJavaParsed(env, *nested, nullptr);
+  return CreateJavaParsed(env, parsed, j_inner);
+}
+
+static jlong JNI_Parsed_CreateNative(JNIEnv* env,
+                                     jint scheme_begin,
+                                     jint scheme_length,
+                                     jint username_begin,
+                                     jint username_length,
+                                     jint password_begin,
+                                     jint password_length,
+                                     jint host_begin,
+                                     jint host_length,
+                                     jint port_begin,
+                                     jint port_length,
+                                     jint path_begin,
+                                     jint path_length,
+                                     jint query_begin,
+                                     jint query_length,
+                                     jint ref_begin,
+                                     jint ref_length,
+                                     jboolean potentially_dangling_markup,
+                                     jlong inner_parsed) {
+  Parsed* parsed = new Parsed();  // Heap object; its address is the result.
+  parsed->scheme.begin = scheme_begin;
+  parsed->scheme.len = scheme_length;
+  parsed->username.begin = username_begin;
+  parsed->username.len = username_length;
+  parsed->password.begin = password_begin;
+  parsed->password.len = password_length;
+  parsed->host.begin = host_begin;
+  parsed->host.len = host_length;
+  parsed->port.begin = port_begin;
+  parsed->port.len = port_length;
+  parsed->path.begin = path_begin;
+  parsed->path.len = path_length;
+  parsed->query.begin = query_begin;
+  parsed->query.len = query_length;
+  parsed->ref.begin = ref_begin;
+  parsed->ref.len = ref_length;
+  parsed->potentially_dangling_markup = potentially_dangling_markup;
+  Parsed* inner = reinterpret_cast<Parsed*>(inner_parsed);  // 0 means no inner.
+  if (inner) {
+    parsed->set_inner_parsed(*inner);
+    delete inner;  // This call takes over and frees the inner Parsed handle.
+  }
+  return reinterpret_cast<intptr_t>(parsed);  // Pointer handed back as jlong.
+}
+
+}  // namespace url
diff --git a/android/parsed_android.h b/android/parsed_android.h
new file mode 100644
index 00000000000..244ada55f4b
--- /dev/null
+++ b/android/parsed_android.h
@@ -0,0 +1,22 @@
+// Copyright 2019 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_ANDROID_PARSED_ANDROID_H_
+#define URL_ANDROID_PARSED_ANDROID_H_
+
+#include "base/android/scoped_java_ref.h"
+#include "url/third_party/mozilla/url_parse.h"
+
+namespace url {
+
+class ParsedAndroid {  // Holder for the C++ Parsed -> Java object conversion.
+ public:
+  static base::android::ScopedJavaLocalRef<jobject> InitFromParsed(
+      JNIEnv* env,
+      const Parsed& parsed);
+};
+
+}  // namespace url
+
+#endif  // URL_ANDROID_PARSED_ANDROID_H_
diff --git a/android/robolectric_test_main.cc b/android/robolectric_test_main.cc
new file mode 100644
index 00000000000..28fb4d2410d
--- /dev/null
+++ b/android/robolectric_test_main.cc
@@ -0,0 +1,15 @@
+// Copyright 2022 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+#include <jni.h>
+
+#include "base/android/base_jni_onload.h"
+#include "base/android/jni_android.h"
+
+extern "C" JNI_EXPORT jint JNI_OnLoad(JavaVM* vm, void* reserved) {
+  base::android::InitVM(vm);  // Hand the VM to //base first.
+  base::android::OnJNIOnLoadInit();
+  // TODO(1223993): Initialize GURL schemes, like in
+  //     content::RegisterContentSchemes().
+  return JNI_VERSION_1_4;  // JNI version reported back to the VM.
+}
diff --git a/android/test/java/src/org/chromium/url/JUnitTestGURLs.java b/android/test/java/src/org/chromium/url/JUnitTestGURLs.java
new file mode 100644
index 00000000000..9f19c6c9079
--- /dev/null
+++ b/android/test/java/src/org/chromium/url/JUnitTestGURLs.java
@@ -0,0 +1,174 @@
+// Copyright 2020 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package org.chromium.url;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * A Helper class for JUnit tests to be able to use GURLs without requiring native initialization.
+ * This should be used sparingly, when converting junit tests to Batched Instrumentation tests is
+ * not feasible.
+ *
+ * If any more complex GURL behaviour is tested, like comparing Origins, the test should be written
+ * as an Instrumentation test instead - you should never mock GURL.
+ */
+public class JUnitTestGURLs {
+    // In order to add a test URL:
+    // 1. Add the URL String as a constant here.
+    // 2. Add the constant to the map below, with a placeholder string for the GURL serialization.
+    // 3. Run JUnitTestGURLsTest (eg. './tools/autotest.py -C out/Debug JUnitTestGURLsTest').
+    // 4. Check logcat output or test exception for the correct serialization String, and place it
+    //    in the map.
+    public static final String EXAMPLE_URL = "https://www.example.com/";
+    public static final String HTTP_URL = "http://www.example.com/";
+    public static final String URL_1 = "https://www.one.com/";
+    public static final String URL_1_NUMERAL = "https://www.1.com/";
+    public static final String URL_1_WITH_PATH = "https://www.one.com/some_path.html";
+    public static final String URL_2 = "https://www.two.com/";
+    public static final String URL_3 = "https://www.three.com/";
+    public static final String MAPS_URL = "https://maps.google.com/";
+    public static final String SEARCH_URL = "https://www.google.com/search?q=test";
+    public static final String SEARCH_2_URL = "https://www.google.com/search?q=query";
+    public static final String INITIAL_URL = "https://initial.com";
+    public static final String SPECULATED_URL = "https://speculated.com";
+    public static final String NTP_URL = "chrome://newtab/";
+    public static final String NTP_NATIVE_URL = "chrome-native://newtab/";
+    public static final String DOM_DISILLER_URL = "chrome-distiller://url";
+    public static final String RED_1 = "https://www.red.com/page1";
+    public static final String RED_2 = "https://www.red.com/page2";
+    public static final String RED_3 = "https://www.red.com/page3";
+    public static final String BLUE_1 = "https://www.blue.com/page1";
+    public static final String BLUE_2 = "https://www.blue.com/page2";
+    public static final String BLUE_3 = "https://www.blue.com/page3";
+    public static final String AMP_URL =
+            "https://www.google.com/amp/www.nyt.com/ampthml/blogs.html";
+    public static final String AMP_CACHE_URL =
+            "https://www.google.com/amp/s/www.nyt.com/ampthml/blogs.html";
+    public static final String TEXT_FRAGMENT_URL = "https://www.example.com/#:~:text=selector";
+    public static final String MULTI_TEXT_FRAGMENT_URL =
+            "https://www.example.com/#:~:text=selector1&text=selector2&text=selector3";
+    public static final String INVALID_URL = "http://0x100.0/";
+    public static final String GOOGLE_URL = "http://www.google.com/";
+    public static final String GOOGLE_URL_DOGS = "http://www.google.com/dogs";
+    public static final String GOOGLE_URL_DOGS_FUN = "http://www.google.com/dogs-are-fun";
+    public static final String GOOGLE_URL_DOG = "http://www.google.com/dog";
+    public static final String GOOGLE_URL_CAT = "http://www.google.com/cat";
+    public static final String GOOGLE_URL_PIG = "http://www.google.com/pig";
+    public static final String ABOUT_BLANK = "about:blank";
+    public static final String CHROME_ABOUT = "chrome://about";
+
+    // Map of URL string to GURL serialization.
+    /* package */ static final Map<String, String> sGURLMap;
+    static {
+        Map<String, String> map = new HashMap<>();
+        map.put(EXAMPLE_URL,
+                "82,1,true,0,5,0,-1,0,-1,8,15,0,-1,23,1,0,-1,0,-1,"
+                        + "false,false,https://www.example.com/");
+        map.put(HTTP_URL,
+                "81,1,true,0,4,0,-1,0,-1,7,15,0,-1,22,1,0,-1,0,-1,"
+                        + "false,false,http://www.example.com/");
+        map.put(URL_1,
+                "78,1,true,0,5,0,-1,0,-1,8,11,0,-1,19,1,0,-1,0,-1,"
+                        + "false,false,https://www.one.com/");
+        map.put(URL_1_NUMERAL,
+                "75,1,true,0,5,0,-1,0,-1,8,9,0,-1,17,1,0,-1,0,-1,"
+                        + "false,false,https://www.1.com/");
+        map.put(URL_1_WITH_PATH,
+                "93,1,true,0,5,0,-1,0,-1,8,11,0,-1,19,15,0,-1,0,-1,"
+                        + "false,false,https://www.one.com/some_path.html");
+        map.put(URL_2,
+                "78,1,true,0,5,0,-1,0,-1,8,11,0,-1,19,1,0,-1,0,-1,"
+                        + "false,false,https://www.two.com/");
+        map.put(URL_3,
+                "80,1,true,0,5,0,-1,0,-1,8,13,0,-1,21,1,0,-1,0,-1,false,false,https://www.three.com/");
+        map.put(RED_1,
+                "83,1,true,0,5,0,-1,0,-1,8,11,0,-1,19,6,0,-1,0,-1,"
+                        + "false,false,https://www.red.com/page1");
+        map.put(RED_2,
+                "83,1,true,0,5,0,-1,0,-1,8,11,0,-1,19,6,0,-1,0,-1,"
+                        + "false,false,https://www.red.com/page2");
+        map.put(RED_3,
+                "83,1,true,0,5,0,-1,0,-1,8,11,0,-1,19,6,0,-1,0,-1,"
+                        + "false,false,https://www.red.com/page3");
+        map.put(BLUE_1,
+                "84,1,true,0,5,0,-1,0,-1,8,12,0,-1,20,6,0,-1,0,-1,"
+                        + "false,false,https://www.blue.com/page1");
+        map.put(BLUE_2,
+                "84,1,true,0,5,0,-1,0,-1,8,12,0,-1,20,6,0,-1,0,-1,"
+                        + "false,false,https://www.blue.com/page2");
+        map.put(BLUE_3,
+                "84,1,true,0,5,0,-1,0,-1,8,12,0,-1,20,6,0,-1,0,-1,"
+                        + "false,false,https://www.blue.com/page3");
+        map.put(SEARCH_URL,
+                "94,1,true,0,5,0,-1,0,-1,8,14,0,-1,22,7,30,6,0,-1,"
+                        + "false,false,https://www.google.com/search?q=test");
+        map.put(SEARCH_2_URL,
+                "95,1,true,0,5,0,-1,0,-1,8,14,0,-1,22,7,30,7,0,-1,"
+                        + "false,false,https://www.google.com/search?q=query");
+        map.put(INITIAL_URL,
+                "78,1,true,0,5,0,-1,0,-1,8,11,0,-1,19,1,0,-1,0,-1,"
+                        + "false,false,https://initial.com/");
+        map.put(SPECULATED_URL,
+                "81,1,true,0,5,0,-1,0,-1,8,14,0,-1,22,1,0,-1,0,-1,"
+                        + "false,false,https://speculated.com/");
+        map.put(NTP_URL,
+                "73,1,true,0,6,0,-1,0,-1,9,6,0,-1,15,1,0,-1,0,-1,"
+                        + "false,false,chrome://newtab/");
+        map.put(NTP_NATIVE_URL,
+                "82,1,true,0,13,0,-1,0,-1,16,6,0,-1,22,1,0,-1,0,-1,false,false,"
+                        + "chrome-native://newtab/");
+        map.put(DOM_DISILLER_URL,
+                "82,1,true,0,16,0,-1,0,-1,19,3,0,-1,22,1,0,-1,0,-1,false,false,"
+                        + "chrome-distiller://url/");
+        map.put(MAPS_URL,
+                "82,1,true,0,5,0,-1,0,-1,8,15,0,-1,23,1,0,-1,0,-1,false,false,https://maps.google.com/");
+        map.put(AMP_URL,
+                "116,1,true,0,5,0,-1,0,-1,8,14,0,-1,22,35,0,-1,0,-1,false,false,https://www.google.com/amp/www.nyt.com/ampthml/blogs.html");
+        map.put(AMP_CACHE_URL,
+                "118,1,true,0,5,0,-1,0,-1,8,14,0,-1,22,37,0,-1,0,-1,false,false,https://www.google.com/amp/s/www.nyt.com/ampthml/blogs.html");
+        map.put(TEXT_FRAGMENT_URL,
+                "100,1,true,0,5,0,-1,0,-1,8,15,0,-1,23,1,0,-1,25,16,false,false,https://www.example.com/#:~:text=selector");
+        map.put(MULTI_TEXT_FRAGMENT_URL,
+                "131,1,true,0,5,0,-1,0,-1,8,15,0,-1,23,1,0,-1,25,47,false,false,https://www.example.com/#:~:text=selector1&text=selector2&text=selector3");
+        map.put(INVALID_URL,
+                "73,1,false,0,4,0,-1,0,-1,7,7,0,-1,14,1,0,-1,0,-1,false,false,http://0x100.0/");
+        map.put(GOOGLE_URL,
+                "80,1,true,0,4,0,-1,0,-1,7,14,0,-1,21,1,0,-1,0,-1,false,false,http://www.google.com/");
+        map.put(GOOGLE_URL_DOGS,
+                "84,1,true,0,4,0,-1,0,-1,7,14,0,-1,21,5,0,-1,0,-1,false,false,http://www.google.com/dogs");
+        map.put(GOOGLE_URL_DOGS_FUN,
+                "93,1,true,0,4,0,-1,0,-1,7,14,0,-1,21,13,0,-1,0,-1,false,false,http://www.google.com/dogs-are-fun");
+        map.put(GOOGLE_URL_DOG,
+                "83,1,true,0,4,0,-1,0,-1,7,14,0,-1,21,4,0,-1,0,-1,false,false,http://www.google.com/dog");
+        map.put(GOOGLE_URL_CAT,
+                "83,1,true,0,4,0,-1,0,-1,7,14,0,-1,21,4,0,-1,0,-1,false,false,http://www.google.com/cat");
+        map.put(GOOGLE_URL_PIG,
+                "83,1,true,0,4,0,-1,0,-1,7,14,0,-1,21,4,0,-1,0,-1,false,false,http://www.google.com/pig");
+        map.put(ABOUT_BLANK,
+                "68,1,true,0,5,0,-1,0,-1,0,-1,0,-1,6,5,0,-1,0,-1,false,false,about:blank");
+        map.put(CHROME_ABOUT,
+                "72,1,true,0,6,0,-1,0,-1,9,5,0,-1,14,1,0,-1,0,-1,false,false,chrome://about/");
+        sGURLMap = Collections.unmodifiableMap(map);
+    }
+
+    /**
+     * @return the GURL deserialized from the (comma-delimited) |sGURLMap| entry for |url|.
+     */
+    public static GURL getGURL(String url) {
+        final String stored = sGURLMap.get(url);
+        if (stored == null) {
+            throw new IllegalArgumentException("URL " + url + " not found");
+        }
+        final String wireFormat = stored.replace(',', GURL.SERIALIZER_DELIMITER);
+        final GURL result = GURL.deserialize(wireFormat);
+        // Need an empty GURL? Call GURL.emptyGURL() directly instead of coming through here.
+        if (result.isEmpty()) {
+            throw new RuntimeException("Could not deserialize: " + wireFormat);
+        }
+        return result;
+    }
+}
diff --git a/android/test/java/src/org/chromium/url/ShadowGURL.java b/android/test/java/src/org/chromium/url/ShadowGURL.java
new file mode 100644
index 00000000000..53e1da192b1
--- /dev/null
+++ b/android/test/java/src/org/chromium/url/ShadowGURL.java
@@ -0,0 +1,62 @@
+// Copyright 2021 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package org.chromium.url;
+
+import org.robolectric.annotation.Implementation;
+import org.robolectric.annotation.Implements;
+
+import org.chromium.url.GURL.Natives;
+
+/**
+ * Robolectric shadow of {@link GURL}. Lets tests use {@code GURL} without the native libraries
+ * being loaded.
+ *
+ * <p>Only URLs listed in {@link JUnitTestGURLs} can be created through this shadow.
+ */
+@Implements(GURL.class)
+public class ShadowGURL {
+    /**
+     * The {@link GURL.Natives} used by a shadowed {@link GURL}; only {@code init} is supported.
+     */
+    private static class NativesImpl implements GURL.Natives {
+        @Override
+        public void init(String url, GURL target) {
+            target.initForTesting(JUnitTestGURLs.getGURL(url));
+        }
+
+        @Override
+        public void getOrigin(String spec, boolean isValid, long nativeParsed, GURL target) {
+            throw new UnsupportedOperationException(
+                    "ShadowGURL.NativesImpl#getOrigin is not implemented");
+        }
+
+        @Override
+        public boolean domainIs(String spec, boolean isValid, long nativeParsed, String domain) {
+            throw new UnsupportedOperationException(
+                    "ShadowGURL.NativesImpl#domainIs is not implemented");
+        }
+
+        @Override
+        public long createNative(String spec, boolean isValid, long nativeParsed) {
+            throw new UnsupportedOperationException(
+                    "ShadowGURL.NativesImpl#createNative is not implemented");
+        }
+    }
+    private static final NativesImpl sNativesInstance = new NativesImpl();
+
+    /**
+     * Returns the shared {@link NativesImpl}. Shadowing {@code GURLJni#get} would also work, but
+     * tests using this would then have to load both shadows.
+     */
+    @Implementation
+    protected static Natives getNatives() {
+        return sNativesInstance;
+    }
+
+    @Implementation
+    protected static void ensureNativeInitializedForGURL() {
+        // Intentionally empty: native initialization is skipped under this shadow.
+    }
+}
diff --git a/features.gni b/features.gni
new file mode 100644
index 00000000000..482d8498a82
--- /dev/null
+++ b/features.gni
@@ -0,0 +1,16 @@
+# Copyright 2016 The Chromium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# Features used by targets inside and outside of |url|.
+# For details see declare_args() in build/config/BUILDCONFIG.gn.
+declare_args() {
+  # Enables the use of ICU alternatives in lieu of ICU for the target toolchain.
+  # The flag is used for Cronet to reduce the size of the Cronet binary.
+  use_platform_icu_alternatives = false
+}
+
+# Never use platform ICU alternatives for the host toolchain.
+# E.g. don't apply this to host binaries when target_os = "android".
+use_platform_icu_alternatives =
+    use_platform_icu_alternatives && current_toolchain == default_toolchain
diff --git a/gurl.cc b/gurl.cc
new file mode 100644
index 00000000000..6930f73b6d5
--- /dev/null
+++ b/gurl.cc
@@ -0,0 +1,578 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/gurl.h"
+
+#include <stddef.h>
+
+#include <algorithm>
+#include <memory>
+#include <ostream>
+#include <utility>
+
+#include "base/check_op.h"
+#include "base/no_destructor.h"
+#include "base/strings/string_piece.h"
+#include "base/strings/string_util.h"
+#include "base/trace_event/base_tracing.h"
+#include "base/trace_event/memory_usage_estimator.h"
+#include "url/url_canon_stdstring.h"
+#include "url/url_util.h"
+
+GURL::GURL() : is_valid_(false) {  // A default-constructed URL is invalid.
+}
+
+GURL::GURL(const GURL& other)
+    : spec_(other.spec_),
+      is_valid_(other.is_valid_),
+      parsed_(other.parsed_),
+      inner_url_(other.inner_url_ ? std::make_unique<GURL>(*other.inner_url_)
+                                  : nullptr) {
+  // Invariant: a valid filesystem URL always carries an inner_url_.
+  DCHECK(!is_valid_ || !SchemeIsFileSystem() || inner_url_);
+}
+
+GURL::GURL(GURL&& other) noexcept
+    : spec_(std::move(other.spec_)),
+      is_valid_(other.is_valid_),
+      parsed_(other.parsed_),
+      inner_url_(std::move(other.inner_url_)) {
+  other.is_valid_ = false;  // The moved-from URL is left marked invalid...
+  other.parsed_ = url::Parsed();  // ...with default-constructed parse data.
+}
+
+GURL::GURL(base::StringPiece url_string) {
+  InitCanonical(url_string, true);  // true: trim_path_end.
+}
+
+GURL::GURL(base::StringPiece16 url_string) {
+  InitCanonical(url_string, true);  // true: trim_path_end.
+}
+
+GURL::GURL(const std::string& url_string, RetainWhiteSpaceSelector) {
+  InitCanonical(url_string, false);  // false: do not trim the path end.
+}
+
+GURL::GURL(const char* canonical_spec,
+           size_t canonical_spec_len,
+           const url::Parsed& parsed,
+           bool is_valid)
+    : spec_(canonical_spec, canonical_spec_len),
+      is_valid_(is_valid),
+      parsed_(parsed) {
+  InitializeFromCanonicalSpec();  // Sets inner_url_; debug builds re-verify.
+}
+
+GURL::GURL(std::string canonical_spec, const url::Parsed& parsed, bool is_valid)
+    : spec_(std::move(canonical_spec)), is_valid_(is_valid), parsed_(parsed) {
+  InitializeFromCanonicalSpec();  // Sets inner_url_; debug builds re-verify.
+}
+
+template <typename T, typename CharT>
+void GURL::InitCanonical(T input_spec, bool trim_path_end) {
+  // Canonicalizes |input_spec| into spec_/parsed_ and records its validity.
+  url::StdStringCanonOutput output(&spec_);
+  is_valid_ = url::Canonicalize(
+      input_spec.data(), static_cast<int>(input_spec.length()), trim_path_end,
+      nullptr, &output, &parsed_);
+  output.Complete();  // Must be done before using string.
+  if (is_valid_ && SchemeIsFileSystem()) {
+    inner_url_ = std::make_unique<GURL>(spec_.data(), parsed_.Length(),
+                                        *parsed_.inner_parsed(), true);
+  }
+  // Valid URLs always have non-empty specs.
+  DCHECK(!is_valid_ || !spec_.empty());
+}
+
+void GURL::InitializeFromCanonicalSpec() {
+  if (is_valid_ && SchemeIsFileSystem()) {
+    inner_url_ = std::make_unique<GURL>(spec_.data(), parsed_.Length(),
+                                        *parsed_.inner_parsed(), true);
+  }
+
+#ifndef NDEBUG
+  // For testing purposes, check that the parsed canonical URL is identical to
+  // what we would have produced. Invalid URLs are skipped: they have no
+  // meaning and we can't always canonicalize them reproducibly.
+  if (is_valid_) {
+    DCHECK(!spec_.empty());
+    url::Component scheme;
+    // We can't do this check on the inner_url of a filesystem URL, as
+    // canonical_spec actually points to the start of the outer URL, so we'd
+    // end up with infinite recursion in this constructor.
+    if (!url::FindAndCompareScheme(spec_.data(), spec_.length(),
+                                   url::kFileSystemScheme, &scheme) ||
+        scheme.begin == parsed_.scheme.begin) {
+      // We need to retain trailing whitespace on path URLs, as the |parsed_|
+      // spec we originally received may legitimately end in whitespace (on
+      // its last remaining component), e.g. if the #ref has been
+      // removed from a "foo:hello #ref" URL (see http://crbug.com/291747).
+      GURL test_url(spec_, RETAIN_TRAILING_PATH_WHITEPACE);
+
+      DCHECK_EQ(test_url.is_valid_, is_valid_);
+      DCHECK_EQ(test_url.spec_, spec_);
+
+      DCHECK_EQ(test_url.parsed_.scheme, parsed_.scheme);
+      DCHECK_EQ(test_url.parsed_.username, parsed_.username);
+      DCHECK_EQ(test_url.parsed_.password, parsed_.password);
+      DCHECK_EQ(test_url.parsed_.host, parsed_.host);
+      DCHECK_EQ(test_url.parsed_.port, parsed_.port);
+      DCHECK_EQ(test_url.parsed_.path, parsed_.path);
+      DCHECK_EQ(test_url.parsed_.query, parsed_.query);
+      DCHECK_EQ(test_url.parsed_.ref, parsed_.ref);
+    }
+  }
+#endif
+}
+
+GURL::~GURL() = default;  // Defined out of line; members clean up themselves.
+
+GURL& GURL::operator=(const GURL& other) {
+  spec_ = other.spec_;
+  is_valid_ = other.is_valid_;
+  parsed_ = other.parsed_;
+
+  // Reuse an existing inner GURL where possible; allocate or drop otherwise.
+  if (other.inner_url_ && inner_url_)
+    *inner_url_ = *other.inner_url_;
+  else if (other.inner_url_)
+    inner_url_ = std::make_unique<GURL>(*other.inner_url_);
+  else
+    inner_url_.reset();
+  return *this;
+}
+
+GURL& GURL::operator=(GURL&& other) noexcept {
+  spec_ = std::move(other.spec_);
+  is_valid_ = other.is_valid_;
+  parsed_ = other.parsed_;
+  inner_url_ = std::move(other.inner_url_);
+
+  other.is_valid_ = false;  // The moved-from URL is left marked invalid...
+  other.parsed_ = url::Parsed();  // ...with default-constructed parse data.
+  return *this;
+}
+
+const std::string& GURL::spec() const {
+  if (!is_valid_ && !spec_.empty()) {  // Invalid URL with a non-empty spec.
+    DCHECK(false) << "Trying to get the spec of an invalid URL!";
+    return base::EmptyString();
+  }
+  return spec_;
+}
+
+bool GURL::operator<(const GURL& other) const {
+  return spec_.compare(other.spec_) < 0;  // Ordering is by raw spec string.
+}
+
+bool GURL::operator>(const GURL& other) const {
+  return spec_.compare(other.spec_) > 0;  // Ordering is by raw spec string.
+}
+
+// Note: code duplicated below (it's inconvenient to use a template here).
+GURL GURL::Resolve(base::StringPiece relative) const {
+  // Not allowed for invalid URLs.
+  if (!is_valid_)
+    return GURL();
+
+  GURL result;
+  url::StdStringCanonOutput output(&result.spec_);
+  const bool resolved = url::ResolveRelative(
+      spec_.data(), static_cast<int>(spec_.length()), parsed_, relative.data(),
+      static_cast<int>(relative.length()), nullptr, &output, &result.parsed_);
+  if (!resolved) {
+    // Error resolving, return an empty URL.
+    return GURL();
+  }
+
+  output.Complete();
+  result.is_valid_ = true;
+  if (result.SchemeIsFileSystem()) {
+    result.inner_url_ =
+        std::make_unique<GURL>(result.spec_.data(), result.parsed_.Length(),
+                               *result.parsed_.inner_parsed(), true);
+  }
+  return result;
+}
+
+// Note: code duplicated above (it's inconvenient to use a template here).
+GURL GURL::Resolve(base::StringPiece16 relative) const {
+  // Not allowed for invalid URLs.
+  if (!is_valid_)
+    return GURL();
+
+  GURL result;
+  url::StdStringCanonOutput output(&result.spec_);
+  const bool resolved = url::ResolveRelative(
+      spec_.data(), static_cast<int>(spec_.length()), parsed_, relative.data(),
+      static_cast<int>(relative.length()), nullptr, &output, &result.parsed_);
+  if (!resolved) {
+    // Error resolving, return an empty URL.
+    return GURL();
+  }
+
+  output.Complete();
+  result.is_valid_ = true;
+  if (result.SchemeIsFileSystem()) {
+    result.inner_url_ =
+        std::make_unique<GURL>(result.spec_.data(), result.parsed_.Length(),
+                               *result.parsed_.inner_parsed(), true);
+  }
+  return result;
+}
+
+// Note: code duplicated below (it's inconvenient to use a template here).
+GURL GURL::ReplaceComponents(const Replacements& replacements) const {
+  // Not allowed for invalid URLs.
+  if (!is_valid_)
+    return GURL();
+
+  // Canonicalize this URL with |replacements| applied, writing into |result|.
+  GURL result;
+  url::StdStringCanonOutput output(&result.spec_);
+  result.is_valid_ = url::ReplaceComponents(
+      spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements,
+      nullptr, &output, &result.parsed_);
+  output.Complete();
+
+  // A valid filesystem URL gets its inner_url_ rebuilt here.
+  result.ProcessFileSystemURLAfterReplaceComponents();
+  return result;
+}
+
+// Note: code duplicated above (it's inconvenient to use a template here).
+GURL GURL::ReplaceComponents(const ReplacementsW& replacements) const {
+  // Not allowed for invalid URLs.
+  if (!is_valid_)
+    return GURL();
+
+  // Canonicalize this URL with |replacements| applied, writing into |result|.
+  GURL result;
+  url::StdStringCanonOutput output(&result.spec_);
+  result.is_valid_ = url::ReplaceComponents(
+      spec_.data(), static_cast<int>(spec_.length()), parsed_, replacements,
+      nullptr, &output, &result.parsed_);
+  output.Complete();
+
+  // A valid filesystem URL gets its inner_url_ rebuilt here.
+  result.ProcessFileSystemURLAfterReplaceComponents();
+
+  return result;
+}
+
+void GURL::ProcessFileSystemURLAfterReplaceComponents() {
+  // Only a valid filesystem URL gets an inner URL (re)built.
+  if (!is_valid_ || !SchemeIsFileSystem())
+    return;
+
+  inner_url_ = std::make_unique<GURL>(spec_.data(), parsed_.Length(),
+                                      *parsed_.inner_parsed(), true);
+}
+
+GURL GURL::DeprecatedGetOriginAsURL() const {
+  // An origin is only meaningful for valid, standard URLs; anything else
+  // yields the empty URL.
+  const bool has_origin = is_valid_ && IsStandard();
+  if (!has_origin)
+    return GURL();
+  // A filesystem URL takes its origin from the URL nested inside it.
+  if (SchemeIsFileSystem())
+    return inner_url_->DeprecatedGetOriginAsURL();
+  // Keep scheme, host and port; strip every other component.
+  Replacements strip_to_origin;
+  strip_to_origin.ClearUsername();
+  strip_to_origin.ClearPassword();
+  strip_to_origin.ClearPath();
+  strip_to_origin.ClearQuery();
+  strip_to_origin.ClearRef();
+  return ReplaceComponents(strip_to_origin);
+}
+
+GURL GURL::GetAsReferrer() const {
+  if (!is_valid() || !IsReferrerScheme(spec_.data(), parsed_.scheme))
+    return GURL();
+
+  const bool must_strip = has_ref() || has_username() || has_password();
+  if (!must_strip)
+    return GURL(*this);  // Nothing to remove: a plain copy will do.
+  Replacements strip_private_parts;
+  strip_private_parts.ClearRef();
+  strip_private_parts.ClearUsername();
+  strip_private_parts.ClearPassword();
+  return ReplaceComponents(strip_private_parts);
+}
+
+GURL GURL::GetWithEmptyPath() const {
+  // This doesn't make sense for invalid or nonstandard URLs, so return
+  // the empty URL.
+  if (!is_valid_ || !IsStandard())
+    return GURL();
+
+  // The URL is already canonical and "/" is a canonical path, so the copy is
+  // edited in place below rather than being re-parsed.
+  GURL other(*this);
+  if (parsed_.path.len == 0)
+    return other;
+
+  // Clear everything after the path.
+  other.parsed_.query.reset();
+  other.parsed_.ref.reset();
+
+  // Set the path: the zero-length case returned above, so overwrite the first
+  // path character with '/' and truncate the spec right after it.
+  other.spec_[other.parsed_.path.begin] = '/';
+  other.parsed_.path.len = 1;
+  other.spec_.resize(other.parsed_.path.begin + 1);
+  return other;
+}
+
+GURL GURL::GetWithoutFilename() const {
+  return Resolve(".");  // Resolves the relative reference "." against this URL.
+}
+
+GURL GURL::GetWithoutRef() const {
+  if (has_ref()) {
+    Replacements drop_ref;
+    drop_ref.ClearRef();
+    return ReplaceComponents(drop_ref);
+  }
+  return GURL(*this);  // No ref present: a plain copy is already the answer.
+}
+
+bool GURL::IsStandard() const {  // Asks url::IsStandard() about the scheme.
+  return url::IsStandard(spec_.data(), parsed_.scheme);
+}
+
+bool GURL::IsAboutBlank() const {  // IsAboutUrl() check for kAboutBlankPath.
+  return IsAboutUrl(url::kAboutBlankPath);
+}
+
+bool GURL::IsAboutSrcdoc() const {  // IsAboutUrl() check for kAboutSrcdocPath.
+  return IsAboutUrl(url::kAboutSrcdocPath);
+}
+
+bool GURL::SchemeIs(base::StringPiece lower_ascii_scheme) const {
+  DCHECK(base::IsStringASCII(lower_ascii_scheme));
+  DCHECK(base::ToLowerASCII(lower_ascii_scheme) == lower_ascii_scheme);
+
+  // A URL with no scheme matches only the empty string.
+  return has_scheme() ? scheme_piece() == lower_ascii_scheme
+                      : lower_ascii_scheme.empty();
+}
+
+bool GURL::SchemeIsHTTPOrHTTPS() const {  // Scheme is kHttps or kHttp.
+  return SchemeIs(url::kHttpsScheme) || SchemeIs(url::kHttpScheme);
+}
+
+bool GURL::SchemeIsWSOrWSS() const {  // Scheme is kWs or kWss.
+  return SchemeIs(url::kWsScheme) || SchemeIs(url::kWssScheme);
+}
+
+bool GURL::SchemeIsCryptographic() const {
+  // A URL without a scheme is never cryptographic; otherwise defer to the
+  // static overload with this URL's scheme.
+  return has_scheme() && SchemeIsCryptographic(scheme_piece());
+}
+
+bool GURL::SchemeIsCryptographic(base::StringPiece lower_ascii_scheme) {
+  DCHECK(base::IsStringASCII(lower_ascii_scheme));
+  DCHECK(base::ToLowerASCII(lower_ascii_scheme) == lower_ascii_scheme);
+
+  const bool is_https = lower_ascii_scheme == url::kHttpsScheme;
+  return is_https || lower_ascii_scheme == url::kWssScheme;
+}
+
+bool GURL::SchemeIsLocal() const {
+  // Local means about:, blob: or data:. `filesystem:` is not in the Fetch
+  // spec, but Chromium still largely supports it, so it is treated as local.
+  return SchemeIs(url::kAboutScheme) || SchemeIs(url::kBlobScheme) ||
+         SchemeIs(url::kDataScheme) || SchemeIs(url::kFileSystemScheme);
+}
+
+int GURL::IntPort() const {
+  return parsed_.port.is_nonempty()
+             ? url::ParsePort(spec_.data(), parsed_.port)
+             : url::PORT_UNSPECIFIED;
+}
+
+int GURL::EffectiveIntPort() const {
+  const int explicit_port = IntPort();
+  if (explicit_port != url::PORT_UNSPECIFIED || !IsStandard())
+    return explicit_port;  // Explicit port, or no scheme default to use.
+  return url::DefaultPortForScheme(spec_.data() + parsed_.scheme.begin,
+                                   parsed_.scheme.len);
+}
+
+std::string GURL::ExtractFileName() const {
+  url::Component file_component;  // Filled in by url::ExtractFileName().
+  url::ExtractFileName(spec_.data(), parsed_.path, &file_component);
+  return ComponentString(file_component);  // Copy of that range of the spec.
+}
+
+base::StringPiece GURL::PathForRequestPiece() const {
+  DCHECK(parsed_.path.is_nonempty())
+      << "Canonical path for requests should be non-empty";
+  const int path_start = parsed_.path.begin;
+  if (parsed_.ref.is_valid()) {
+    // The ref component begins just past the '#', so step back one extra
+    // character to cut the '#' off along with the ref.
+    const int len_before_hash = parsed_.ref.begin - path_start - 1;
+    return base::StringPiece(spec_).substr(path_start, len_before_hash);
+  }
+  // Measure the path (plus any query) explicitly instead of running to the end
+  // of spec_: for an inner_url the spec continues into the outer URL's
+  // path/query/ref.
+  const int path_len = parsed_.query.is_valid()
+                           ? parsed_.query.end() - path_start
+                           : parsed_.path.len;
+  return base::StringPiece(spec_).substr(path_start, path_len);
+}
+
+std::string GURL::PathForRequest() const {
+  return std::string(PathForRequestPiece());  // Owning copy of the piece.
+}
+
+std::string GURL::HostNoBrackets() const {
+  return std::string(HostNoBracketsPiece());  // Owning copy of the piece.
+}
+
+base::StringPiece GURL::HostNoBracketsPiece() const {
+  url::Component host = parsed_.host;
+  const bool bracketed = host.len >= 2 && spec_[host.begin] == '[' &&
+                         spec_[host.end() - 1] == ']';
+  // A bracketed host looks like an IPv6 literal: trim one char off each end.
+  host.begin += bracketed ? 1 : 0;
+  host.len -= bracketed ? 2 : 0;
+  return ComponentStringPiece(host);
+}
+
+std::string GURL::GetContent() const {
+  return std::string(GetContentPiece());  // Owning copy of the piece.
+}
+
+base::StringPiece GURL::GetContentPiece() const {
+  if (!is_valid_)
+    return base::StringPiece();
+  const bool drop_ref = has_ref() && !SchemeIs(url::kJavaScriptScheme);
+  url::Component content = parsed_.GetContent();
+  content.len -= drop_ref ? parsed_.ref.len + 1 : 0;
+  return ComponentStringPiece(content);
+}
+
+bool GURL::HostIsIPAddress() const {
+  return is_valid() && url::HostIsIPAddress(ComponentStringPiece(parsed_.host));
+}
+
+const GURL& GURL::EmptyGURL() {
+  static const base::NoDestructor<GURL> kEmptyGurl;
+  return *kEmptyGurl;
+}
+
+bool GURL::DomainIs(base::StringPiece canonical_domain) const {
+  if (!is_valid())
+    return false;
+  // A filesystem: URL has an empty host_piece of its own, so the question is
+  // delegated to its inner URL when one exists.
+  const bool use_inner = inner_url_ != nullptr && SchemeIsFileSystem();
+  return use_inner ? inner_url_->DomainIs(canonical_domain)
+                   : url::DomainIs(host_piece(), canonical_domain);
+}
+
+bool GURL::EqualsIgnoringRef(const GURL& other) const {
+  const auto without_ref = [](const GURL& gurl) {
+    const int end = gurl.parsed_.CountCharactersBefore(url::Parsed::REF, true);
+    return base::StringPiece(gurl.spec_).substr(0, end);
+  };
+  return without_ref(*this) == without_ref(other);
+}
+
+void GURL::Swap(GURL* other) {
+  std::swap(inner_url_, other->inner_url_);
+  std::swap(parsed_, other->parsed_);
+  std::swap(is_valid_, other->is_valid_);
+  std::swap(spec_, other->spec_);
+}
+
+size_t GURL::EstimateMemoryUsage() const {
+  size_t total = parsed_.inner_parsed() ? sizeof(url::Parsed) : 0;
+  total += base::trace_event::EstimateMemoryUsage(inner_url_);
+  return total + base::trace_event::EstimateMemoryUsage(spec_);
+}
+
+bool GURL::IsAboutUrl(base::StringPiece allowed_path) const {
+  // Beyond the about: scheme, the URL must carry no authority pieces (host,
+  // username, password or port); only then is the path itself compared.
+  const bool has_authority_part =
+      has_host() || has_username() || has_password() || has_port();
+  if (!SchemeIs(url::kAboutScheme) || has_authority_part)
+    return false;
+  return IsAboutPath(path_piece(), allowed_path);
+}
+
+// static
+bool GURL::IsAboutPath(base::StringPiece actual_path,
+                       base::StringPiece allowed_path) {
+  // Accept exactly `allowed_path`, optionally followed by one trailing '/'.
+  if (!base::StartsWith(actual_path, allowed_path))
+    return false;
+
+  const size_t suffix_len = actual_path.size() - allowed_path.size();
+  if (suffix_len == 0) {
+    DCHECK_EQ(actual_path, allowed_path);
+    return true;
+  }
+  if (suffix_len != 1 || actual_path.back() != '/')
+    return false;
+
+  // The single extra character is the permitted trailing slash.
+  DCHECK_EQ(actual_path, std::string(allowed_path) + '/');
+  return true;
+}
+
+void GURL::WriteIntoTrace(perfetto::TracedValue context) const {
+  std::move(context).WriteString(spec_);
+}
+
+std::ostream& operator<<(std::ostream& out, const GURL& url) {
+  return out << url.possibly_invalid_spec();  // Streams invalid specs too.
+}
+
+bool operator==(const GURL& x, const GURL& y) {
+  return x.possibly_invalid_spec() == y.possibly_invalid_spec();  // Raw specs.
+}
+
+bool operator!=(const GURL& x, const GURL& y) {
+  return x.possibly_invalid_spec() != y.possibly_invalid_spec();
+}
+
+bool operator==(const GURL& x, const base::StringPiece& spec) {
+  DCHECK_EQ(GURL(spec).possibly_invalid_spec(), spec)
+      << "Comparisons of GURLs and strings must ensure as a precondition that "
+         "the string is fully canonicalized.";
+  return base::StringPiece(x.possibly_invalid_spec()) == spec;
+}
+
+bool operator==(const base::StringPiece& spec, const GURL& x) {
+  return operator==(x, spec);
+}
+
+bool operator!=(const GURL& x, const base::StringPiece& spec) {
+  return !(spec == x);
+}
+
+bool operator!=(const base::StringPiece& spec, const GURL& x) {
+  return x != spec;
+}
+
+namespace url::debug {
+
+ScopedUrlCrashKey::ScopedUrlCrashKey(base::debug::CrashKeyString* crash_key,
+                                     const GURL& url)
+    : scoped_string_value_(
+          crash_key,
+          !url.is_empty() ? url.possibly_invalid_spec() : "<empty url>") {}
+
+ScopedUrlCrashKey::~ScopedUrlCrashKey() = default;
+
+}  // namespace url::debug
diff --git a/gurl.h b/gurl.h
new file mode 100644
index 00000000000..688a1018a9b
--- /dev/null
+++ b/gurl.h
@@ -0,0 +1,534 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_GURL_H_
+#define URL_GURL_H_
+
+#include <stddef.h>
+
+#include <iosfwd>
+#include <memory>
+#include <string>
+
+#include "base/component_export.h"
+#include "base/debug/alias.h"
+#include "base/debug/crash_logging.h"
+#include "base/strings/string_piece.h"
+#include "base/trace_event/base_tracing_forward.h"
+#include "url/third_party/mozilla/url_parse.h"
+#include "url/url_canon.h"
+#include "url/url_canon_stdstring.h"
+#include "url/url_constants.h"
+
+// Represents a URL. GURL is Google's URL parsing library.
+//
+// A parsed canonicalized URL is guaranteed to be UTF-8. Any non-ASCII input
+// characters are UTF-8 encoded and % escaped to ASCII.
+//
+// The string representation of a URL is called the spec(). Getting the
+// spec will assert if the URL is invalid to help protect against malicious
+// URLs. If you want the "best effort" canonicalization of an invalid URL, you
+// can use possibly_invalid_spec(). Test validity with is_valid(). Data and
+// javascript URLs use GetContent() to extract the data.
+//
+// This class has existence checkers and getters for the various components of
+// a URL. Existence is different than being nonempty. "http://www.google.com/?"
+// has a query that just happens to be empty, and has_query() will return true
+// while the query getters will return the empty string.
+//
+// Prefer not to modify a URL using string operations (though sometimes this is
+// unavoidable). Instead, use ReplaceComponents which can replace or delete
+// multiple parts of a URL in one step, doesn't re-canonicalize unchanged
+// sections, and avoids some screw-ups. An example is creating a URL with a
+// path that contains a literal '#'. Using string concatenation will generate a
+// URL with a truncated path and a reference fragment, while ReplaceComponents
+// will know to escape this and produce the desired result.
+class COMPONENT_EXPORT(URL) GURL {
+ public:
+  typedef url::StringPieceReplacements<char> Replacements;
+  typedef url::StringPieceReplacements<char16_t> ReplacementsW;
+
+  // Creates an empty, invalid URL.
+  GURL();
+
+  // Copy construction is relatively inexpensive, with most of the time going
+  // to reallocating the string. It does not re-parse.
+  GURL(const GURL& other);
+  GURL(GURL&& other) noexcept;
+
+  // The strings to this constructor should be UTF-8 / UTF-16.
+  explicit GURL(base::StringPiece url_string);
+  explicit GURL(base::StringPiece16 url_string);
+
+  // Constructor for URLs that have already been parsed and canonicalized. This
+  // is used for conversions from KURL, for example. The caller must supply all
+  // information associated with the URL, which must be correct and consistent.
+  GURL(const char* canonical_spec,
+       size_t canonical_spec_len,
+       const url::Parsed& parsed,
+       bool is_valid);
+  // Notice that we take the canonical_spec by value so that we can convert
+  // from WebURL without copying the string. When we call this constructor
+  // we pass in a temporary std::string, which lets the compiler skip the
+  // copy and just move the std::string into the function argument. In the
+  // implementation, we use std::move to move the data into the GURL itself,
+  // which means we end up with zero copies.
+  GURL(std::string canonical_spec, const url::Parsed& parsed, bool is_valid);
+
+  ~GURL();
+
+  GURL& operator=(const GURL& other);
+  GURL& operator=(GURL&& other) noexcept;
+
+  // Returns true when this object represents a valid parsed URL. When not
+  // valid, other functions will still succeed, but you will not get canonical
+  // data out in the format you may be expecting. Instead, we keep something
+  // "reasonable looking" so that the user can see how it's busted if
+  // displayed to them.
+  bool is_valid() const {
+    return is_valid_;
+  }
+
+  // Returns true if the URL is zero-length. Note that empty URLs are also
+  // invalid, and is_valid() will return false for them. This is provided
+  // because some users may want to treat the empty case differently.
+  bool is_empty() const {
+    return spec_.empty();
+  }
+
+  // Returns the raw spec, i.e., the full text of the URL, in canonical UTF-8,
+  // if the URL is valid. If the URL is not valid, this will assert and return
+  // the empty string (for safety in release builds, to keep them from being
+  // misused which might be a security problem).
+  //
+  // The URL will be ASCII (non-ASCII characters will be %-escaped UTF-8).
+  //
+  // The exception is for is_empty() URLs (which are !is_valid()) but this will
+  // return the empty string without asserting.
+  //
+  // Use possibly_invalid_spec() below to get the unusable spec of an invalid
+  // URL. This separation is designed to prevent errors that may cause security
+  // problems that could result from the mistaken use of an invalid URL.
+  const std::string& spec() const;
+
+  // Returns the potentially invalid spec for the URL. This spec MUST NOT be
+  // modified or sent over the network. It is designed to be displayed in error
+  // messages to the user, as the appearance of the spec may explain the error.
+  // If the spec is valid, the valid spec will be returned.
+  //
+  // The returned string is guaranteed to be valid UTF-8.
+  const std::string& possibly_invalid_spec() const {
+    return spec_;
+  }
+
+  // Getter for the raw parsed structure. This allows callers to locate parts
+  // of the URL within the spec themselves. Most callers should consider using
+  // the individual component getters below.
+  //
+  // The returned parsed structure will reference into the raw spec, which may
+  // or may not be valid. If you are using this to index into the spec, BE
+  // SURE YOU ARE USING possibly_invalid_spec() to get the spec, and that you
+  // don't do anything "important" with invalid specs.
+  const url::Parsed& parsed_for_possibly_invalid_spec() const {
+    return parsed_;
+  }
+
+  // Allows GURL to be used as a key in STL (e.g. a std::set or std::map).
+  bool operator<(const GURL& other) const;
+  bool operator>(const GURL& other) const;
+
+  // Resolves a URL that's possibly relative to this object's URL, and returns
+  // it. Absolute URLs are also handled according to the rules of URLs on web
+  // pages.
+  //
+  // It may be impossible to resolve the URLs properly. If the input is not
+  // "standard" (IsStandard() == false) and the input looks relative, we can't
+  // resolve it. In these cases, the result will be an empty, invalid GURL.
+  //
+  // The result may also be a nonempty, invalid URL if the input has some kind
+  // of encoding error. In these cases, we will try to construct a "good" URL
+  // that may have meaning to the user, but it will be marked invalid.
+  //
+  // It is an error to resolve a URL relative to an invalid URL. The result
+  // will be the empty URL.
+  GURL Resolve(base::StringPiece relative) const;
+  GURL Resolve(base::StringPiece16 relative) const;
+
+  // Creates a new GURL by replacing the current URL's components with the
+  // supplied versions. See the Replacements class in url_canon.h for more.
+  //
+  // These are not particularly quick, so avoid doing mutations when possible.
+  // Prefer the 8-bit version when possible.
+  //
+  // It is an error to replace components of an invalid URL. The result will
+  // be the empty URL.
+  //
+  // Note that this intentionally disallows direct use of url::Replacements,
+  // which is harder to use correctly.
+  GURL ReplaceComponents(const Replacements& replacements) const;
+  GURL ReplaceComponents(const ReplacementsW& replacements) const;
+
+  // A helper function that is equivalent to replacing the path with a slash
+  // and clearing out everything after that. We sometimes need to know just the
+  // scheme and the authority. If this URL is not a standard URL (it doesn't
+  // have the regular authority and path sections), then the result will be
+  // an empty, invalid GURL. Note that this *does* work for file: URLs, which
+  // some callers may want to filter out before calling this.
+  //
+  // It is an error to get an empty path on an invalid URL. The result
+  // will be the empty URL.
+  GURL GetWithEmptyPath() const;
+
+  // A helper function to return a GURL without the filename, query values, and
+  // fragment. For example,
+  // GURL("https://www.foo.com/index.html?q=test").GetWithoutFilename().spec()
+  // will return "https://www.foo.com/".
+  // GURL("https://www.foo.com/bar/").GetWithoutFilename().spec()
+  // will return "https://www.foo.com/bar/". If the GURL is invalid or missing a
+  // scheme, authority or path, it will return an empty, invalid GURL.
+  GURL GetWithoutFilename() const;
+
+  // A helper function to return a GURL without the Ref (also named Fragment
+  // Identifier). For example,
+  // GURL("https://www.foo.com/index.html#test").GetWithoutRef().spec()
+  // will return "https://www.foo.com/index.html".
+  // If the GURL is invalid or missing a
+  // scheme, authority or path, it will return an empty, invalid GURL.
+  GURL GetWithoutRef() const;
+
+  // A helper function to return a GURL containing just the scheme, host,
+  // and port from a URL. Equivalent to clearing any username and password,
+  // replacing the path with a slash, and clearing everything after that. If
+  // this URL is not a standard URL, then the result will be an empty,
+  // invalid GURL. If the URL has neither username nor password, this
+  // degenerates to GetWithEmptyPath().
+  //
+  // It is an error to get the origin of an invalid URL. The result
+  // will be the empty URL.
+  //
+  // WARNING: Please avoid converting urls into origins if at all possible!
+  // //docs/security/origin-vs-url.md is a list of gotchas that can result. Such
+  // conversions will likely return a wrong result for about:blank and/or
+  // in the presence of iframe.sandbox attribute. Prefer to get origins directly
+  // from the source (e.g. RenderFrameHost::GetLastCommittedOrigin).
+  GURL DeprecatedGetOriginAsURL() const;
+
+  // A helper function to return a GURL stripped from the elements that are not
+  // supposed to be sent as HTTP referrer: username, password and ref fragment.
+  // For invalid URLs or URLs that have no valid referrers, an empty URL will be
+  // returned.
+  GURL GetAsReferrer() const;
+
+  // Returns true if the scheme for the current URL is a known "standard-format"
+  // scheme. A standard-format scheme adheres to what RFC 3986 calls "generic
+  // URI syntax" (https://tools.ietf.org/html/rfc3986#section-3). This includes
+  // file: and filesystem:, which some callers may want to filter out explicitly
+  // by calling SchemeIsFile[System].
+  bool IsStandard() const;
+
+  // Returns true when the url is of the form about:blank, about:blank?foo or
+  // about:blank/#foo.
+  bool IsAboutBlank() const;
+
+  // Returns true when the url is of the form about:srcdoc, about:srcdoc?foo or
+  // about:srcdoc/#foo.
+  bool IsAboutSrcdoc() const;
+
+  // Returns true if the given parameter (should be lower-case ASCII to match
+  // the canonicalized scheme) is the scheme for this URL. Do not include a
+  // colon.
+  bool SchemeIs(base::StringPiece lower_ascii_scheme) const;
+
+  // Returns true if the scheme is "http" or "https".
+  bool SchemeIsHTTPOrHTTPS() const;
+
+  // Returns true if the scheme is "ws" or "wss".
+  bool SchemeIsWSOrWSS() const;
+
+  // We often need to know if this is a file URL. File URLs are "standard", but
+  // are often treated separately by some programs.
+  bool SchemeIsFile() const {
+    return SchemeIs(url::kFileScheme);
+  }
+
+  // FileSystem URLs need to be treated differently in some cases.
+  bool SchemeIsFileSystem() const {
+    return SchemeIs(url::kFileSystemScheme);
+  }
+
+  // Returns true if the scheme indicates a network connection that uses TLS or
+  // some other cryptographic protocol (e.g. QUIC) for security.
+  //
+  // This function is not a complete test of whether or not an origin's code
+  // is minimally trustworthy. For that, see Chromium's |IsOriginSecure| for a
+  // higher-level and more complete semantics. See that function's documentation
+  // for more detail.
+  bool SchemeIsCryptographic() const;
+
+  // As above, but static. Parameter should be lower-case ASCII.
+  static bool SchemeIsCryptographic(base::StringPiece lower_ascii_scheme);
+
+  // Returns true if the scheme is "blob".
+  bool SchemeIsBlob() const {
+    return SchemeIs(url::kBlobScheme);
+  }
+
+  // Returns true if the scheme is a local scheme, as defined in Fetch:
+  // https://fetch.spec.whatwg.org/#local-scheme
+  bool SchemeIsLocal() const;
+
+  // For most URLs, the "content" is everything after the scheme (skipping the
+  // scheme delimiting colon) and before the fragment (skipping the fragment
+  // delimiting octothorpe). For javascript URLs the "content" also includes the
+  // fragment delimiter and fragment.
+  //
+  // It is an error to get the content of an invalid URL: the result will be an
+  // empty string.
+  std::string GetContent() const;
+  base::StringPiece GetContentPiece() const;
+
+  // Returns true if the hostname is an IP address. Note: this function isn't
+  // as cheap as a simple getter because it re-parses the hostname to verify.
+  bool HostIsIPAddress() const;
+
+  // Not including the colon. If you are comparing schemes, prefer SchemeIs.
+  bool has_scheme() const { return parsed_.scheme.is_valid(); }
+  std::string scheme() const {
+    return ComponentString(parsed_.scheme);
+  }
+  base::StringPiece scheme_piece() const {
+    return ComponentStringPiece(parsed_.scheme);
+  }
+
+  bool has_username() const { return parsed_.username.is_valid(); }
+  std::string username() const {
+    return ComponentString(parsed_.username);
+  }
+  base::StringPiece username_piece() const {
+    return ComponentStringPiece(parsed_.username);
+  }
+
+  bool has_password() const { return parsed_.password.is_valid(); }
+  std::string password() const {
+    return ComponentString(parsed_.password);
+  }
+  base::StringPiece password_piece() const {
+    return ComponentStringPiece(parsed_.password);
+  }
+
+  // The host may be a hostname, an IPv4 address, or an IPv6 literal surrounded
+  // by square brackets, like "[2001:db8::1]". To exclude these brackets, use
+  // HostNoBrackets() below.
+  bool has_host() const {
+    // Note that hosts are special, absence of host means length 0.
+    return parsed_.host.is_nonempty();
+  }
+  std::string host() const {
+    return ComponentString(parsed_.host);
+  }
+  base::StringPiece host_piece() const {
+    return ComponentStringPiece(parsed_.host);
+  }
+
+  // The port if one is explicitly specified. Most callers will want IntPort()
+  // or EffectiveIntPort() instead of these. The getters will not include the
+  // ':'.
+  bool has_port() const { return parsed_.port.is_valid(); }
+  std::string port() const {
+    return ComponentString(parsed_.port);
+  }
+  base::StringPiece port_piece() const {
+    return ComponentStringPiece(parsed_.port);
+  }
+
+  // Including first slash following host, up to the query. The URL
+  // "http://www.google.com/" has a path of "/".
+  bool has_path() const { return parsed_.path.is_valid(); }
+  std::string path() const {
+    return ComponentString(parsed_.path);
+  }
+  base::StringPiece path_piece() const {
+    return ComponentStringPiece(parsed_.path);
+  }
+
+  // Stuff following '?' up to the ref. The getters will not include the '?'.
+  bool has_query() const { return parsed_.query.is_valid(); }
+  std::string query() const {
+    return ComponentString(parsed_.query);
+  }
+  base::StringPiece query_piece() const {
+    return ComponentStringPiece(parsed_.query);
+  }
+
+  // Stuff following '#' to the end of the string. This will be %-escaped UTF-8.
+  // The getters will not include the '#'.
+  bool has_ref() const { return parsed_.ref.is_valid(); }
+  std::string ref() const {
+    return ComponentString(parsed_.ref);
+  }
+  base::StringPiece ref_piece() const {
+    return ComponentStringPiece(parsed_.ref);
+  }
+
+  // Returns a parsed version of the port. Can also be any of the special
+  // values defined in Parsed for ExtractPort.
+  int IntPort() const;
+
+  // Returns the port number of the URL, or the default port number.
+  // If the scheme has no concept of port (or unknown default) returns
+  // PORT_UNSPECIFIED.
+  int EffectiveIntPort() const;
+
+  // Extracts the filename portion of the path and returns it. The filename
+  // is everything after the last slash in the path. This may be empty.
+  std::string ExtractFileName() const;
+
+  // Returns the path that should be sent to the server. This is the path,
+  // parameter, and query portions of the URL. It is guaranteed to be ASCII.
+  std::string PathForRequest() const;
+
+  // Returns the same characters as PathForRequest(), avoiding a copy.
+  base::StringPiece PathForRequestPiece() const;
+
+  // Returns the host, excluding the square brackets surrounding IPv6 address
+  // literals. This can be useful for passing to getaddrinfo().
+  std::string HostNoBrackets() const;
+
+  // Returns the same characters as HostNoBrackets(), avoiding a copy.
+  base::StringPiece HostNoBracketsPiece() const;
+
+  // Returns true if this URL's host matches or is in the same domain as
+  // the given input string. For example, if the hostname of the URL is
+  // "www.google.com", this will return true for "com", "google.com", and
+  // "www.google.com".
+  //
+  // The input domain should match host canonicalization rules. i.e. the input
+  // should be lowercase except for escape chars.
+  //
+  // This call is more efficient than getting the host and checking whether the
+  // host has the specific domain or not because no copies or object
+  // constructions are done.
+  bool DomainIs(base::StringPiece canonical_domain) const;
+
+  // Checks whether or not two URLs differ only in the ref (the part after
+  // the # character).
+  bool EqualsIgnoringRef(const GURL& other) const;
+
+  // Swaps the contents of this GURL object with |other|, without doing
+  // any memory allocations.
+  void Swap(GURL* other);
+
+  // Returns a reference to a singleton empty GURL. This object is for callers
+  // who return references but don't have anything to return in some cases.
+  // If you just want an empty URL for normal use, prefer GURL(). This function
+  // may be called from any thread.
+  static const GURL& EmptyGURL();
+
+  // Returns the inner URL of a nested URL (currently only non-null for
+  // filesystem URLs).
+  //
+  // TODO(mmenke): inner_url().spec() currently returns the same value as
+  // calling spec() on the GURL itself. This should be fixed.
+  // See https://crbug.com/619596
+  const GURL* inner_url() const {
+    return inner_url_.get();
+  }
+
+  // Estimates dynamic memory usage.
+  // See base/trace_event/memory_usage_estimator.h for more info.
+  size_t EstimateMemoryUsage() const;
+
+  // Helper used by GURL::IsAboutUrl and KURL::IsAboutURL.
+  static bool IsAboutPath(base::StringPiece actual_path,
+                          base::StringPiece allowed_path);
+
+  void WriteIntoTrace(perfetto::TracedValue context) const;
+
+ private:
+  // Variant of the string parsing constructor that allows the caller to elect
+  // to retain trailing whitespace, if any, on the passed URL spec, but only if
+  // the scheme is one that allows trailing whitespace. The primary use-case is
+  // for data: URLs. In most cases, you want to use the single parameter
+  // constructor above.
+  enum RetainWhiteSpaceSelector { RETAIN_TRAILING_PATH_WHITEPACE };
+  GURL(const std::string& url_string, RetainWhiteSpaceSelector);
+
+  template <typename T, typename CharT = typename T::value_type>
+  void InitCanonical(T input_spec, bool trim_path_end);
+
+  void InitializeFromCanonicalSpec();
+
+  // Helper used by IsAboutBlank and IsAboutSrcdoc.
+  bool IsAboutUrl(base::StringPiece allowed_path) const;
+
+  // Returns the substring of the input identified by the given component.
+  std::string ComponentString(const url::Component& comp) const {
+    return std::string(ComponentStringPiece(comp));
+  }
+  base::StringPiece ComponentStringPiece(const url::Component& comp) const {
+    if (comp.is_empty())
+      return base::StringPiece();
+    return base::StringPiece(spec_).substr(static_cast<size_t>(comp.begin),
+                                           static_cast<size_t>(comp.len));
+  }
+
+  void ProcessFileSystemURLAfterReplaceComponents();
+
+  // The actual text of the URL, in canonical ASCII form.
+  std::string spec_;
+
+  // Set when the given URL is valid. Otherwise, we may still have a spec and
+  // components, but they may not identify valid resources (for example, an
+  // invalid port number, invalid characters in the scheme, etc.).
+  bool is_valid_;
+
+  // Identified components of the canonical spec.
+  url::Parsed parsed_;
+
+  // Used for nested schemes [currently only filesystem:].
+  std::unique_ptr<GURL> inner_url_;
+};
+
+// Stream operator so GURL can be used in assertion statements.
+COMPONENT_EXPORT(URL)
+std::ostream& operator<<(std::ostream& out, const GURL& url);
+
+COMPONENT_EXPORT(URL) bool operator==(const GURL& x, const GURL& y);
+COMPONENT_EXPORT(URL) bool operator!=(const GURL& x, const GURL& y);
+
+// Equality operator for comparing raw spec_. This should be used in place of
+// url == GURL(spec) where |spec| is known (i.e. constants). This is to prevent
+// needlessly re-parsing |spec| into a temporary GURL.
+COMPONENT_EXPORT(URL)
+bool operator==(const GURL& x, const base::StringPiece& spec);
+COMPONENT_EXPORT(URL)
+bool operator==(const base::StringPiece& spec, const GURL& x);
+COMPONENT_EXPORT(URL)
+bool operator!=(const GURL& x, const base::StringPiece& spec);
+COMPONENT_EXPORT(URL)
+bool operator!=(const base::StringPiece& spec, const GURL& x);
+
+// DEBUG_ALIAS_FOR_GURL(var_name, url) copies |url| into a new stack-allocated
+// variable named |<var_name>|.  This helps ensure that the value of |url| gets
+// preserved in crash dumps.
+#define DEBUG_ALIAS_FOR_GURL(var_name, url) \
+  DEBUG_ALIAS_FOR_CSTR(var_name, (url).possibly_invalid_spec().c_str(), 128)
+
+namespace url::debug {
+
+class COMPONENT_EXPORT(URL) ScopedUrlCrashKey {
+ public:
+  ScopedUrlCrashKey(base::debug::CrashKeyString* crash_key, const GURL& value);
+  ~ScopedUrlCrashKey();
+
+  ScopedUrlCrashKey(const ScopedUrlCrashKey&) = delete;
+  ScopedUrlCrashKey& operator=(const ScopedUrlCrashKey&) = delete;
+
+ private:
+  base::debug::ScopedCrashKeyString scoped_string_value_;  // The URL's text.
+};
+
+}  // namespace url::debug
+
+#endif  // URL_GURL_H_
diff --git a/gurl_abstract_tests.h b/gurl_abstract_tests.h
new file mode 100644
index 00000000000..3cde8420567
--- /dev/null
+++ b/gurl_abstract_tests.h
@@ -0,0 +1,119 @@
+// Copyright 2021 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_GURL_ABSTRACT_TESTS_H_
+#define URL_GURL_ABSTRACT_TESTS_H_
+
+// Test suite for tests that cover both GURL and blink::KURL.
+//
+// AbstractUrlTest below abstracts away differences between GURL and blink::KURL
+// by parametrizing the tests with a class that has to expose the following
+// members:
+//   using UrlType = ...;
+//   static UrlType CreateUrlFromString(base::StringPiece s);
+//   static bool IsAboutBlank(const UrlType& url);
+//   static bool IsAboutSrcdoc(const UrlType& url);
+template <typename TUrlTraits>
+class AbstractUrlTest : public testing::Test {
+ protected:
+  // Wrappers that help elide away TUrlTraits.
+  //
+  // Note that calling the wrappers needs to be prefixed with `this->...` to
+  // avoid hitting: explicit qualification required to use member 'IsAboutBlank'
+  // from dependent base class.
+  using UrlType = typename TUrlTraits::UrlType;
+  UrlType CreateUrlFromString(base::StringPiece s) {
+    return TUrlTraits::CreateUrlFromString(s);
+  }
+  bool IsAboutBlank(const UrlType& url) {
+    return TUrlTraits::IsAboutBlank(url);
+  }
+  bool IsAboutSrcdoc(const UrlType& url) {
+    return TUrlTraits::IsAboutSrcdoc(url);
+  }
+};
+
+TYPED_TEST_SUITE_P(AbstractUrlTest);
+
+TYPED_TEST_P(AbstractUrlTest, IsAboutBlankTest) {
+  // RFC 6694 (https://tools.ietf.org/html/rfc6694) explicitly permits the
+  // `about-query` and `about-fragment` parts in about: URLs.
+  const char* const kAboutBlankUrls[] = {"about:blank", "about:blank?foo",
+                                         "about:blank/#foo",
+                                         "about:blank?foo#foo"};
+  for (const char* input : kAboutBlankUrls) {
+    SCOPED_TRACE(testing::Message() << "Test input: " << input);
+    const auto url = this->CreateUrlFromString(input);
+    EXPECT_TRUE(this->IsAboutBlank(url));
+  }
+
+  const char* const kNotAboutBlankUrls[] = {"",
+                                            "about",
+                                            "about:",
+                                            "about:blanky",
+                                            "about:blan",
+                                            "about:about:blank:",
+                                            "data:blank",
+                                            "http:blank",
+                                            "about://blank",
+                                            "about:blank/foo",
+                                            "about://:8000/blank",
+                                            "about://foo:foo@/blank",
+                                            "foo@about:blank",
+                                            "foo:bar@about:blank",
+                                            "about:blank:8000",
+                                            "about:blANk"};
+  for (const char* input : kNotAboutBlankUrls) {
+    SCOPED_TRACE(testing::Message() << "Test input: " << input);
+    const auto url = this->CreateUrlFromString(input);
+    EXPECT_FALSE(this->IsAboutBlank(url));
+  }
+}
+
+TYPED_TEST_P(AbstractUrlTest, IsAboutSrcdocTest) {
+  // See https://tools.ietf.org/html/rfc6694 which explicitly allows
+  // `about-query` and `about-fragment` parts in about: URLs.
+  //
+  // `about:srcdoc` is defined in
+  // https://html.spec.whatwg.org/multipage/urls-and-fetching.html#about:srcdoc
+  // which refers to rfc6694 for details.
+  const std::string kAboutSrcdocUrls[] = {
+      "about:srcdoc", "about:srcdoc/", "about:srcdoc?foo", "about:srcdoc/#foo",
+      "about:srcdoc?foo#foo"};
+  for (const auto& input : kAboutSrcdocUrls) {
+    SCOPED_TRACE(testing::Message() << "Test input: " << input);
+    auto url = this->CreateUrlFromString(input);
+    EXPECT_TRUE(this->IsAboutSrcdoc(url));
+  }
+
+  const std::string kNotAboutSrcdocUrls[] = {"",
+                                             "about",
+                                             "about:",
+                                             "about:srcdocx",
+                                             "about:srcdo",
+                                             "about:about:srcdoc:",
+                                             "data:srcdoc",
+                                             "http:srcdoc",
+                                             "about:blank",
+                                             "about://srcdoc",
+                                             "about://srcdoc\\",
+                                             "about:srcdoc/foo",
+                                             "about://:8000/srcdoc",
+                                             "about://foo:foo@/srcdoc",
+                                             "foo@about:srcdoc",
+                                             "foo:bar@about:srcdoc",
+                                             "about:srcdoc:8000",
+                                             "about:srCDOc"};
+  for (const auto& input : kNotAboutSrcdocUrls) {
+    SCOPED_TRACE(testing::Message() << "Test input: " << input);
+    auto url = this->CreateUrlFromString(input);
+    EXPECT_FALSE(this->IsAboutSrcdoc(url));
+  }
+}
+
+REGISTER_TYPED_TEST_SUITE_P(AbstractUrlTest,
+                            IsAboutBlankTest,
+                            IsAboutSrcdocTest);
+
+#endif  // URL_GURL_ABSTRACT_TESTS_H_
diff --git a/gurl_fuzzer.cc b/gurl_fuzzer.cc
new file mode 100644
index 00000000000..029a387e4c2
--- /dev/null
+++ b/gurl_fuzzer.cc
@@ -0,0 +1,89 @@
+// Copyright 2015 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <string>
+
+#include "base/at_exit.h"
+#include "base/check_op.h"
+#include "base/i18n/icu_util.h"
+#include "base/no_destructor.h"
+#include "url/gurl.h"
+
+// Process-wide fuzzer environment. Constructing it initializes ICU and aborts
+// through CHECK if that initialization fails.
+struct TestCase {
+  // Needed by the ICU integration. As a data member it is constructed before
+  // the constructor body below runs.
+  base::AtExitManager at_exit_manager;
+
+  TestCase() { CHECK(base::i18n::InitializeICU()); }
+};
+
+// Created once during static initialization; this file never deletes it.
+TestCase* test_case = new TestCase();
+
+// Verifies that canonicalization is idempotent: parsing |url.spec()| a second
+// time must yield a valid GURL whose spec is identical. Invalid URLs are
+// skipped. This can help discover issues like https://crbug.com/1128999.
+void CheckIdempotency(const GURL& url) {
+  if (url.is_valid()) {
+    const std::string& spec = url.spec();
+    const GURL recanonicalized(spec);
+    CHECK(recanonicalized.is_valid());
+    CHECK_EQ(spec, recanonicalized.spec());
+  }
+}
+
+// Verifies that ReplaceComponents() with zero replacements acts like a copy:
+// validity must carry over and, for a valid |url|, so must the spec. This can
+// help discover issues like https://crbug.com/1075515.
+void CheckReplaceComponentsPreservesSpec(const GURL& url) {
+  // Default-constructed and never modified, i.e. it requests no replacements.
+  static const base::NoDestructor<GURL::Replacements> no_op;
+  const GURL copy = url.ReplaceComponents(*no_op);
+  CHECK_EQ(url.is_valid(), copy.is_valid());
+  if (!url.is_valid())
+    return;
+  CHECK_EQ(url.spec(), copy.spec());
+}
+
+namespace {
+
+// Returns the first |size_in_bytes| bytes at |data| as UTF-16 code units in
+// native byte order. |size_in_bytes| must be a multiple of sizeof(char16_t).
+// The bytes are copied with memcpy() rather than viewed through a
+// reinterpret_cast'ed pointer: |data| is a plain byte buffer, so accessing it
+// as char16_t would be undefined behaviour (alignment and aliasing).
+std::u16string BytesToU16String(const uint8_t* data, size_t size_in_bytes) {
+  std::u16string result(size_in_bytes / sizeof(char16_t), u'\0');
+  memcpy(result.data(), data, result.size() * sizeof(char16_t));
+  return result;
+}
+
+}  // namespace
+
+// Entry point for LibFuzzer.
+//
+// The input is exercised in three ways:
+//   1. All of it, as an 8-bit string, is parsed into a GURL.
+//   2. If its size is a multiple of sizeof(char16_t), all of it is also parsed
+//      as a UTF-16 string.
+//   3. If it is longer than sizeof(size_t), the leading sizeof(size_t) bytes
+//      select how the remainder is split into a relative reference and a base
+//      URL, and the relative reference is resolved against that base.
+// Every GURL built directly from the input goes through CheckIdempotency() and
+// CheckReplaceComponentsPreservesSpec(). Always returns 0.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  if (size < 1)
+    return 0;
+  {
+    base::StringPiece string_piece_input(reinterpret_cast<const char*>(data),
+                                         size);
+    const GURL url_from_string_piece(string_piece_input);
+    CheckIdempotency(url_from_string_piece);
+    CheckReplaceComponentsPreservesSpec(url_from_string_piece);
+  }
+  // Test for StringPiece16 if size is even.
+  if (size % sizeof(char16_t) == 0) {
+    const std::u16string input16 = BytesToU16String(data, size);
+    const GURL url_from_string_piece16{base::StringPiece16(input16)};
+    CheckIdempotency(url_from_string_piece16);
+    CheckReplaceComponentsPreservesSpec(url_from_string_piece16);
+  }
+  // Resolve relative url tests.
+  {
+    constexpr size_t size_t_bytes = sizeof(size_t);
+    if (size < size_t_bytes + 1) {
+      return 0;
+    }
+    // Read the length prefix with memcpy(): dereferencing |data| through a
+    // size_t pointer would be an unaligned, type-punned read.
+    size_t length_prefix = 0;
+    memcpy(&length_prefix, data, size_t_bytes);
+    // |size - size_t_bytes| is at least 1 here, so the modulo is well defined
+    // and always leaves at least one byte for the base URL.
+    const size_t relative_size = length_prefix % (size - size_t_bytes);
+    const uint8_t* payload = data + size_t_bytes;
+
+    std::string relative_string(reinterpret_cast<const char*>(payload),
+                                relative_size);
+    base::StringPiece string_piece_part_input(
+        reinterpret_cast<const char*>(payload + relative_size),
+        size - relative_size - size_t_bytes);
+    const GURL url_from_string_piece_part(string_piece_part_input);
+    CheckIdempotency(url_from_string_piece_part);
+    CheckReplaceComponentsPreservesSpec(url_from_string_piece_part);
+
+    url_from_string_piece_part.Resolve(relative_string);
+
+    if (relative_size % sizeof(char16_t) == 0) {
+      const std::u16string relative_string16 =
+          BytesToU16String(payload, relative_size);
+      url_from_string_piece_part.Resolve(relative_string16);
+    }
+  }
+  return 0;
+}
diff --git a/gurl_fuzzer.dict b/gurl_fuzzer.dict
new file mode 100644
index 00000000000..fcf7e035dd6
--- /dev/null
+++ b/gurl_fuzzer.dict
@@ -0,0 +1,432 @@
+# Copyright 2016 The Chromium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# This block has been generated with testing/libfuzzer/dictionary_generator.py
+# using url_parse_fuzzer binary and RFC 3986.
+"DNS"
+"text"
+"TCP"
+"\"%D3%81%87%A4%95%81@%C2%85%81%83%88\"."
+"[RFC2234]"
+"F.,"
+"FORCE"
+"SOCIETY"
+"implementation"
+"TASK"
+"cache"
+"WINS,"
+"D.1."
+"to"
+"only"
+"HTML"
+"SPONSORED"
+"[RFC1630]."
+"D.,"
+"[RFC1123]"
+"resources"
+"(STD"
+"[RFC1808],"
+"string"
+"returning"
+"=="
+"H"
+"HEREIN"
+"[BCP35]"
+"SP)"
+"SCTP)"
+"(NUL)"
+"THE"
+"(URI):"
+"REPRESENTS"
+"resource"
+"A.,"
+"EXPRESS"
+"list"
+"(%2E),"
+"WILL"
+"J."
+"INCLUDING"
+"segment."
+"[RFC2732]"
+"(URL)\","
+"set"
+"HTTP"
+"IANA"
+"INFORMATION"
+"(%41-%5A"
+"[RFC2518]"
+"M."
+"direct"
+"(IDNA)\","
+"Only"
+"Version"
+"are"
+"allowed."
+"\"X\""
+"(SP)."
+"2DIGIT"
+"section"
+"BUT"
+"\"UTF-8,"
+"3"
+"version"
+"[RFC1034]"
+"probably"
+"[RFC2732]."
+"metadata"
+"Y.,"
+"C"
+"WWW\""
+"FOR"
+"0X"
+"S"
+"address"
+"INPUT"
+"["
+"P."
+"WWW:"
+"AND"
+"WWW"
+"[BCP35]."
+"MA"
+"\"AS"
+"\"%\""
+"NOT"
+"ANY"
+"[RFC1808]"
+"WARRANTY"
+"useful"
+"[RFC1737]."
+"[STD63],"
+"\"HTTP\""
+"(MIME)"
+"TELNET"
+"[RFC1630]"
+"S."
+"D.2."
+"B.,"
+"[RFC2234]."
+"[RFC2234],"
+"BCP"
+"[STD63];"
+"use"
+"LATIN"
+"from"
+"C."
+"0"
+"WARRANTIES"
+"(MHTML)\","
+"ENGINEERING"
+"URI;"
+"few"
+"(DNS)."
+"expected"
+"USENET"
+"type"
+"empty"
+"XML"
+"URL?\","
+"W3C/MIT"
+"F"
+"CA"
+"STD:"
+"SMTP"
+"[RFC2141],"
+"N"
+"A),"
+"NOTE:"
+"CR"
+"MHTML"
+"must"
+"ANY),"
+"ALL"
+"[STD63]"
+"RIGHTS"
+"HE/SHE"
+"SP"
+"[BCP19]"
+"value"
+"INFRINGE"
+"while"
+"KATAKANA"
+"US-ASCII"
+"W3C/IETF"
+"loop"
+"J.,"
+"2E:"
+"L."
+"have"
+"%61-%7A),"
+"is"
+"PARTICULAR"
+"thus"
+"URI,"
+"parse"
+"STEP"
+"MIME"
+"UTF-8"
+"in"
+"failed"
+"LF"
+"binary"
+"ISO/IEC"
+"\"A"
+"(%5F),"
+")"
+"HTTP,"
+"get"
+"\"A\","
+"[RFC2141]"
+"BUFFER"
+"ABNF"
+"[RFC2557]."
+"I."
+"WARRANTIES,"
+"URN"
+"EBCDIC"
+"A"
+"used"
+"http"
+"may"
+"IP"
+"IS"
+"after"
+"L"
+"Q"
+"'A'"
+"running"
+"HEXDIG"
+"such"
+"EBCDIC,"
+"data"
+"[ASCII]"
+"a"
+"P"
+"[ASCII]."
+"M.,"
+"Names"
+"the"
+"[RFC0952]."
+"[RFC3490]"
+"US-ASCII."
+"2C:"
+"THAT"
+"E.,"
+"(%2D),"
+"\"URL:\""
+"WITH"
+"BY"
+"[UCS],"
+"tables"
+"[UCS]"
+"TO"
+"BNF"
+"internal"
+"P.,"
+"ORGANIZATION"
+"\"HTTP"
+"URI."
+"it,"
+"D"
+"format"
+"URL"
+"(0"
+"URI\""
+"URI"
+"K."
+"URI:"
+"T"
+"D.W."
+"not"
+"R."
+"LIMITED"
+"\"%3A\")"
+"name"
+"OF"
+"B."
+"[RFC1736]"
+"(R),"
+"IPR"
+"[RFC1738];"
+"OUTPUT"
+"LALR"
+"OR"
+"STD"
+"[RFC3513]"
+"because"
+"bytes"
+"DNS,"
+"back"
+"(URI)"
+"*DIGIT"
+"[RFC2046]"
+"[RFC3305]"
+"W3C"
+"E."
+"for"
+"space"
+"ABNF\","
+"[RFC1535]."
+"DQUOTE"
+"I"
+"does"
+"'F'"
+"[RFC2396]"
+"be"
+"K.,"
+"DISCLAIM"
+"G"
+"(UTF-16),"
+"This"
+"M"
+"INTERNET"
+"RFC"
+"X3.4,"
+"base"
+"(T):"
+"IMPLIED,"
+"by"
+"\"URL\""
+"on"
+"DIGIT"
+"(ABNF)"
+"WEBDAV\","
+"of"
+"could"
+"R.,"
+"(ABNF:"
+"S.,"
+"1*4HEXDIG"
+"CAPITAL"
+"number"
+"one"
+"ISO"
+"FITNESS"
+"\"%7E\""
+"open"
+"ANSI"
+"[BCP19],"
+"\"%C3%80\","
+"IETF"
+"support"
+"\"URN"
+"[RFC1123]."
+"long"
+"[RFC0952]"
+":"
+"was"
+"[RFC3513]."
+"[RFC2718]"
+"B"
+"N."
+"that"
+"IDNA"
+"OCTET"
+"but"
+"R"
+"POSIX"
+"LETTER"
+"CONTRIBUTOR,"
+"[RFC1738]"
+"(C)"
+"with"
+"\"URI\""
+"16"
+"default"
+"double"
+"\"URN\""
+"[RFC2557]"
+"up"
+"TCP,"
+"PURPOSE."
+"MERCHANTABILITY"
+"1)"
+"IS\""
+"\"IANA"
+"(URN)"
+"and"
+"USE"
+"false"
+"(IF"
+"USA"
+"URL,"
+"an"
+"To"
+"as"
+"(%7E)"
+"at"
+"file"
+"need"
+"any"
+"\"%E3%82%A2\"."
+"physical"
+"1*HEXDIG"
+"no"
+"[RFC1737]"
+"-"
+"invalid"
+"A."
+"application"
+"valid"
+"take"
+"which"
+"test"
+"[RFC2732],"
+"you"
+"="
+"GRAVE"
+"<URI>"
+"[RFC2396],"
+"2B:"
+"period,"
+"UDP,"
+"[RFC1535]"
+"T."
+"(UCS)\","
+"U"
+"A-F."
+"T.,"
+"]"
+"[RFC2718]."
+"D."
+"persistent"
+"traditional"
+"L.,"
+"As"
+"IMPLIED"
+"(URL)"
+"ALPHA"
+"[RFC3305]."
+"H.,"
+"\"MIME"
+
+# This comes from https://crbug.com/1075515.
+"FilEsysteM:htTp:E=/."
+
+# This comes from https://crbug.com/1128999.
+"file:///.//"
+"file:////"
+
+# Strings declared in url/url_constants.cc.
+"://"
+"about"
+"about:blank"
+"about:srcdoc"
+"blank"
+"blob"
+"cid"
+"content"
+"data"
+"file"
+"filesystem"
+"ftp"
+"http"
+"https"
+"javascript"
+"mailto"
+"quic-transport"
+"srcdoc"
+"tel"
+"ws"
+"wss"
diff --git a/gurl_unittest.cc b/gurl_unittest.cc
new file mode 100644
index 00000000000..af8421d9742
--- /dev/null
+++ b/gurl_unittest.cc
@@ -0,0 +1,1180 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/gurl.h"
+
+#include <stddef.h>
+
+#include "base/strings/string_number_conversions.h"
+#include "base/strings/utf_string_conversions.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "url/gurl_abstract_tests.h"
+#include "url/origin.h"
+#include "url/url_canon.h"
+#include "url/url_test_utils.h"
+
+namespace url {
+
+namespace {
+
+// Helper for the GURLTest.Types test: parses |src| into a GURL and returns its
+// spec, using the accessor that also works when the URL is invalid.
+std::string TypesTestCase(const char* src) {
+  return GURL(src).possibly_invalid_spec();
+}
+
+}  // namespace
+
+// URLs of different kinds must be routed to different canonicalizers and come
+// out canonicalized accordingly.
+TEST(GURLTest, Types) {
+  // An unknown scheme is handled as a path URL, "://" notwithstanding, so the
+  // input comes back unchanged.
+  EXPECT_EQ("something:///HOSTNAME.com/",
+            TypesTestCase("something:///HOSTNAME.com/"));
+
+  // A known scheme, by contrast, always gets standard URL handling no matter
+  // how many slashes follow the colon.
+  static constexpr const char* kHttpInputs[] = {
+      "http:HOSTNAME.com",
+      "http:/HOSTNAME.com",
+      "http://HOSTNAME.com",
+      "http:///HOSTNAME.com",
+  };
+  for (const char* input : kHttpInputs) {
+    EXPECT_EQ("http://hostname.com/", TypesTestCase(input)) << input;
+  }
+
+#ifdef WIN32
+  // Inputs shaped like Windows absolute paths or UNC paths become file: URLs.
+  EXPECT_EQ("file:///C:/foo.txt", TypesTestCase("c:\\foo.txt"));
+  EXPECT_EQ("file:///Z:/foo.txt", TypesTestCase("Z|foo.txt"));
+  EXPECT_EQ("file://server/foo.txt", TypesTestCase("\\\\server\\foo.txt"));
+  EXPECT_EQ("file://server/foo.txt", TypesTestCase("//server/foo.txt"));
+#endif
+}
+
+// Exercises construction of a GURL and access to each parsed component. The
+// parser has its own tests and is assumed to work; the point here is that the
+// GURL object exposes the parse results correctly.
+TEST(GURLTest, Components) {
+  // An empty input produces an empty, invalid URL.
+  const GURL empty_url(u"");
+  EXPECT_FALSE(empty_url.is_valid());
+  EXPECT_TRUE(empty_url.is_empty());
+
+  const GURL url(u"http://user:pass@google.com:99/foo;bar?q=a#ref");
+  EXPECT_TRUE(url.is_valid());
+  EXPECT_FALSE(url.is_empty());
+  EXPECT_TRUE(url.SchemeIs("http"));
+  EXPECT_FALSE(url.SchemeIsFile());
+
+  // The spec is the narrow version of the URL and should equal the wide
+  // string the GURL was built from.
+  EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url.spec());
+
+  // Each getter returns its own piece of the spec.
+  EXPECT_EQ("http", url.scheme());
+  EXPECT_EQ("user", url.username());
+  EXPECT_EQ("pass", url.password());
+  EXPECT_EQ("google.com", url.host());
+  EXPECT_EQ("99", url.port());
+  EXPECT_EQ(99, url.IntPort());
+  EXPECT_EQ("/foo;bar", url.path());
+  EXPECT_EQ("q=a", url.query());
+  EXPECT_EQ("ref", url.ref());
+
+  // Userinfo containing special characters must still parse.
+  const GURL url_special_pass("http://user:%40!$&'()*+,;=:@google.com:12345");
+  EXPECT_TRUE(url_special_pass.is_valid());
+  // Some of the delimiters in the password come back percent-encoded.
+  EXPECT_EQ("%40!$&%27()*+,%3B%3D%3A", url_special_pass.password());
+  EXPECT_EQ("google.com", url_special_pass.host());
+  EXPECT_EQ("12345", url_special_pass.port());
+}
+
+// A default-constructed GURL is invalid and every component getter returns an
+// empty string, with the integer port reported as unspecified.
+TEST(GURLTest, Empty) {
+  const GURL url;
+  EXPECT_FALSE(url.is_valid());
+  EXPECT_EQ("", url.spec());
+
+  // String-valued components.
+  EXPECT_EQ("", url.scheme());
+  EXPECT_EQ("", url.username());
+  EXPECT_EQ("", url.password());
+  EXPECT_EQ("", url.host());
+  EXPECT_EQ("", url.port());
+  EXPECT_EQ("", url.path());
+  EXPECT_EQ("", url.query());
+  EXPECT_EQ("", url.ref());
+
+  // Integer view of the port.
+  EXPECT_EQ(PORT_UNSPECIFIED, url.IntPort());
+}
+
+// Copy construction must reproduce the spec and every component of the source
+// URL, for a valid source as well as for an invalid (default) one.
+TEST(GURLTest, Copy) {
+  const GURL url(u"http://user:pass@google.com:99/foo;bar?q=a#ref");
+
+  const GURL url2(url);
+  EXPECT_TRUE(url2.is_valid());
+  EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url2.spec());
+
+  EXPECT_EQ("http", url2.scheme());
+  EXPECT_EQ("user", url2.username());
+  EXPECT_EQ("pass", url2.password());
+  EXPECT_EQ("google.com", url2.host());
+  EXPECT_EQ("99", url2.port());
+  EXPECT_EQ(99, url2.IntPort());
+  EXPECT_EQ("/foo;bar", url2.path());
+  EXPECT_EQ("q=a", url2.query());
+  EXPECT_EQ("ref", url2.ref());
+
+  // A copy of an invalid URL is itself invalid, with all components empty.
+  const GURL invalid;
+  const GURL invalid2(invalid);
+  EXPECT_FALSE(invalid2.is_valid());
+  EXPECT_EQ("", invalid2.spec());
+
+  EXPECT_EQ("", invalid2.scheme());
+  EXPECT_EQ("", invalid2.username());
+  EXPECT_EQ("", invalid2.password());
+  EXPECT_EQ("", invalid2.host());
+  EXPECT_EQ("", invalid2.port());
+  EXPECT_EQ(PORT_UNSPECIFIED, invalid2.IntPort());
+  EXPECT_EQ("", invalid2.path());
+  EXPECT_EQ("", invalid2.query());
+  EXPECT_EQ("", invalid2.ref());
+}
+
+// Copy assignment must reproduce the spec and every component of the source
+// URL, for a valid source as well as for an invalid (default) one.
+TEST(GURLTest, Assign) {
+  const GURL url(u"http://user:pass@google.com:99/foo;bar?q=a#ref");
+
+  GURL url2;
+  url2 = url;
+  EXPECT_TRUE(url2.is_valid());
+  EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url2.spec());
+
+  EXPECT_EQ("http", url2.scheme());
+  EXPECT_EQ("user", url2.username());
+  EXPECT_EQ("pass", url2.password());
+  EXPECT_EQ("google.com", url2.host());
+  EXPECT_EQ("99", url2.port());
+  EXPECT_EQ(99, url2.IntPort());
+  EXPECT_EQ("/foo;bar", url2.path());
+  EXPECT_EQ("q=a", url2.query());
+  EXPECT_EQ("ref", url2.ref());
+
+  // Assigning from an invalid URL leaves the target invalid, with all
+  // components empty.
+  const GURL invalid;
+  GURL invalid2;
+  invalid2 = invalid;
+  EXPECT_FALSE(invalid2.is_valid());
+  EXPECT_EQ("", invalid2.spec());
+
+  EXPECT_EQ("", invalid2.scheme());
+  EXPECT_EQ("", invalid2.username());
+  EXPECT_EQ("", invalid2.password());
+  EXPECT_EQ("", invalid2.host());
+  EXPECT_EQ("", invalid2.port());
+  EXPECT_EQ(PORT_UNSPECIFIED, invalid2.IntPort());
+  EXPECT_EQ("", invalid2.path());
+  EXPECT_EQ("", invalid2.query());
+  EXPECT_EQ("", invalid2.ref());
+}
+
+// This is a regression test for http://crbug.com/309975: assigning a GURL to
+// itself must not crash. A filesystem: URL is used as the subject. The test
+// has no explicit expectations; it passes as long as the assignment completes.
+TEST(GURLTest, SelfAssign) {
+  GURL a("filesystem:http://example.com/temporary/");
+  // This should not crash.
+  a = *&a;  // The *& defeats Clang's -Wself-assign warning.
+}
+
+// Copying a filesystem: URL must preserve both the outer URL and its inner
+// URL. The credentials given in the input do not appear in the expected spec,
+// and the authority is expected on the inner URL rather than the outer one.
+TEST(GURLTest, CopyFileSystem) {
+  const GURL url(
+      u"filesystem:https://user:pass@google.com:99/t/foo;bar?q=a#ref");
+
+  const GURL url2(url);
+  EXPECT_TRUE(url2.is_valid());
+  EXPECT_EQ("filesystem:https://google.com:99/t/foo;bar?q=a#ref", url2.spec());
+
+  // Outer URL: no authority, but it carries the path, query and ref.
+  EXPECT_EQ("filesystem", url2.scheme());
+  EXPECT_EQ("", url2.username());
+  EXPECT_EQ("", url2.password());
+  EXPECT_EQ("", url2.host());
+  EXPECT_EQ("", url2.port());
+  EXPECT_EQ(PORT_UNSPECIFIED, url2.IntPort());
+  EXPECT_EQ("/foo;bar", url2.path());
+  EXPECT_EQ("q=a", url2.query());
+  EXPECT_EQ("ref", url2.ref());
+
+  // Inner URL: scheme, host, port and the "/t" type segment only.
+  const GURL* const inner = url2.inner_url();
+  ASSERT_TRUE(inner);
+  EXPECT_EQ("https", inner->scheme());
+  EXPECT_EQ("", inner->username());
+  EXPECT_EQ("", inner->password());
+  EXPECT_EQ("google.com", inner->host());
+  EXPECT_EQ("99", inner->port());
+  EXPECT_EQ(99, inner->IntPort());
+  EXPECT_EQ("/t", inner->path());
+  EXPECT_EQ("", inner->query());
+  EXPECT_EQ("", inner->ref());
+}
+
+// is_valid() must accept every spec in |valid_cases| and reject every spec in
+// |invalid_cases|. A failure message names the offending spec.
+TEST(GURLTest, IsValid) {
+  const char* valid_cases[] = {
+      "http://google.com",
+      "unknown://google.com",
+      "http://user:pass@google.com",
+      "http://google.com:12345",
+      "http://google.com:0",  // 0 is a valid port
+      "http://google.com/path",
+      "http://google.com//path",
+      "http://google.com?k=v#fragment",
+      "http://user:pass@google.com:12345/path?k=v#fragment",
+      "http:/path",
+      "http:path",
+  };
+  for (const char* spec : valid_cases) {
+    EXPECT_TRUE(GURL(spec).is_valid()) << "Case: " << spec;
+  }
+
+  const char* invalid_cases[] = {
+      "http://?k=v",
+      "http:://google.com",
+      "http//google.com",
+      "http://google.com:12three45",
+      "file://server:123",  // file: URLs cannot have a port
+      "file://server:0",
+      "://google.com",
+      "path",
+  };
+  for (const char* spec : invalid_cases) {
+    EXPECT_FALSE(GURL(spec).is_valid()) << "Case: " << spec;
+  }
+}
+
+// RFC3986 allows exactly two slashes before an authority. GURL deliberately
+// tolerates more: slashes beyond the second are ignored and whatever follows
+// is parsed as the authority.
+TEST(GURLTest, ExtraSlashesBeforeAuthority) {
+  const GURL url("http:///host");
+  EXPECT_EQ("/", url.path());
+  EXPECT_EQ("host", url.host());
+}
+
+// Given invalid URLs, we should still get most of the components. Each table
+// row lists an input, the expected possibly-invalid spec, and the expected
+// scheme, host, port and path. Username, password, query and ref are expected
+// to be empty and the integer port to be PORT_INVALID for every row.
+TEST(GURLTest, ComponentGettersWorkEvenForInvalidURL) {
+  constexpr struct InvalidURLTestExpectations {
+    const char* url;
+    const char* spec;
+    const char* scheme;
+    const char* host;
+    const char* port;
+    const char* path;
+    // Extend as needed...
+  } expectations[] = {
+      {
+          "http:google.com:foo",
+          "http://google.com:foo/",
+          "http",
+          "google.com",
+          "foo",
+          "/",
+      },
+      {
+          "https:google.com:foo",
+          "https://google.com:foo/",
+          "https",
+          "google.com",
+          "foo",
+          "/",
+      },
+  };
+
+  for (const auto& e : expectations) {
+    // Name the row under test so a failure can be traced back to its input.
+    SCOPED_TRACE(testing::Message() << "Test input: " << e.url);
+    const GURL url(e.url);
+    EXPECT_FALSE(url.is_valid());
+    EXPECT_EQ(e.spec, url.possibly_invalid_spec());
+    EXPECT_EQ(e.scheme, url.scheme());
+    EXPECT_EQ("", url.username());
+    EXPECT_EQ("", url.password());
+    EXPECT_EQ(e.host, url.host());
+    EXPECT_EQ(e.port, url.port());
+    EXPECT_EQ(PORT_INVALID, url.IntPort());
+    EXPECT_EQ(e.path, url.path());
+    EXPECT_EQ("", url.query());
+    EXPECT_EQ("", url.ref());
+  }
+}
+
+// Checks that GURL::Resolve() is wired up correctly to the canonicalizer for
+// both the 8-bit and the UTF-16 entry points. Each row gives a base URL, a
+// relative reference, whether the result must be valid, and the expected spec.
+TEST(GURLTest, Resolve) {
+  // The tricky cases for relative URL resolving are tested in the
+  // canonicalizer unit test. Here, we just test that the GURL integration
+  // works properly.
+  struct ResolveCase {
+    const char* base;
+    const char* relative;
+    bool expected_valid;
+    const char* expected;
+  } resolve_cases[] = {
+      {"http://www.google.com/", "foo.html", true,
+       "http://www.google.com/foo.html"},
+      {"http://www.google.com/foo/", "bar", true,
+       "http://www.google.com/foo/bar"},
+      {"http://www.google.com/foo/", "/bar", true, "http://www.google.com/bar"},
+      {"http://www.google.com/foo", "bar", true, "http://www.google.com/bar"},
+      {"http://www.google.com/", "http://images.google.com/foo.html", true,
+       "http://images.google.com/foo.html"},
+      {"http://www.google.com/", "http://images.\tgoogle.\ncom/\rfoo.html",
+       true, "http://images.google.com/foo.html"},
+      {"http://www.google.com/blah/bloo?c#d", "../../../hello/./world.html?a#b",
+       true, "http://www.google.com/hello/world.html?a#b"},
+      {"http://www.google.com/foo#bar", "#com", true,
+       "http://www.google.com/foo#com"},
+      {"http://www.google.com/", "Https:images.google.com", true,
+       "https://images.google.com/"},
+      // A non-standard base can be replaced with a standard absolute URL.
+      {"data:blahblah", "http://google.com/", true, "http://google.com/"},
+      {"data:blahblah", "http:google.com", true, "http://google.com/"},
+      {"data:blahblah", "https:google.com", true, "https://google.com/"},
+      // Filesystem URLs have different paths to test.
+      {"filesystem:http://www.google.com/type/", "foo.html", true,
+       "filesystem:http://www.google.com/type/foo.html"},
+      {"filesystem:http://www.google.com/type/", "../foo.html", true,
+       "filesystem:http://www.google.com/type/foo.html"},
+      // https://crbug.com/530123 - scheme validation (e.g. are "10.0.0.7:"
+      // or "x1:" valid schemes) when deciding if |relative| is an absolute url.
+      {"file:///some/dir/ip-relative.html", "10.0.0.7:8080/foo.html", true,
+       "file:///some/dir/10.0.0.7:8080/foo.html"},
+      {"file:///some/dir/", "1://host", true, "file:///some/dir/1://host"},
+      {"file:///some/dir/", "x1://host", true, "x1://host"},
+      {"file:///some/dir/", "X1://host", true, "x1://host"},
+      {"file:///some/dir/", "x.://host", true, "x.://host"},
+      {"file:///some/dir/", "x+://host", true, "x+://host"},
+      {"file:///some/dir/", "x-://host", true, "x-://host"},
+      {"file:///some/dir/", "x!://host", true, "file:///some/dir/x!://host"},
+      {"file:///some/dir/", "://host", true, "file:///some/dir/://host"},
+  };
+
+  for (size_t i = 0; i < std::size(resolve_cases); i++) {
+    const ResolveCase& test_case = resolve_cases[i];
+
+    // 8-bit code path.
+    const GURL input(test_case.base);
+    const GURL output = input.Resolve(test_case.relative);
+    EXPECT_EQ(test_case.expected_valid, output.is_valid()) << i;
+    EXPECT_EQ(test_case.expected, output.spec()) << i;
+    EXPECT_EQ(output.SchemeIsFileSystem(), output.inner_url() != nullptr) << i;
+
+    // Wide code path. The base is built from UTF-16 as well, and it is this
+    // UTF-16-constructed base (not the 8-bit |input|) that gets resolved
+    // against, so that the wide constructor is actually exercised.
+    const GURL inputw(base::UTF8ToUTF16(test_case.base));
+    const GURL outputw =
+        inputw.Resolve(base::UTF8ToUTF16(test_case.relative));
+    EXPECT_EQ(test_case.expected_valid, outputw.is_valid()) << i;
+    EXPECT_EQ(test_case.expected, outputw.spec()) << i;
+    EXPECT_EQ(outputw.SchemeIsFileSystem(), outputw.inner_url() != nullptr)
+        << i;
+  }
+}
+
+// Checks DeprecatedGetOriginAsURL(): each row maps an input URL to the spec
+// expected for its origin URL. An empty expectation means the returned URL is
+// expected to have an empty spec.
+TEST(GURLTest, GetOrigin) {
+  struct TestCase {
+    const char* input;
+    const char* expected;
+  } cases[] = {
+      {"http://www.google.com", "http://www.google.com/"},
+      {"javascript:window.alert(\"hello,world\");", ""},
+      {"http://user:pass@www.google.com:21/blah#baz",
+       "http://www.google.com:21/"},
+      {"http://user@www.google.com", "http://www.google.com/"},
+      {"http://:pass@www.google.com", "http://www.google.com/"},
+      {"http://:@www.google.com", "http://www.google.com/"},
+      {"filesystem:http://www.google.com/temp/foo?q#b",
+       "http://www.google.com/"},
+      {"filesystem:http://user:pass@google.com:21/blah#baz",
+       "http://google.com:21/"},
+      {"blob:null/guid-goes-here", ""},
+      {"blob:http://origin/guid-goes-here", "" /* should be http://origin/ */},
+  };
+  for (const TestCase& test_case : cases) {
+    // Several rows share the same expectation, so name the input explicitly
+    // to make a failure attributable to its row.
+    SCOPED_TRACE(testing::Message() << "Test input: " << test_case.input);
+    const GURL url(test_case.input);
+    const GURL origin = url.DeprecatedGetOriginAsURL();
+    EXPECT_EQ(test_case.expected, origin.spec());
+  }
+}
+
+// Checks GetAsReferrer(): each row maps an input URL to the spec expected for
+// the referrer URL. An empty expectation means the returned URL is expected to
+// have an empty spec.
+TEST(GURLTest, GetAsReferrer) {
+  struct TestCase {
+    const char* input;
+    const char* expected;
+  } cases[] = {
+    {"http://www.google.com", "http://www.google.com/"},
+    {"http://user:pass@www.google.com:21/blah#baz", "http://www.google.com:21/blah"},
+    {"http://user@www.google.com", "http://www.google.com/"},
+    {"http://:pass@www.google.com", "http://www.google.com/"},
+    {"http://:@www.google.com", "http://www.google.com/"},
+    {"http://www.google.com/temp/foo?q#b", "http://www.google.com/temp/foo?q"},
+    {"not a url", ""},
+    {"unknown-scheme://foo.html", ""},
+    {"file:///tmp/test.html", ""},
+    {"https://www.google.com", "https://www.google.com/"},
+  };
+  for (const TestCase& test_case : cases) {
+    // Several rows share the same expectation, so name the input explicitly
+    // to make a failure attributable to its row.
+    SCOPED_TRACE(testing::Message() << "Test input: " << test_case.input);
+    const GURL url(test_case.input);
+    const GURL referrer = url.GetAsReferrer();
+    EXPECT_EQ(test_case.expected, referrer.spec());
+  }
+}
+
+// Checks GetWithEmptyPath(): each row maps an input URL to the spec expected
+// for the URL returned by the call. An empty expectation means the returned
+// URL is expected to have an empty spec.
+TEST(GURLTest, GetWithEmptyPath) {
+  struct TestCase {
+    const char* input;
+    const char* expected;
+  } cases[] = {
+    {"http://www.google.com", "http://www.google.com/"},
+    {"javascript:window.alert(\"hello, world\");", ""},
+    {"http://www.google.com/foo/bar.html?baz=22", "http://www.google.com/"},
+    {"filesystem:http://www.google.com/temporary/bar.html?baz=22", "filesystem:http://www.google.com/temporary/"},
+    {"filesystem:file:///temporary/bar.html?baz=22", "filesystem:file:///temporary/"},
+  };
+
+  for (const TestCase& test_case : cases) {
+    // Name the input so a failure can be traced back to its row.
+    SCOPED_TRACE(testing::Message() << "Test input: " << test_case.input);
+    const GURL url(test_case.input);
+    const GURL empty_path = url.GetWithEmptyPath();
+    EXPECT_EQ(test_case.expected, empty_path.spec());
+  }
+}
+
+// Checks GetWithoutFilename(): each row maps an input URL to the spec expected
+// for the URL returned by the call. An empty expectation means the returned
+// URL is expected to have an empty spec. Failures report the row index.
+TEST(GURLTest, GetWithoutFilename) {
+  struct TestCase {
+    const char* input;
+    const char* expected;
+  } cases[] = {
+    // Common Standard URLs.
+    {"https://www.google.com",                    "https://www.google.com/"},
+    {"https://www.google.com/",                   "https://www.google.com/"},
+    {"https://www.google.com/maps.htm",           "https://www.google.com/"},
+    {"https://www.google.com/maps/",              "https://www.google.com/maps/"},
+    {"https://www.google.com/index.html",         "https://www.google.com/"},
+    {"https://www.google.com/index.html?q=maps",  "https://www.google.com/"},
+    {"https://www.google.com/index.html#maps/",   "https://www.google.com/"},
+    {"https://foo:bar@www.google.com/maps.htm",   "https://foo:bar@www.google.com/"},
+    {"https://www.google.com/maps/au/index.html", "https://www.google.com/maps/au/"},
+    {"https://www.google.com/maps/au/north",      "https://www.google.com/maps/au/"},
+    {"https://www.google.com/maps/au/north/",     "https://www.google.com/maps/au/north/"},
+    {"https://www.google.com/maps/au/index.html?q=maps#fragment/",     "https://www.google.com/maps/au/"},
+    {"http://www.google.com:8000/maps/au/index.html?q=maps#fragment/", "http://www.google.com:8000/maps/au/"},
+    {"https://www.google.com/maps/au/north/?q=maps#fragment",          "https://www.google.com/maps/au/north/"},
+    {"https://www.google.com/maps/au/north?q=maps#fragment",           "https://www.google.com/maps/au/"},
+    // Less common standard URLs.
+    {"filesystem:http://www.google.com/temporary/bar.html?baz=22", "filesystem:http://www.google.com/temporary/"},
+    {"file:///temporary/bar.html?baz=22","file:///temporary/"},
+    {"ftp://foo/test/index.html",        "ftp://foo/test/"},
+    {"gopher://foo/test/index.html",     "gopher://foo/test/"},
+    {"ws://foo/test/index.html",         "ws://foo/test/"},
+    // Non-standard, hierarchical URLs.
+    {"chrome://foo/bar.html", "chrome://foo/"},
+    {"httpa://foo/test/index.html", "httpa://foo/test/"},
+    // Non-standard, non-hierarchical URLs.
+    {"blob:https://foo.bar/test/index.html", ""},
+    {"about:blank", ""},
+    {"data:foobar", ""},
+    {"scheme:opaque_data", ""},
+    // Invalid URLs.
+    {"foobar", ""},
+  };
+
+  for (size_t i = 0; i < std::size(cases); i++) {
+    const TestCase& test_case = cases[i];
+    const GURL without_filename = GURL(test_case.input).GetWithoutFilename();
+    EXPECT_EQ(test_case.expected, without_filename.spec()) << i;
+  }
+}
+
+// Checks GetWithoutRef(): each row maps an input URL to the spec expected for
+// the URL returned by the call. Most inputs appear both with and without a
+// "#..." ref, and both variants share one expectation.
+TEST(GURLTest, GetWithoutRef) {
+  struct TestCase {
+    const char* input;
+    const char* expected;
+  } cases[] = {
+      // Common Standard URLs.
+      {"https://www.google.com/index.html",
+       "https://www.google.com/index.html"},
+      {"https://www.google.com/index.html#maps/",
+       "https://www.google.com/index.html"},
+
+      {"https://foo:bar@www.google.com/maps.htm",
+       "https://foo:bar@www.google.com/maps.htm"},
+      {"https://foo:bar@www.google.com/maps.htm#fragment",
+       "https://foo:bar@www.google.com/maps.htm"},
+
+      {"https://www.google.com/maps/au/index.html?q=maps",
+       "https://www.google.com/maps/au/index.html?q=maps"},
+      {"https://www.google.com/maps/au/index.html?q=maps#fragment/",
+       "https://www.google.com/maps/au/index.html?q=maps"},
+
+      {"http://www.google.com:8000/maps/au/index.html?q=maps",
+       "http://www.google.com:8000/maps/au/index.html?q=maps"},
+      {"http://www.google.com:8000/maps/au/index.html?q=maps#fragment/",
+       "http://www.google.com:8000/maps/au/index.html?q=maps"},
+
+      {"https://www.google.com/maps/au/north/?q=maps",
+       "https://www.google.com/maps/au/north/?q=maps"},
+      {"https://www.google.com/maps/au/north?q=maps#fragment",
+       "https://www.google.com/maps/au/north?q=maps"},
+
+      // Less common standard URLs.
+      {"filesystem:http://www.google.com/temporary/bar.html?baz=22",
+       "filesystem:http://www.google.com/temporary/bar.html?baz=22"},
+      {"file:///temporary/bar.html?baz=22#fragment",
+       "file:///temporary/bar.html?baz=22"},
+
+      {"ftp://foo/test/index.html", "ftp://foo/test/index.html"},
+      {"ftp://foo/test/index.html#fragment", "ftp://foo/test/index.html"},
+
+      {"gopher://foo/test/index.html", "gopher://foo/test/index.html"},
+      {"gopher://foo/test/index.html#fragment", "gopher://foo/test/index.html"},
+
+      {"ws://foo/test/index.html", "ws://foo/test/index.html"},
+      {"ws://foo/test/index.html#fragment", "ws://foo/test/index.html"},
+
+      // Non-standard, hierarchical URLs.
+      {"chrome://foo/bar.html", "chrome://foo/bar.html"},
+      {"chrome://foo/bar.html#fragment", "chrome://foo/bar.html"},
+
+      {"httpa://foo/test/index.html", "httpa://foo/test/index.html"},
+      {"httpa://foo/test/index.html#fragment", "httpa://foo/test/index.html"},
+
+      // Non-standard, non-hierarchical URLs.
+      {"blob:https://foo.bar/test/index.html",
+       "blob:https://foo.bar/test/index.html"},
+      {"blob:https://foo.bar/test/index.html#fragment",
+       "blob:https://foo.bar/test/index.html"},
+
+      {"about:blank", "about:blank"},
+      {"about:blank#ref", "about:blank"},
+
+      {"data:foobar", "data:foobar"},
+      {"scheme:opaque_data", "scheme:opaque_data"},
+      // Invalid URLs.
+      {"foobar", ""},
+  };
+
+  for (const TestCase& test_case : cases) {
+    // Paired rows share one expectation, so name the input explicitly to make
+    // a failure attributable to its row.
+    SCOPED_TRACE(testing::Message() << "Test input: " << test_case.input);
+    const GURL url(test_case.input);
+    const GURL without_ref = url.GetWithoutRef();
+    EXPECT_EQ(test_case.expected, without_ref.spec());
+  }
+}
+
+// Checks ReplaceComponents() across URL kinds. Each row supplies a base URL, a
+// function that builds a GURL::Replacements and applies it to that base, and
+// the spec expected from the result.
+TEST(GURLTest, Replacements) {
+  // The URL canonicalizer replacement test will handle most of these case.
+  // The most important thing to do here is to check that the proper
+  // canonicalizer gets called based on the scheme of the input.
+  struct ReplaceCase {
+    using ApplyReplacementsFunc = GURL(const GURL&);
+
+    const char* base;
+    ApplyReplacementsFunc* apply_replacements;
+    const char* expected;
+  } replace_cases[] = {
+      {.base = "http://www.google.com/foo/bar.html?foo#bar",
+       .apply_replacements =
+           +[](const GURL& url) {
+             GURL::Replacements replacements;
+             replacements.SetPathStr("/");
+             replacements.ClearQuery();
+             replacements.ClearRef();
+             return url.ReplaceComponents(replacements);
+           },
+       .expected = "http://www.google.com/"},
+      {.base = "http://www.google.com/foo/bar.html?foo#bar",
+       .apply_replacements =
+           +[](const GURL& url) {
+             GURL::Replacements replacements;
+             replacements.SetSchemeStr("javascript");
+             replacements.ClearUsername();
+             replacements.ClearPassword();
+             replacements.ClearHost();
+             replacements.ClearPort();
+             replacements.SetPathStr("window.open('foo');");
+             replacements.ClearQuery();
+             replacements.ClearRef();
+             return url.ReplaceComponents(replacements);
+           },
+       .expected = "javascript:window.open('foo');"},
+      {.base = "file:///C:/foo/bar.txt",
+       .apply_replacements =
+           +[](const GURL& url) {
+             GURL::Replacements replacements;
+             replacements.SetSchemeStr("http");
+             replacements.SetHostStr("www.google.com");
+             replacements.SetPortStr("99");
+             replacements.SetPathStr("/foo");
+             replacements.SetQueryStr("search");
+             replacements.SetRefStr("ref");
+             return url.ReplaceComponents(replacements);
+           },
+       .expected = "http://www.google.com:99/foo?search#ref"},
+#ifdef WIN32
+      {.base = "http://www.google.com/foo/bar.html?foo#bar",
+       .apply_replacements =
+           +[](const GURL& url) {
+             GURL::Replacements replacements;
+             replacements.SetSchemeStr("file");
+             replacements.ClearUsername();
+             replacements.ClearPassword();
+             replacements.ClearHost();
+             replacements.ClearPort();
+             replacements.SetPathStr("c:\\");
+             replacements.ClearQuery();
+             replacements.ClearRef();
+             return url.ReplaceComponents(replacements);
+           },
+       .expected = "file:///C:/"},
+#endif
+      {.base = "filesystem:http://www.google.com/foo/bar.html?foo#bar",
+       .apply_replacements =
+           +[](const GURL& url) {
+             GURL::Replacements replacements;
+             replacements.SetPathStr("/");
+             replacements.ClearQuery();
+             replacements.ClearRef();
+             return url.ReplaceComponents(replacements);
+           },
+       .expected = "filesystem:http://www.google.com/foo/"},
+      // Lengthen the URL instead of shortening it, to test creation of
+      // inner_url.
+      {.base = "filesystem:http://www.google.com/foo/",
+       .apply_replacements =
+           +[](const GURL& url) {
+             GURL::Replacements replacements;
+             replacements.SetPathStr("bar.html");
+             replacements.SetQueryStr("foo");
+             replacements.SetRefStr("bar");
+             return url.ReplaceComponents(replacements);
+           },
+       .expected = "filesystem:http://www.google.com/foo/bar.html?foo#bar"},
+  };
+
+  for (const ReplaceCase& test_case : replace_cases) {
+    const GURL output = test_case.apply_replacements(GURL(test_case.base));
+    EXPECT_EQ(test_case.expected, output.spec());
+
+    // An inner URL must be present exactly when the result is a filesystem:
+    // URL.
+    EXPECT_EQ(output.SchemeIsFileSystem(), output.inner_url() != nullptr);
+    if (!output.SchemeIsFileSystem())
+      continue;
+
+    // TODO(mmenke): inner_url()->spec() is currently the same as the spec()
+    // for the GURL itself.  This should be fixed.
+    // See https://crbug.com/619596
+    EXPECT_EQ(test_case.expected, output.inner_url()->spec());
+  }
+}
+
+TEST(GURLTest, ClearFragmentOnDataUrl) {
+  // http://crbug.com/291747 - a data URL may legitimately have trailing
+  // whitespace in the spec after the ref is cleared. Verify that this does not
+  // trigger the Parsed importing validation DCHECK in GURL.
+  GURL url(" data: one # two ");
+  EXPECT_TRUE(url.is_valid());
+
+  // By default the leading and trailing whitespace will have been stripped.
+  EXPECT_EQ("data: one #%20two", url.spec());
+
+  // Clear the URL's ref and observe the trailing whitespace.
+  GURL::Replacements repl;
+  repl.ClearRef();
+  GURL url_no_ref = url.ReplaceComponents(repl);
+  EXPECT_TRUE(url_no_ref.is_valid());
+  EXPECT_EQ("data: one ", url_no_ref.spec());
+
+  // Importing a parsed URL via this constructor overload will retain trailing
+  // whitespace.
+  GURL import_url(url_no_ref.spec(),
+                  url_no_ref.parsed_for_possibly_invalid_spec(),
+                  url_no_ref.is_valid());
+  EXPECT_TRUE(import_url.is_valid());
+  EXPECT_EQ(url_no_ref, import_url);
+  EXPECT_EQ("data: one ", import_url.spec());
+  EXPECT_EQ(" one ", import_url.path());
+
+  // For completeness, test that re-parsing the same URL rather than importing
+  // it trims the trailing whitespace.
+  GURL reparsed_url(url_no_ref.spec());
+  EXPECT_TRUE(reparsed_url.is_valid());
+  EXPECT_EQ("data: one", reparsed_url.spec());
+}
+
+TEST(GURLTest, PathForRequest) {
+  struct TestCase {
+    const char* input;           // URL spec to parse.
+    const char* expected;        // Expected PathForRequest() of the URL.
+    const char* inner_expected;  // Same for inner_url(); nullptr if none.
+  } cases[] = {
+      {"http://www.google.com", "/", nullptr},
+      {"http://www.google.com/", "/", nullptr},
+      {"http://www.google.com/foo/bar.html?baz=22", "/foo/bar.html?baz=22",
+       nullptr},
+      {"http://www.google.com/foo/bar.html#ref", "/foo/bar.html", nullptr},
+      {"http://www.google.com/foo/bar.html?query#ref", "/foo/bar.html?query",
+       nullptr},
+      {"filesystem:http://www.google.com/temporary/foo/bar.html?query#ref",
+       "/foo/bar.html?query", "/temporary"},
+      {"filesystem:http://www.google.com/temporary/foo/bar.html?query",
+       "/foo/bar.html?query", "/temporary"},
+  };
+
+  for (const TestCase& test : cases) {
+    GURL url(test.input);
+    EXPECT_EQ(test.expected, url.PathForRequest());
+    EXPECT_EQ(test.expected, url.PathForRequestPiece());
+    EXPECT_EQ(test.inner_expected == nullptr, url.inner_url() == nullptr);
+    if (url.inner_url() && test.inner_expected) {
+      EXPECT_EQ(test.inner_expected, url.inner_url()->PathForRequest());
+      EXPECT_EQ(test.inner_expected,
+                url.inner_url()->PathForRequestPiece());
+    }
+  }
+}
+
+TEST(GURLTest, EffectiveIntPort) {
+  struct PortTest {
+    const char* spec;       // URL spec to parse.
+    int expected_int_port;  // Expected EffectiveIntPort() result.
+  } port_tests[] = {
+    // http
+    {"http://www.google.com/", 80},
+    {"http://www.google.com:80/", 80},
+    {"http://www.google.com:443/", 443},
+
+    // https
+    {"https://www.google.com/", 443},
+    {"https://www.google.com:443/", 443},
+    {"https://www.google.com:80/", 80},
+
+    // ftp
+    {"ftp://www.google.com/", 21},
+    {"ftp://www.google.com:21/", 21},
+    {"ftp://www.google.com:80/", 80},
+
+    // file - no port
+    {"file://www.google.com/", PORT_UNSPECIFIED},
+    {"file://www.google.com:443/", PORT_UNSPECIFIED},
+
+    // data - no port
+    {"data:www.google.com:90", PORT_UNSPECIFIED},
+    {"data:www.google.com", PORT_UNSPECIFIED},
+
+    // filesystem - no port
+    {"filesystem:http://www.google.com:90/t/foo", PORT_UNSPECIFIED},
+    {"filesystem:file:///t/foo", PORT_UNSPECIFIED},
+  };
+
+  for (const PortTest& port_test : port_tests) {
+    GURL url(port_test.spec);
+    EXPECT_EQ(port_test.expected_int_port, url.EffectiveIntPort());
+  }
+}
+
+TEST(GURLTest, IPAddress) {
+  struct IPTest {
+    const char* spec;  // URL spec to parse.
+    bool expected_ip;  // Expected HostIsIPAddress() result.
+  } ip_tests[] = {
+    {"http://www.google.com/", false},
+    {"http://192.168.9.1/", true},
+    {"http://192.168.9.1.2/", false},
+    {"http://192.168.m.1/", false},
+    {"http://2001:db8::1/", false},
+    {"http://[2001:db8::1]/", true},
+    {"", false},
+    {"some random input!", false},
+  };
+
+  for (const IPTest& ip_test : ip_tests) {
+    GURL url(ip_test.spec);
+    EXPECT_EQ(ip_test.expected_ip, url.HostIsIPAddress());
+  }
+}
+
+TEST(GURLTest, HostNoBrackets) {
+  struct TestCase {
+    const char* input;               // URL spec to parse.
+    const char* expected_host;       // Expected host().
+    const char* expected_plainhost;  // Expected HostNoBrackets().
+  } cases[] = {
+    {"http://www.google.com", "www.google.com", "www.google.com"},
+    {"http://[2001:db8::1]/", "[2001:db8::1]", "2001:db8::1"},
+    {"http://[::]/", "[::]", "::"},
+
+    // Don't require a valid URL, but don't crash either.
+    {"http://[]/", "[]", ""},
+    {"http://[x]/", "[x]", "x"},
+    {"http://[x/", "[x", "[x"},
+    {"http://x]/", "x]", "x]"},
+    {"http://[/", "[", "["},
+    {"http://]/", "]", "]"},
+    {"", "", ""},
+  };
+  for (const TestCase& test : cases) {
+    GURL url(test.input);
+    EXPECT_EQ(test.expected_host, url.host());
+    EXPECT_EQ(test.expected_plainhost, url.HostNoBrackets());
+    EXPECT_EQ(test.expected_plainhost, url.HostNoBracketsPiece());
+  }
+}
+
+TEST(GURLTest, DomainIs) {
+  GURL url_1("http://google.com/foo");  // Exact host match.
+  EXPECT_TRUE(url_1.DomainIs("google.com"));
+
+  // Subdomain and port are ignored.
+  GURL url_2("http://www.google.com:99/foo");
+  EXPECT_TRUE(url_2.DomainIs("google.com"));
+
+  // Different top-level domain.
+  GURL url_3("http://www.google.com.cn/foo");
+  EXPECT_FALSE(url_3.DomainIs("google.com"));
+
+  // Different host name that merely ends with the same characters.
+  GURL url_4("http://www.iamnotgoogle.com/foo");
+  EXPECT_FALSE(url_4.DomainIs("google.com"));
+
+  // The input must be lower-cased, otherwise DomainIs returns false.
+  GURL url_5("http://www.google.com/foo");
+  EXPECT_FALSE(url_5.DomainIs("Google.com"));
+
+  // If the URL is invalid, DomainIs returns false.
+  GURL invalid_url("google.com");
+  EXPECT_FALSE(invalid_url.is_valid());
+  EXPECT_FALSE(invalid_url.DomainIs("google.com"));
+
+  GURL url_with_escape_chars("https://www.,.test");  // ',' becomes %2C.
+  EXPECT_TRUE(url_with_escape_chars.is_valid());
+  EXPECT_EQ(url_with_escape_chars.host(), "www.%2C.test");
+  EXPECT_TRUE(url_with_escape_chars.DomainIs("%2C.test"));
+}
+
+TEST(GURLTest, DomainIsTerminatingDotBehavior) {
+  // If the host part ends with a dot, it matches input domains
+  // with or without a dot.
+  GURL url_with_dot("http://www.google.com./foo");
+  EXPECT_TRUE(url_with_dot.DomainIs("google.com"));
+  EXPECT_TRUE(url_with_dot.DomainIs("google.com."));
+  EXPECT_TRUE(url_with_dot.DomainIs(".com"));
+  EXPECT_TRUE(url_with_dot.DomainIs(".com."));
+
+  // But, if the host name doesn't end with a dot and the input
+  // domain does, then it's considered to not match.
+  GURL url_without_dot("http://google.com/foo");
+  EXPECT_FALSE(url_without_dot.DomainIs("google.com."));
+
+  // If the host ends with two dots, it doesn't match.
+  GURL url_with_two_dots("http://www.google.com../foo");
+  EXPECT_FALSE(url_with_two_dots.DomainIs("google.com"));
+}
+
+TEST(GURLTest, DomainIsWithFilesystemScheme) {  // Uses the nested URL's host.
+  GURL url_1("filesystem:http://www.google.com:99/foo/");
+  EXPECT_TRUE(url_1.DomainIs("google.com"));
+
+  GURL url_2("filesystem:http://www.iamnotgoogle.com/foo/");
+  EXPECT_FALSE(url_2.DomainIs("google.com"));
+}
+
+// Newlines, tabs and carriage returns should be stripped from inputs.
+TEST(GURLTest, Newlines) {
+  // Constructor.
+  GURL url_1(" \t ht\ntp://\twww.goo\rgle.com/as\ndf \n ");
+  EXPECT_EQ("http://www.google.com/asdf", url_1.spec());
+  EXPECT_FALSE(
+      url_1.parsed_for_possibly_invalid_spec().potentially_dangling_markup);
+
+  // Relative path resolver.
+  GURL url_2 = url_1.Resolve(" \n /fo\to\r ");
+  EXPECT_EQ("http://www.google.com/foo", url_2.spec());
+  EXPECT_FALSE(
+      url_2.parsed_for_possibly_invalid_spec().potentially_dangling_markup);
+
+  // Constructor, input also contains '<': expect the dangling markup flag.
+  GURL url_3(" \t ht\ntp://\twww.goo\rgle.com/as\ndf< \n ");
+  EXPECT_EQ("http://www.google.com/asdf%3C", url_3.spec());
+  EXPECT_TRUE(
+      url_3.parsed_for_possibly_invalid_spec().potentially_dangling_markup);
+
+  // Relative path resolver, input also contains '<'.
+  GURL url_4 = url_1.Resolve(" \n /fo\to<\r ");
+  EXPECT_EQ("http://www.google.com/foo%3C", url_4.spec());
+  EXPECT_TRUE(
+      url_4.parsed_for_possibly_invalid_spec().potentially_dangling_markup);
+
+  // Note that newlines are NOT stripped from ReplaceComponents.
+}
+
+TEST(GURLTest, IsStandard) {
+  GURL a("http:foo/bar");  // "http" is standard even without "//".
+  EXPECT_TRUE(a.IsStandard());
+
+  GURL b("foo:bar/baz");  // Unknown scheme "foo" is not standard.
+  EXPECT_FALSE(b.IsStandard());
+
+  GURL c("foo://bar/baz");  // Adding "//" does not make "foo" standard.
+  EXPECT_FALSE(c.IsStandard());
+
+  GURL d("cid:bar@baz");
+  EXPECT_FALSE(d.IsStandard());
+}
+
+TEST(GURLTest, SchemeIsHTTPOrHTTPS) {
+  EXPECT_TRUE(GURL("http://bar/").SchemeIsHTTPOrHTTPS());
+  EXPECT_TRUE(GURL("HTTPS://BAR").SchemeIsHTTPOrHTTPS());  // Upper-case input.
+  EXPECT_FALSE(GURL("ftp://bar/").SchemeIsHTTPOrHTTPS());
+}
+
+TEST(GURLTest, SchemeIsWSOrWSS) {
+  EXPECT_TRUE(GURL("WS://BAR/").SchemeIsWSOrWSS());  // Upper-case input.
+  EXPECT_TRUE(GURL("wss://bar/").SchemeIsWSOrWSS());
+  EXPECT_FALSE(GURL("http://bar/").SchemeIsWSOrWSS());
+}
+
+TEST(GURLTest, SchemeIsCryptographic) {  // https/wss in any letter case.
+  EXPECT_TRUE(GURL("https://foo.bar.com/").SchemeIsCryptographic());
+  EXPECT_TRUE(GURL("HTTPS://foo.bar.com/").SchemeIsCryptographic());
+  EXPECT_TRUE(GURL("HtTpS://foo.bar.com/").SchemeIsCryptographic());
+
+  EXPECT_TRUE(GURL("wss://foo.bar.com/").SchemeIsCryptographic());
+  EXPECT_TRUE(GURL("WSS://foo.bar.com/").SchemeIsCryptographic());
+  EXPECT_TRUE(GURL("WsS://foo.bar.com/").SchemeIsCryptographic());
+
+  EXPECT_FALSE(GURL("http://foo.bar.com/").SchemeIsCryptographic());
+  EXPECT_FALSE(GURL("ws://foo.bar.com/").SchemeIsCryptographic());
+}
+
+TEST(GURLTest, SchemeIsCryptographicStatic) {  // Static string overload.
+  EXPECT_TRUE(GURL::SchemeIsCryptographic("https"));
+  EXPECT_TRUE(GURL::SchemeIsCryptographic("wss"));
+  EXPECT_FALSE(GURL::SchemeIsCryptographic("http"));
+  EXPECT_FALSE(GURL::SchemeIsCryptographic("ws"));
+  EXPECT_FALSE(GURL::SchemeIsCryptographic("ftp"));
+}
+
+TEST(GURLTest, SchemeIsBlob) {
+  EXPECT_TRUE(GURL("BLOB://BAR/").SchemeIsBlob());  // Upper-case input.
+  EXPECT_TRUE(GURL("blob://bar/").SchemeIsBlob());
+  EXPECT_FALSE(GURL("http://bar/").SchemeIsBlob());
+}
+
+TEST(GURLTest, SchemeIsLocal) {  // Local: blob, data, about, filesystem.
+  EXPECT_TRUE(GURL("BLOB://BAR/").SchemeIsLocal());
+  EXPECT_TRUE(GURL("blob://bar/").SchemeIsLocal());
+  EXPECT_TRUE(GURL("DATA:TEXT/HTML,BAR").SchemeIsLocal());
+  EXPECT_TRUE(GURL("data:text/html,bar").SchemeIsLocal());
+  EXPECT_TRUE(GURL("ABOUT:BAR").SchemeIsLocal());
+  EXPECT_TRUE(GURL("about:bar").SchemeIsLocal());
+  EXPECT_TRUE(GURL("FILESYSTEM:HTTP://FOO.EXAMPLE/BAR").SchemeIsLocal());
+  EXPECT_TRUE(GURL("filesystem:http://foo.example/bar").SchemeIsLocal());
+
+  EXPECT_FALSE(GURL("http://bar/").SchemeIsLocal());
+  EXPECT_FALSE(GURL("file:///bar").SchemeIsLocal());  // file: is not local.
+}
+
+// Tests that the 'content' of the URL is properly extracted. This can be
+// complex in cases such as multiple schemes (view-source:http:) or for
+// javascript URLs. See GURL::GetContent for more details.
+TEST(GURLTest, ContentForNonStandardURLs) {
+  struct TestCase {
+    const char* url;       // URL spec to parse.
+    const char* expected;  // Expected GetContent() / GetContentPiece().
+  } cases[] = {
+      {"null", ""},
+      {"not-a-standard-scheme:this is arbitrary content",
+       "this is arbitrary content"},
+
+      // When there are multiple schemes, only the first is excluded from the
+      // content. Note also that for e.g. 'http://', the '//' is part of the
+      // content not the scheme.
+      {"view-source:http://example.com/path", "http://example.com/path"},
+      {"blob:http://example.com/GUID", "http://example.com/GUID"},
+      {"blob://http://example.com/GUID", "//http://example.com/GUID"},
+      {"blob:http://user:password@example.com/GUID",
+       "http://user:password@example.com/GUID"},
+
+      // The octothorpe character ('#') marks the end of the URL content, and
+      // the start of the fragment. It should not be included in the content.
+      {"http://www.example.com/GUID#ref", "www.example.com/GUID"},
+      {"http://me:secret@example.com/GUID/#ref", "me:secret@example.com/GUID/"},
+      {"data:text/html,Question?<div style=\"color: #bad\">idea</div>",
+       "text/html,Question?%3Cdiv%20style=%22color:%20"},
+
+      // TODO(mkwst): This seems like a bug. https://crbug.com/513600
+      {"filesystem:http://example.com/path", "/"},
+
+      // Javascript URLs include '#' symbols in their content.
+      {"javascript:#", "#"},
+      {"javascript:alert('#');", "alert('#');"},
+  };
+
+  for (const auto& test : cases) {
+    GURL url(test.url);
+    EXPECT_EQ(test.expected, url.GetContent()) << test.url;
+    EXPECT_EQ(test.expected, url.GetContentPiece()) << test.url;
+  }
+}
+
+// Tests that the URL path is properly extracted for unusual URLs. This can be
+// complex in cases such as multiple schemes (view-source:http:) or when
+// octothorpes ('#') are involved.
+TEST(GURLTest, PathForNonStandardURLs) {
+  struct TestCase {
+    const char* url;       // URL spec to parse.
+    const char* expected;  // Expected path() result.
+  } cases[] = {
+      {"null", ""},
+      {"not-a-standard-scheme:this is arbitrary content",
+       "this is arbitrary content"},
+      {"view-source:http://example.com/path", "http://example.com/path"},
+      {"blob:http://example.com/GUID", "http://example.com/GUID"},
+      {"blob://http://example.com/GUID", "//http://example.com/GUID"},
+      {"blob:http://user:password@example.com/GUID",
+       "http://user:password@example.com/GUID"},
+
+      {"http://www.example.com/GUID#ref", "/GUID"},
+      {"http://me:secret@example.com/GUID/#ref", "/GUID/"},
+      {"data:text/html,Question?<div style=\"color: #bad\">idea</div>",
+       "text/html,Question"},
+
+      // TODO(mkwst): This seems like a bug. https://crbug.com/513600
+      {"filesystem:http://example.com/path", "/"},
+  };
+
+  for (const auto& test : cases) {
+    GURL url(test.url);
+    EXPECT_EQ(test.expected, url.path()) << test.url;
+  }
+}
+
+TEST(GURLTest, EqualsIgnoringRef) {
+  const struct {
+    const char* url_a;  // First URL spec.
+    const char* url_b;  // Second URL spec.
+    bool are_equals;    // Expected result, checked in both directions.
+  } kTestCases[] = {
+      // No ref.
+      {"http://a.com", "http://a.com", true},
+      {"http://a.com", "http://b.com", false},
+
+      // Same Ref.
+      {"http://a.com#foo", "http://a.com#foo", true},
+      {"http://a.com#foo", "http://b.com#foo", false},
+
+      // Different Refs.
+      {"http://a.com#foo", "http://a.com#bar", true},
+      {"http://a.com#foo", "http://b.com#bar", false},
+
+      // One has a ref, the other doesn't.
+      {"http://a.com#foo", "http://a.com", true},
+      {"http://a.com#foo", "http://b.com", false},
+
+      // Empty refs.
+      {"http://a.com#", "http://a.com#", true},
+      {"http://a.com#", "http://a.com", true},
+
+      // URLs that differ only by their last character.
+      {"http://aaa", "http://aab", false},
+      {"http://aaa#foo", "http://aab#foo", false},
+
+      // Different size of the part before the ref.
+      {"http://123#a", "http://123456#a", false},
+
+      // Blob URLs
+      {"blob:http://a.com#foo", "blob:http://a.com#foo", true},
+      {"blob:http://a.com#foo", "blob:http://a.com#bar", true},
+      {"blob:http://a.com#foo", "blob:http://b.com#bar", false},
+
+      // Filesystem URLs
+      {"filesystem:http://a.com#foo", "filesystem:http://a.com#foo", true},
+      {"filesystem:http://a.com#foo", "filesystem:http://a.com#bar", true},
+      {"filesystem:http://a.com#foo", "filesystem:http://b.com#bar", false},
+
+      // Data URLs
+      {"data:text/html,a#foo", "data:text/html,a#bar", true},
+      {"data:text/html,a#foo", "data:text/html,a#foo", true},
+      {"data:text/html,a#foo", "data:text/html,b#foo", false},
+  };
+
+  for (const auto& test_case : kTestCases) {
+    SCOPED_TRACE(testing::Message()
+                 << std::endl
+                 << "url_a = " << test_case.url_a << std::endl
+                 << "url_b = " << test_case.url_b << std::endl);
+    // A versus B.
+    EXPECT_EQ(test_case.are_equals,
+              GURL(test_case.url_a).EqualsIgnoringRef(GURL(test_case.url_b)));
+    // B versus A: the comparison is expected to be symmetric.
+    EXPECT_EQ(test_case.are_equals,
+              GURL(test_case.url_b).EqualsIgnoringRef(GURL(test_case.url_a)));
+  }
+}
+
+TEST(GURLTest, DebugAlias) {
+  GURL url("https://foo.com/bar");
+  DEBUG_ALIAS_FOR_GURL(url_debug_alias, url);  // Declares |url_debug_alias|.
+  EXPECT_STREQ("https://foo.com/bar", url_debug_alias);
+}
+
+TEST(GURLTest, InvalidHost) {
+  // This contains an invalid percent escape (%T%) and also a valid
+  // percent escape that's not 7-bit ascii (%ae), so that the unescaped
+  // host contains both an invalid percent escape and invalid UTF-8.
+  GURL url("http://%T%Ae");
+
+  EXPECT_FALSE(url.is_valid());
+  EXPECT_TRUE(url.SchemeIs(url::kHttpScheme));
+
+  // The invalid percent escape becomes an escaped percent sign (%25), and the
+  // invalid UTF-8 character becomes REPLACEMENT CHARACTER (U+FFFD) encoded as
+  // UTF-8.
+  EXPECT_EQ(url.host_piece(), "%25t%EF%BF%BD");
+}
+
+TEST(GURLTest, PortZero) {
+  GURL port_zero_url("http://127.0.0.1:0/blah");
+
+  // https://url.spec.whatwg.org/#port-state says that the port 1) consists of
+  // ASCII digits (this excludes negative numbers) and 2) cannot be greater than
+  // 2^16-1.  This means that port=0 should be valid.
+  EXPECT_TRUE(port_zero_url.is_valid());
+  EXPECT_EQ("0", port_zero_url.port());
+  EXPECT_EQ("127.0.0.1", port_zero_url.host());
+  EXPECT_EQ("http", port_zero_url.scheme());
+
+  // https://crbug.com/1065532: SchemeHostPort would previously incorrectly
+  // consider port=0 to be invalid.
+  SchemeHostPort scheme_host_port(port_zero_url);
+  EXPECT_TRUE(scheme_host_port.IsValid());
+  EXPECT_EQ(port_zero_url.scheme(), scheme_host_port.scheme());
+  EXPECT_EQ(port_zero_url.host(), scheme_host_port.host());
+  EXPECT_EQ(port_zero_url.port(),
+            base::NumberToString(scheme_host_port.port()));
+
+  // https://crbug.com/1065532: The SchemeHostPort problem above would lead to
+  // bizarre results below - resolved origin would incorrectly be returned as an
+  // opaque origin derived from |another_origin|.
+  url::Origin another_origin = url::Origin::Create(GURL("http://other.com"));
+  url::Origin resolved_origin =
+      url::Origin::Resolve(port_zero_url, another_origin);
+  EXPECT_FALSE(resolved_origin.opaque());
+  EXPECT_EQ(port_zero_url.scheme(), resolved_origin.scheme());
+  EXPECT_EQ(port_zero_url.host(), resolved_origin.host());
+  EXPECT_EQ(port_zero_url.port(), base::NumberToString(resolved_origin.port()));
+
+  // port=0 and the default HTTP port (80) are different.
+  GURL default_port("http://127.0.0.1/foo");
+  EXPECT_EQ(0, SchemeHostPort(port_zero_url).port());
+  EXPECT_EQ(80, SchemeHostPort(default_port).port());
+  url::Origin default_port_origin = url::Origin::Create(default_port);
+  EXPECT_FALSE(default_port_origin.IsSameOriginWith(resolved_origin));
+}
+
+class GURLTestTraits {  // Traits plugged into AbstractUrlTest below.
+ public:
+  using UrlType = GURL;
+
+  static UrlType CreateUrlFromString(base::StringPiece s) { return GURL(s); }
+  static bool IsAboutBlank(const UrlType& url) { return url.IsAboutBlank(); }
+  static bool IsAboutSrcdoc(const UrlType& url) { return url.IsAboutSrcdoc(); }
+
+  // Only static members; instances are never created.
+  GURLTestTraits() = delete;
+};
+
+INSTANTIATE_TYPED_TEST_SUITE_P(GURL, AbstractUrlTest, GURLTestTraits);
+
+}  // namespace url
diff --git a/ipc/BUILD.gn b/ipc/BUILD.gn
new file mode 100644
index 00000000000..d7801af54d7
--- /dev/null
+++ b/ipc/BUILD.gn
@@ -0,0 +1,38 @@
+# Copyright 2016 The Chromium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import("//testing/test.gni")
+
+component("url_ipc") {  # IPC param traits for GURL.
+  sources = [
+    "url_ipc_export.h",
+    "url_param_traits.cc",
+    "url_param_traits.h",
+  ]
+
+  defines = [ "URL_IPC_IMPLEMENTATION" ]  # Selects the export side of the macro.
+
+  public_deps = [
+    "//ipc",
+    "//url",
+  ]
+  deps = [ "//base" ]
+}
+
+# IPC unit tests aren't built on iOS.
+if (!is_ios) {
+  source_set("url_ipc_unittests") {
+    testonly = true
+
+    sources = [ "url_param_traits_unittest.cc" ]
+
+    deps = [
+      ":url_ipc",
+      "//base",
+      "//ipc:test_support",
+      "//testing/gtest",
+      "//url:url",
+    ]
+  }
+}
diff --git a/ipc/OWNERS b/ipc/OWNERS
new file mode 100644
index 00000000000..146c3c3cd62
--- /dev/null
+++ b/ipc/OWNERS
@@ -0,0 +1,2 @@
+per-file *_param_traits*.*=set noparent
+per-file *_param_traits*.*=file://ipc/SECURITY_OWNERS
diff --git a/ipc/url_ipc_export.h b/ipc/url_ipc_export.h
new file mode 100644
index 00000000000..ca500ab0c90
--- /dev/null
+++ b/ipc/url_ipc_export.h
@@ -0,0 +1,29 @@
+// Copyright 2016 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_IPC_URL_IPC_EXPORT_H_
+#define URL_IPC_URL_IPC_EXPORT_H_
+
+#if defined(COMPONENT_BUILD)
+#if defined(WIN32)
+
+#if defined(URL_IPC_IMPLEMENTATION)
+#define URL_IPC_EXPORT __declspec(dllexport)
+#else
+#define URL_IPC_EXPORT __declspec(dllimport)
+#endif  // defined(URL_IPC_IMPLEMENTATION)
+
+#else  // defined(WIN32)
+#if defined(URL_IPC_IMPLEMENTATION)
+#define URL_IPC_EXPORT __attribute__((visibility("default")))
+#else
+#define URL_IPC_EXPORT
+#endif  // defined(URL_IPC_IMPLEMENTATION)
+#endif  // defined(WIN32)
+
+#else  // defined(COMPONENT_BUILD)
+#define URL_IPC_EXPORT
+#endif  // defined(COMPONENT_BUILD)
+
+#endif  // URL_IPC_URL_IPC_EXPORT_H_
diff --git a/ipc/url_param_traits.cc b/ipc/url_param_traits.cc
new file mode 100644
index 00000000000..6999c82d586
--- /dev/null
+++ b/ipc/url_param_traits.cc
@@ -0,0 +1,56 @@
+// Copyright 2016 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/ipc/url_param_traits.h"
+
+#include <string>
+
+#include "base/pickle.h"
+#include "url/gurl.h"
+#include "url/url_constants.h"
+
+namespace IPC {
+
+void ParamTraits<GURL>::Write(base::Pickle* m, const GURL& p) {
+  if (p.possibly_invalid_spec().length() > url::kMaxURLChars) {
+    m->WriteString(std::string());  // Over-long URLs are sent as empty.
+    return;
+  }
+
+  // Beware of print-parse inconsistency which would change an invalid
+  // URL into a valid one. Ideally, the message would contain this flag
+  // so that the read side could make the check, but performing it here
+  // avoids changing the on-the-wire representation of such a fundamental
+  // type as GURL. See https://crbug.com/166486 for additional work in
+  // this area.
+  if (!p.is_valid()) {
+    m->WriteString(std::string());  // Invalid URLs are sent as empty.
+    return;
+  }
+
+  m->WriteString(p.possibly_invalid_spec());  // Valid here, so the full spec.
+  // TODO(brettw) bug 684583: Add encoding for query params.
+}
+
+bool ParamTraits<GURL>::Read(const base::Pickle* m,
+                             base::PickleIterator* iter,
+                             GURL* p) {
+  std::string s;  // Serialized spec read from the pickle.
+  if (!iter->ReadString(&s) || s.length() > url::kMaxURLChars) {
+    *p = GURL();  // Leave |p| empty when reporting failure.
+    return false;
+  }
+  *p = GURL(s);  // Re-parse the spec rather than trusting it.
+  if (!s.empty() && !p->is_valid()) {  // An empty string is accepted.
+    *p = GURL();
+    return false;
+  }
+  return true;
+}
+
+void ParamTraits<GURL>::Log(const GURL& p, std::string* l) {
+  l->append(p.spec());  // NOTE(review): spec() may assert if |p| is invalid.
+}
+
+}  // namespace IPC
diff --git a/ipc/url_param_traits.h b/ipc/url_param_traits.h
new file mode 100644
index 00000000000..cdb57242402
--- /dev/null
+++ b/ipc/url_param_traits.h
@@ -0,0 +1,33 @@
+// Copyright 2016 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_IPC_URL_PARAM_TRAITS_H_
+#define URL_IPC_URL_PARAM_TRAITS_H_
+
+#include <string>
+
+#include "ipc/ipc_param_traits.h"
+#include "url/gurl.h"
+#include "url/ipc/url_ipc_export.h"
+
+namespace base {
+class Pickle;
+class PickleIterator;
+}  // namespace base
+
+namespace IPC {
+
+template <>  // IPC serialization, deserialization and logging of GURL.
+struct URL_IPC_EXPORT ParamTraits<GURL> {
+  using param_type = GURL;
+  static void Write(base::Pickle* m, const param_type& p);  // Serializes |p|.
+  static bool Read(const base::Pickle* m,
+                   base::PickleIterator* iter,
+                   param_type* p);  // Returns false on failure.
+  static void Log(const param_type& p, std::string* l);  // Appends to |l|.
+};
+
+}  // namespace IPC
+
+#endif  // URL_IPC_URL_PARAM_TRAITS_H_
diff --git a/ipc/url_param_traits_unittest.cc b/ipc/url_param_traits_unittest.cc
new file mode 100644
index 00000000000..78bed35ed23
--- /dev/null
+++ b/ipc/url_param_traits_unittest.cc
@@ -0,0 +1,159 @@
+// Copyright 2016 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <string>
+
+#include "ipc/ipc_message.h"
+#include "ipc/ipc_message_utils.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "url/gurl.h"
+#include "url/ipc/url_param_traits.h"
+
+namespace {
+
+GURL BounceUrl(const GURL& input) {  // Serialize, then deserialize.
+  IPC::Message msg(1, 2, IPC::Message::PRIORITY_NORMAL);
+  IPC::ParamTraits<GURL>::Write(&msg, input);
+
+  GURL output;
+  base::PickleIterator iter(msg);
+  EXPECT_TRUE(IPC::ParamTraits<GURL>::Read(&msg, &iter, &output));
+
+  return output;
+}
+
+void ExpectSerializationRoundtrips(const GURL& input) {  // Via BounceUrl().
+  SCOPED_TRACE(testing::Message()
+               << "Input GURL: " << input.possibly_invalid_spec());
+  GURL output = BounceUrl(input);
+
+  // We want to test each component individually to make sure its range was
+  // correctly serialized and deserialized, not just the spec.
+  EXPECT_EQ(input.possibly_invalid_spec(), output.possibly_invalid_spec());
+  EXPECT_EQ(input.is_valid(), output.is_valid());
+  EXPECT_EQ(input.scheme(), output.scheme());
+  EXPECT_EQ(input.username(), output.username());
+  EXPECT_EQ(input.password(), output.password());
+  EXPECT_EQ(input.host(), output.host());
+  EXPECT_EQ(input.port(), output.port());
+  EXPECT_EQ(input.path(), output.path());
+  EXPECT_EQ(input.query(), output.query());
+  EXPECT_EQ(input.ref(), output.ref());
+}
+
+}  // namespace
+
+// Tests that serialize/deserialize correctly understand each other.
+TEST(IPCMessageTest, SerializeGurl_Basic) {
+  const char* serialize_cases[] = {
+    "http://www.google.com/",
+    "http://user:pass@host.com:888/foo;bar?baz#nop",  // Every component set.
+  };
+
+  for (const char* test_input : serialize_cases) {
+    SCOPED_TRACE(testing::Message() << "Test input: " << test_input);
+    GURL input(test_input);
+    ExpectSerializationRoundtrips(input);
+  }
+}
+
+// An excessively long GURL should come out of a round trip as an empty GURL.
+TEST(IPCMessageTest, SerializeGurl_ExcessivelyLong) {
+  const std::string url =
+      std::string("http://example.org/").append(url::kMaxURLChars + 1, 'a');
+  GURL input(url.c_str());
+  GURL output = BounceUrl(input);
+  EXPECT_TRUE(output.is_empty());
+}
+
+// Reading a string that does not parse as a valid GURL should fail.
+TEST(IPCMessageTest, SerializeGurl_InvalidUrl) {
+  IPC::Message msg;
+  msg.WriteString("#inva://idurl/");
+  GURL output;
+  base::PickleIterator iter(msg);
+  EXPECT_FALSE(IPC::ParamTraits<GURL>::Read(&msg, &iter, &output));
+}
+
+// Deserializing a payload that holds an int rather than a string should fail.
+TEST(IPCMessageTest, SerializeGurl_CorruptPayload) {
+  IPC::Message msg(1, 2, IPC::Message::PRIORITY_NORMAL);
+  msg.WriteInt(99);
+  GURL output;
+  base::PickleIterator iter(msg);
+  EXPECT_FALSE(IPC::ParamTraits<GURL>::Read(&msg, &iter, &output));
+}
+
+// Test for the GURL testcase based on https://crbug.com/1214098 (which in turn
+// was based on ContentSecurityPolicyBrowserTest.FileURLs).
+TEST(IPCMessageTest, SerializeGurl_WindowsDriveInPathReplacement) {
+  {
+    // #1: Try creating a file URL with a non-empty hostname.
+    GURL url_without_windows_drive_letter("file://hostname/");
+    EXPECT_EQ("/", url_without_windows_drive_letter.path());
+    EXPECT_EQ("hostname", url_without_windows_drive_letter.host());
+    ExpectSerializationRoundtrips(url_without_windows_drive_letter);
+  }
+
+  {
+    // #2: Use GURL::Replacements to create a GURL with 1) a path that starts
+    // with a Windows drive letter and 2) has a non-empty hostname (inherited
+    // from `url_without_windows_drive_letter` above). This used to not go
+    // through the DoParseUNC path that normally strips the hostname (for more
+    // details, see https://crbug.com/1214098#c4).
+    GURL::Replacements repl;
+    const std::string kNewPath = "/C:/dir/file.txt";
+    repl.SetPathStr(kNewPath);
+    GURL url_made_with_replace_components =
+        GURL("file://hostname/").ReplaceComponents(repl);
+
+    EXPECT_EQ(kNewPath, url_made_with_replace_components.path());
+    EXPECT_EQ("hostname", url_made_with_replace_components.host());
+    EXPECT_EQ("file://hostname/C:/dir/file.txt",
+              url_made_with_replace_components.spec());
+    // This is the MAIN VERIFICATION in this test. This used to fail on Windows,
+    // see https://crbug.com/1214098.
+    ExpectSerializationRoundtrips(url_made_with_replace_components);
+  }
+
+  {
+    // #3: Try to create a URL with a Windows drive letter and a non-empty
+    // hostname directly.
+    GURL url_created_directly("file://hostname/C:/dir/file.txt");
+    EXPECT_EQ("/C:/dir/file.txt", url_created_directly.path());
+    EXPECT_EQ("hostname", url_created_directly.host());
+    EXPECT_EQ("file://hostname/C:/dir/file.txt", url_created_directly.spec());
+    ExpectSerializationRoundtrips(url_created_directly);
+
+    // The URL created directly and the URL created through ReplaceComponents
+    // should be the same.
+    GURL::Replacements repl;
+    const std::string kNewPath = "/C:/dir/file.txt";
+    repl.SetPathStr(kNewPath);
+    GURL url_made_with_replace_components =
+        GURL("file://hostname/").ReplaceComponents(repl);
+    EXPECT_EQ(url_created_directly.spec(),
+              url_made_with_replace_components.spec());
+  }
+
+  {
+    // #4: Try to create a URL with a Windows drive letter and "localhost" as
+    // hostname directly. The "localhost" host is expected to be dropped.
+    GURL url_created_directly("file://localhost/C:/dir/file.txt");
+    EXPECT_EQ("/C:/dir/file.txt", url_created_directly.path());
+    EXPECT_EQ("", url_created_directly.host());
+    EXPECT_EQ("file:///C:/dir/file.txt", url_created_directly.spec());
+    ExpectSerializationRoundtrips(url_created_directly);
+
+    // The URL created directly and the URL created through ReplaceComponents
+    // should be the same.
+    GURL::Replacements repl;
+    const std::string kNewPath = "/C:/dir/file.txt";
+    repl.SetPathStr(kNewPath);
+    GURL url_made_with_replace_components =
+        GURL("file://localhost/").ReplaceComponents(repl);
+    EXPECT_EQ(url_created_directly.spec(),
+              url_made_with_replace_components.spec());
+  }
+}
diff --git a/mojom/BUILD.gn b/mojom/BUILD.gn
new file mode 100644
index 00000000000..a936a432bb3
--- /dev/null
+++ b/mojom/BUILD.gn
@@ -0,0 +1,141 @@
+# Copyright 2016 The Chromium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import("//mojo/public/tools/bindings/mojom.gni")
+
+mojom("url_mojom_gurl") {
+  generate_java = true
+  sources = [ "url.mojom" ]
+
+  cpp_typemaps = [
+    {
+      types = [
+        {
+          mojom = "url.mojom.Url"
+          cpp = "::GURL"
+        },
+      ]
+      traits_headers = [ "//url/mojom/url_gurl_mojom_traits.h" ]
+      traits_public_deps = [
+        ":mojom_traits",
+        "//url",
+      ]
+    },
+  ]
+
+  blink_cpp_typemaps = [
+    {
+      types = [
+        {
+          mojom = "url.mojom.Url"
+          cpp = "::blink::KURL"
+          force_serialize = true
+        },
+      ]
+      traits_headers = [
+        "//third_party/blink/renderer/platform/mojo/kurl_mojom_traits.h",
+        "//third_party/blink/renderer/platform/weborigin/kurl_hash.h",
+      ]
+      traits_public_deps = [ "//url" ]
+    },
+  ]
+
+  webui_module_path = "chrome://resources/mojo/url/mojom"
+}
+
+mojom("url_mojom_origin") {
+  generate_java = true
+  sources = [ "origin.mojom" ]
+
+  public_deps = [
+    ":url_mojom_gurl",
+    "//mojo/public/mojom/base",
+  ]
+
+  check_includes_blink = false
+
+  cpp_typemaps = [
+    {
+      types = [
+        {
+          mojom = "url.mojom.Origin"
+          cpp = "::url::Origin"
+        },
+      ]
+      traits_headers = [ "//url/mojom/origin_mojom_traits.h" ]
+      traits_public_deps = [
+        ":mojom_traits",
+        "//url",
+      ]
+    },
+  ]
+
+  blink_cpp_typemaps = [
+    {
+      types = [
+        {
+          mojom = "url.mojom.Origin"
+          cpp = "::scoped_refptr<const ::blink::SecurityOrigin>"
+          nullable_is_same_type = true
+        },
+      ]
+      traits_headers = [ "//third_party/blink/renderer/platform/mojo/security_origin_mojom_traits.h" ]
+      traits_public_deps = [ "//url" ]
+    },
+  ]
+
+  webui_module_path = "chrome://resources/mojo/url/mojom"
+}
+
+mojom("url_mojom_scheme_host_port") {
+  generate_java = true
+  sources = [ "scheme_host_port.mojom" ]
+
+  cpp_typemaps = [
+    {
+      types = [
+        {
+          mojom = "url.mojom.SchemeHostPort"
+          cpp = "::url::SchemeHostPort"
+        },
+      ]
+      traits_headers = [ "//url/mojom/scheme_host_port_mojom_traits.h" ]
+      traits_public_deps = [ "//url" ]
+    },
+  ]
+}
+
+mojom("test_url_mojom_gurl") {
+  sources = [ "url_test.mojom" ]
+
+  public_deps = [
+    ":url_mojom_gurl",
+    ":url_mojom_origin",
+    ":url_mojom_scheme_host_port",
+  ]
+}
+
+component("mojom_traits") {
+  output_name = "url_mojom_traits"
+
+  sources = [
+    "origin_mojom_traits.cc",
+    "origin_mojom_traits.h",
+    "scheme_host_port_mojom_traits.cc",
+    "scheme_host_port_mojom_traits.h",
+    "url_gurl_mojom_traits.cc",
+    "url_gurl_mojom_traits.h",
+  ]
+
+  defines = [ "IS_URL_MOJOM_TRAITS_IMPL" ]
+
+  public_deps = [
+    ":url_mojom_gurl_shared",
+    ":url_mojom_origin_shared",
+    ":url_mojom_scheme_host_port",
+    "//base",
+    "//mojo/public/cpp/base:shared_typemap_traits",
+    "//url",
+  ]
+}
diff --git a/mojom/DEPS b/mojom/DEPS
new file mode 100644
index 00000000000..093b1d9fde5
--- /dev/null
+++ b/mojom/DEPS
@@ -0,0 +1,3 @@
+include_rules = [
+  "+mojo/public/cpp",
+]
diff --git a/mojom/DIR_METADATA b/mojom/DIR_METADATA
new file mode 100644
index 00000000000..c080aa16485
--- /dev/null
+++ b/mojom/DIR_METADATA
@@ -0,0 +1,11 @@
+# Metadata information for this directory.
+#
+# For more information on DIR_METADATA files, see:
+#   https://source.chromium.org/chromium/infra/infra/+/main:go/src/infra/tools/dirmd/README.md
+#
+# For the schema of this file, see Metadata message:
+#   https://source.chromium.org/chromium/infra/infra/+/main:go/src/infra/tools/dirmd/proto/dir_metadata.proto
+
+monorail {
+  component: "Internals>Mojo"
+}
\ No newline at end of file
diff --git a/mojom/OWNERS b/mojom/OWNERS
new file mode 100644
index 00000000000..1feb5149750
--- /dev/null
+++ b/mojom/OWNERS
@@ -0,0 +1,4 @@
+per-file *.mojom=set noparent
+per-file *.mojom=file://ipc/SECURITY_OWNERS
+per-file *_mojom_traits*.*=set noparent
+per-file *_mojom_traits*.*=file://ipc/SECURITY_OWNERS
diff --git a/mojom/origin.mojom b/mojom/origin.mojom
new file mode 100644
index 00000000000..94b6e2d587c
--- /dev/null
+++ b/mojom/origin.mojom
@@ -0,0 +1,19 @@
+// Copyright 2016 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+[JavaPackage="org.chromium.url.internal.mojom"]
+module url.mojom;
+
+import "mojo/public/mojom/base/unguessable_token.mojom";
+
+struct Origin {
+  string scheme;
+  string host;
+  uint16 port;
+
+  // When a nonce is provided, this origin is opaque. The scheme/host/port do
+  // not need to be valid, but if they are, they identify the tuple origin
+  // from which this opaque origin is derived.
+  mojo_base.mojom.UnguessableToken? nonce_if_opaque;
+};
diff --git a/mojom/origin_mojom_traits.cc b/mojom/origin_mojom_traits.cc
new file mode 100644
index 00000000000..9e8475ac53e
--- /dev/null
+++ b/mojom/origin_mojom_traits.cc
@@ -0,0 +1,34 @@
+// Copyright 2020 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/mojom/origin_mojom_traits.h"
+
+#include "base/strings/string_piece.h"
+
+namespace mojo {
+
+// static
+bool StructTraits<url::mojom::OriginDataView, url::Origin>::Read(
+    url::mojom::OriginDataView data,
+    url::Origin* out) {
+  base::StringPiece scheme, host;
+  absl::optional<base::UnguessableToken> token;
+  const bool decoded = data.ReadScheme(&scheme) && data.ReadHost(&host) &&
+                       data.ReadNonceIfOpaque(&token);
+  if (!decoded)
+    return false;
+  // Opaque origin if a nonce was received, else tuple; nullopt on rejection.
+  absl::optional<url::Origin> origin =
+      token.has_value()
+          ? url::Origin::UnsafelyCreateOpaqueOriginWithoutNormalization(
+                scheme, host, data.port(), url::Origin::Nonce(*token))
+          : url::Origin::UnsafelyCreateTupleOriginWithoutNormalization(
+                scheme, host, data.port());
+  if (!origin.has_value())
+    return false;
+  *out = std::move(*origin);
+  return true;
+}
+
+}  // namespace mojo
diff --git a/mojom/origin_mojom_traits.h b/mojom/origin_mojom_traits.h
new file mode 100644
index 00000000000..0d3cbb276cb
--- /dev/null
+++ b/mojom/origin_mojom_traits.h
@@ -0,0 +1,39 @@
+// Copyright 2016 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_MOJOM_ORIGIN_MOJOM_TRAITS_H_
+#define URL_MOJOM_ORIGIN_MOJOM_TRAITS_H_
+
+#include "base/component_export.h"
+#include "base/unguessable_token.h"
+#include "mojo/public/cpp/base/unguessable_token_mojom_traits.h"
+#include "mojo/public/cpp/bindings/optional_as_pointer.h"
+#include "third_party/abseil-cpp/absl/types/optional.h"
+#include "url/mojom/origin.mojom-shared.h"
+#include "url/origin.h"
+
+namespace mojo {
+
+template <>
+struct COMPONENT_EXPORT(URL_MOJOM_TRAITS)
+    StructTraits<url::mojom::OriginDataView, url::Origin> {
+  static const std::string& scheme(const url::Origin& r) {
+    return r.GetTupleOrPrecursorTupleIfOpaque().scheme();
+  }
+  static const std::string& host(const url::Origin& r) {
+    return r.GetTupleOrPrecursorTupleIfOpaque().host();
+  }
+  static uint16_t port(const url::Origin& r) {
+    return r.GetTupleOrPrecursorTupleIfOpaque().port();
+  }
+  static mojo::OptionalAsPointer<const base::UnguessableToken> nonce_if_opaque(
+      const url::Origin& r) {
+    return mojo::MakeOptionalAsPointer(r.GetNonceForSerialization());
+  }
+  static bool Read(url::mojom::OriginDataView data, url::Origin* out);
+};
+
+}  // namespace mojo
+
+#endif  // URL_MOJOM_ORIGIN_MOJOM_TRAITS_H_
diff --git a/mojom/scheme_host_port.mojom b/mojom/scheme_host_port.mojom
new file mode 100644
index 00000000000..79f37221d84
--- /dev/null
+++ b/mojom/scheme_host_port.mojom
@@ -0,0 +1,13 @@
+// Copyright 2021 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+module url.mojom;
+
+// See url::SchemeHostPort for details, and what differentiates this from an
+// origin.
+struct SchemeHostPort {
+  string scheme;
+  string host;
+  uint16 port;
+};
diff --git a/mojom/scheme_host_port_mojom_traits.cc b/mojom/scheme_host_port_mojom_traits.cc
new file mode 100644
index 00000000000..63f6af4c101
--- /dev/null
+++ b/mojom/scheme_host_port_mojom_traits.cc
@@ -0,0 +1,27 @@
+// Copyright 2021 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/mojom/scheme_host_port_mojom_traits.h"
+
+#include "base/strings/string_piece.h"
+#include "url/mojom/scheme_host_port.mojom-shared.h"
+#include "url/scheme_host_port.h"
+
+namespace mojo {
+
+// static
+bool StructTraits<url::mojom::SchemeHostPortDataView, url::SchemeHostPort>::
+    Read(url::mojom::SchemeHostPortDataView data, url::SchemeHostPort* out) {
+  base::StringPiece scheme, host;
+  if (!(data.ReadScheme(&scheme) && data.ReadHost(&host)))
+    return false;
+  *out = url::SchemeHostPort(scheme, host, data.port());
+  if (out->IsValid())
+    return true;
+  // An invalid result is tolerated only when every received field was empty
+  // or zero; non-empty values that fail to form a valid tuple are an error.
+  return scheme.empty() && host.empty() && data.port() == 0;
+}
+
+}  // namespace mojo
diff --git a/mojom/scheme_host_port_mojom_traits.h b/mojom/scheme_host_port_mojom_traits.h
new file mode 100644
index 00000000000..e91ae3a0c29
--- /dev/null
+++ b/mojom/scheme_host_port_mojom_traits.h
@@ -0,0 +1,30 @@
+// Copyright 2021 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_MOJOM_SCHEME_HOST_PORT_MOJOM_TRAITS_H_
+#define URL_MOJOM_SCHEME_HOST_PORT_MOJOM_TRAITS_H_
+
+#include "base/component_export.h"
+#include "url/mojom/scheme_host_port.mojom-shared.h"
+#include "url/scheme_host_port.h"
+
+namespace mojo {
+
+template <>
+struct COMPONENT_EXPORT(URL_MOJOM_TRAITS)
+    StructTraits<url::mojom::SchemeHostPortDataView, url::SchemeHostPort> {
+  static const std::string& scheme(const url::SchemeHostPort& r) {
+    return r.scheme();
+  }
+  static const std::string& host(const url::SchemeHostPort& r) {
+    return r.host();
+  }
+  static uint16_t port(const url::SchemeHostPort& r) { return r.port(); }
+  static bool Read(url::mojom::SchemeHostPortDataView data,
+                   url::SchemeHostPort* out);
+};
+
+}  // namespace mojo
+
+#endif  // URL_MOJOM_SCHEME_HOST_PORT_MOJOM_TRAITS_H_
diff --git a/mojom/scheme_host_port_mojom_traits_unittest.cc b/mojom/scheme_host_port_mojom_traits_unittest.cc
new file mode 100644
index 00000000000..7efd5d7786e
--- /dev/null
+++ b/mojom/scheme_host_port_mojom_traits_unittest.cc
@@ -0,0 +1,36 @@
+// Copyright 2021 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/mojom/scheme_host_port_mojom_traits.h"
+
+#include <string>
+
+#include "mojo/public/cpp/test_support/test_utils.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "url/mojom/scheme_host_port.mojom.h"
+#include "url/scheme_host_port.h"
+
+namespace url {
+
+namespace {
+
+void TestRoundTrip(const url::SchemeHostPort& in) {
+  url::SchemeHostPort result;
+  ASSERT_TRUE(
+      mojo::test::SerializeAndDeserialize<mojom::SchemeHostPort>(in, result))
+      << in.Serialize();
+  EXPECT_EQ(in, result) << "Expected " << in.Serialize() << ", but got "
+                        << result.Serialize();
+}
+
+}  // namespace
+
+TEST(SchemeHostPortMojomTraitsTest, RoundTrip) {
+  TestRoundTrip(url::SchemeHostPort());
+  TestRoundTrip(url::SchemeHostPort("http", "test", 80));
+  TestRoundTrip(url::SchemeHostPort("https", "foo.test", 443));
+  TestRoundTrip(url::SchemeHostPort("file", "", 0));
+}
+
+}  // namespace url
diff --git a/mojom/url.mojom b/mojom/url.mojom
new file mode 100644
index 00000000000..e5fbee620b8
--- /dev/null
+++ b/mojom/url.mojom
@@ -0,0 +1,13 @@
+// Copyright 2016 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+module url.mojom;
+
+// 2 * 1024 * 1024
+const uint32 kMaxURLChars = 2097152;
+
+[Stable]
+struct Url {
+  string url;
+};
diff --git a/mojom/url_gurl_mojom_traits.cc b/mojom/url_gurl_mojom_traits.cc
new file mode 100644
index 00000000000..97b301a0ab2
--- /dev/null
+++ b/mojom/url_gurl_mojom_traits.cc
@@ -0,0 +1,40 @@
+// Copyright 2020 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/mojom/url_gurl_mojom_traits.h"
+
+#include "url/url_constants.h"
+
+namespace mojo {
+
+// static
+base::StringPiece StructTraits<url::mojom::UrlDataView, GURL>::url(
+    const GURL& r) {
+  // Invalid or over-long URLs are sent as an empty string.
+  const auto& spec = r.possibly_invalid_spec();
+  if (spec.length() > url::kMaxURLChars || !r.is_valid())
+    return base::StringPiece();
+
+  return base::StringPiece(spec);
+}
+
+// static
+bool StructTraits<url::mojom::UrlDataView, GURL>::Read(
+    url::mojom::UrlDataView data,
+    GURL* out) {
+  // Reject payloads that fail to decode or exceed the URL length limit.
+  base::StringPiece serialized;
+  if (!data.ReadUrl(&serialized) ||
+      serialized.length() > url::kMaxURLChars) {
+    return false;
+  }
+
+  *out = GURL(serialized);
+
+  // An empty string is accepted without a validity check; any non-empty
+  // string must parse to a valid GURL.
+  return serialized.empty() || out->is_valid();
+}
+
+}  // namespace mojo
diff --git a/mojom/url_gurl_mojom_traits.h b/mojom/url_gurl_mojom_traits.h
new file mode 100644
index 00000000000..19ac049c8a4
--- /dev/null
+++ b/mojom/url_gurl_mojom_traits.h
@@ -0,0 +1,25 @@
+// Copyright 2016 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_MOJOM_URL_GURL_MOJOM_TRAITS_H_
+#define URL_MOJOM_URL_GURL_MOJOM_TRAITS_H_
+
+#include "base/component_export.h"
+#include "base/strings/string_piece.h"
+#include "mojo/public/cpp/bindings/struct_traits.h"
+#include "url/gurl.h"
+#include "url/mojom/url.mojom-shared.h"
+
+namespace mojo {
+
+template <>
+struct COMPONENT_EXPORT(URL_MOJOM_TRAITS)
+    StructTraits<url::mojom::UrlDataView, GURL> {
+  static base::StringPiece url(const GURL& r);
+  static bool Read(url::mojom::UrlDataView data, GURL* out);
+};
+
+}  // namespace mojo
+
+#endif  // URL_MOJOM_URL_GURL_MOJOM_TRAITS_H_
diff --git a/mojom/url_gurl_mojom_traits_unittest.cc b/mojom/url_gurl_mojom_traits_unittest.cc
new file mode 100644
index 00000000000..48968d24bfd
--- /dev/null
+++ b/mojom/url_gurl_mojom_traits_unittest.cc
@@ -0,0 +1,209 @@
+// Copyright 2016 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <utility>
+
+#include "base/test/task_environment.h"
+#include "mojo/public/cpp/bindings/pending_receiver.h"
+#include "mojo/public/cpp/bindings/receiver.h"
+#include "mojo/public/cpp/bindings/remote.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "url/mojom/url_test.mojom.h"
+
+namespace url {
+
+class UrlTestImpl : public mojom::UrlTest {
+ public:
+  explicit UrlTestImpl(mojo::PendingReceiver<mojom::UrlTest> receiver)
+      : receiver_(this, std::move(receiver)) {}
+
+  // UrlTest:
+  void BounceUrl(const GURL& in, BounceUrlCallback callback) override {
+    std::move(callback).Run(in);
+  }
+
+  void BounceOrigin(const Origin& in, BounceOriginCallback callback) override {
+    std::move(callback).Run(in);
+  }
+
+ private:
+  mojo::Receiver<UrlTest> receiver_;
+};
+
+// Fixture that bounces GURLs and Origins through a UrlTest Mojo pipe.
+class MojoGURLStructTraitsTest : public ::testing::Test {
+ public:
+  MojoGURLStructTraitsTest()
+      : url_test_impl_(url_test_remote_.BindNewPipeAndPassReceiver()) {}
+
+  GURL BounceUrl(const GURL& input) {
+    GURL output;
+    EXPECT_TRUE(url_test_remote_->BounceUrl(input, &output));
+    return output;
+  }
+
+  void ExpectSerializationRoundtrips(const GURL& input) {
+    SCOPED_TRACE(testing::Message()
+                 << "Input GURL: " << input.possibly_invalid_spec());
+    GURL output = BounceUrl(input);
+    // We want to test each component individually to make sure its range was
+    // correctly serialized and deserialized, not just the spec.
+    EXPECT_EQ(input.possibly_invalid_spec(), output.possibly_invalid_spec());
+    EXPECT_EQ(input.is_valid(), output.is_valid());
+    EXPECT_EQ(input.scheme(), output.scheme());
+    EXPECT_EQ(input.username(), output.username());
+    EXPECT_EQ(input.password(), output.password());
+    EXPECT_EQ(input.host(), output.host());
+    EXPECT_EQ(input.port(), output.port());
+    EXPECT_EQ(input.path(), output.path());
+    EXPECT_EQ(input.query(), output.query());
+    EXPECT_EQ(input.ref(), output.ref());
+  }
+
+  Origin BounceOrigin(const Origin& input) {
+    Origin output;
+    EXPECT_TRUE(url_test_remote_->BounceOrigin(input, &output));
+    return output;
+  }
+
+ private:
+  base::test::SingleThreadTaskEnvironment task_environment_;
+  mojo::Remote<mojom::UrlTest> url_test_remote_;
+  UrlTestImpl url_test_impl_;
+};
+
+// Mojo version of chrome IPC test in url/ipc/url_param_traits_unittest.cc.
+TEST_F(MojoGURLStructTraitsTest, Basic) {
+  const char* serialize_cases[] = {
+      "http://www.google.com/",
+      "http://user:pass@host.com:888/foo;bar?baz#nop",
+  };
+
+  for (const char* test_input : serialize_cases) {
+    SCOPED_TRACE(testing::Message() << "Test input: " << test_input);
+    GURL input(test_input);
+    ExpectSerializationRoundtrips(input);
+  }
+}
+
+// Test of an excessively long GURL.
+TEST_F(MojoGURLStructTraitsTest, ExcessivelyLongUrl) {
+  const std::string url =
+      std::string("http://example.org/").append(kMaxURLChars + 1, 'a');
+  GURL input(url.c_str());
+  GURL output = BounceUrl(input);
+  EXPECT_TRUE(output.is_empty());
+}
+
+// Test for the GURL testcase based on https://crbug.com/1214098 (which in turn
+// was based on ContentSecurityPolicyBrowserTest.FileURLs).
+TEST_F(MojoGURLStructTraitsTest, WindowsDriveInPathReplacement) {
+  {
+    // #1: Try creating a file URL with a non-empty hostname.
+    GURL url_without_windows_drive_letter("file://hostname/");
+    EXPECT_EQ("/", url_without_windows_drive_letter.path());
+    EXPECT_EQ("hostname", url_without_windows_drive_letter.host());
+    ExpectSerializationRoundtrips(url_without_windows_drive_letter);
+  }
+
+  {
+    // #2: Use GURL::Replacement to create a GURL with 1) a path that starts
+    // with a Windows drive letter and 2) has a non-empty hostname (inherited
+    // from `url_without_windows_drive_letter` above). This used to not go
+    // through the DoParseUNC path that normally strips the hostname (for more
+    // details, see https://crbug.com/1214098#c4).
+    GURL::Replacements repl;
+    const std::string kNewPath = "/C:/dir/file.txt";
+    repl.SetPathStr(kNewPath);
+    GURL url_made_with_replace_components =
+        GURL("file://hostname/").ReplaceComponents(repl);
+
+    EXPECT_EQ(kNewPath, url_made_with_replace_components.path());
+    EXPECT_EQ("hostname", url_made_with_replace_components.host());
+    EXPECT_EQ("file://hostname/C:/dir/file.txt",
+              url_made_with_replace_components.spec());
+    // This is the MAIN VERIFICATION in this test. This used to fail on Windows,
+    // see https://crbug.com/1214098.
+    ExpectSerializationRoundtrips(url_made_with_replace_components);
+  }
+
+  {
+    // #3: Try to create a URL with a Windows drive letter and a non-empty
+    // hostname directly.
+    GURL url_created_directly("file://hostname/C:/dir/file.txt");
+    EXPECT_EQ("/C:/dir/file.txt", url_created_directly.path());
+    EXPECT_EQ("hostname", url_created_directly.host());
+    EXPECT_EQ("file://hostname/C:/dir/file.txt", url_created_directly.spec());
+    ExpectSerializationRoundtrips(url_created_directly);
+
+    // The URL created directly and the URL created through ReplaceComponents
+    // should be the same.
+    GURL::Replacements repl;
+    const std::string kNewPath = "/C:/dir/file.txt";
+    repl.SetPathStr(kNewPath);
+    GURL url_made_with_replace_components =
+        GURL("file://hostname/").ReplaceComponents(repl);
+    EXPECT_EQ(url_created_directly.spec(),
+              url_made_with_replace_components.spec());
+  }
+
+  {
+    // #4: Try to create a URL with a Windows drive letter and "localhost" as
+    // hostname directly.
+    GURL url_created_directly("file://localhost/C:/dir/file.txt");
+    EXPECT_EQ("/C:/dir/file.txt", url_created_directly.path());
+    EXPECT_EQ("", url_created_directly.host());
+    EXPECT_EQ("file:///C:/dir/file.txt", url_created_directly.spec());
+    ExpectSerializationRoundtrips(url_created_directly);
+
+    // The URL created directly and the URL created through ReplaceComponents
+    // should be the same.
+    GURL::Replacements repl;
+    const std::string kNewPath = "/C:/dir/file.txt";
+    repl.SetPathStr(kNewPath);
+    GURL url_made_with_replace_components =
+        GURL("file://localhost/").ReplaceComponents(repl);
+    EXPECT_EQ(url_created_directly.spec(),
+              url_made_with_replace_components.spec());
+  }
+}
+
+// Test of basic Origin serialization.
+TEST_F(MojoGURLStructTraitsTest, OriginSerialization) {
+  Origin non_unique = Origin::UnsafelyCreateTupleOriginWithoutNormalization(
+                          "http", "www.google.com", 80)
+                          .value();
+  Origin output = BounceOrigin(non_unique);
+  EXPECT_EQ(non_unique, output);
+  EXPECT_FALSE(output.opaque());
+
+  Origin unique1;
+  Origin unique2 = non_unique.DeriveNewOpaqueOrigin();
+  EXPECT_NE(unique1, unique2);
+  EXPECT_NE(unique2, unique1);
+  EXPECT_NE(unique2, non_unique);
+  output = BounceOrigin(unique1);
+  EXPECT_TRUE(output.opaque());
+  EXPECT_EQ(unique1, output);
+  Origin output2 = BounceOrigin(unique2);
+  EXPECT_EQ(unique2, output2);
+  EXPECT_NE(unique2, output);
+  EXPECT_NE(unique1, output2);
+
+  Origin normalized =
+      Origin::CreateFromNormalizedTuple("http", "www.google.com", 80);
+  EXPECT_EQ(normalized, non_unique);
+  output = BounceOrigin(normalized);
+  EXPECT_EQ(normalized, output);
+  EXPECT_EQ(non_unique, output);
+  EXPECT_FALSE(output.opaque());
+}
+
+// Test that the "kMaxURLChars" values are the same in url.mojom and
+// url_constants.cc.
+TEST_F(MojoGURLStructTraitsTest, TestMaxURLChars) {
+  EXPECT_EQ(kMaxURLChars, mojom::kMaxURLChars);
+}
+
+}  // namespace url
diff --git a/mojom/url_test.mojom b/mojom/url_test.mojom
new file mode 100644
index 00000000000..4dc00deed6b
--- /dev/null
+++ b/mojom/url_test.mojom
@@ -0,0 +1,16 @@
+// Copyright 2016 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+module url.mojom;
+
+import "url/mojom/origin.mojom";
+import "url/mojom/url.mojom";
+
+interface UrlTest {
+  [Sync]
+  BounceUrl(Url in) => (Url out);
+
+  [Sync]
+  BounceOrigin(Origin in) => (Origin out);
+};
diff --git a/origin.cc b/origin.cc
new file mode 100644
index 00000000000..38be245a472
--- /dev/null
+++ b/origin.cc
@@ -0,0 +1,482 @@
+// Copyright 2015 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/origin.h"
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <ostream>
+#include <string>
+#include <tuple>
+#include <utility>
+
+#include "base/base64.h"
+#include "base/check.h"
+#include "base/check_op.h"
+#include "base/containers/contains.h"
+#include "base/containers/span.h"
+#include "base/debug/crash_logging.h"
+#include "base/pickle.h"
+#include "base/strings/strcat.h"
+#include "base/strings/string_piece.h"
+#include "base/trace_event/base_tracing.h"
+#include "base/unguessable_token.h"
+#include "url/gurl.h"
+#include "url/scheme_host_port.h"
+#include "url/url_constants.h"
+#include "url/url_util.h"
+
+namespace url {
+
+Origin::Origin() : nonce_(Nonce()) {}
+
+Origin Origin::Create(const GURL& url) {
+  if (!url.is_valid())
+    return Origin();  // Invalid URLs map to a default-constructed Origin.
+
+  SchemeHostPort tuple;
+  // Choose the URL from which the scheme/host/port tuple is built.
+  if (url.SchemeIsFileSystem()) {
+    tuple = SchemeHostPort(*url.inner_url());
+  } else if (url.SchemeIsBlob()) {
+    // If we're dealing with a 'blob:' URL, https://url.spec.whatwg.org/#origin
+    // defines the origin as the origin of the URL which results from parsing
+    // the "path", which boils down to everything after the scheme. GURL's
+    // 'GetContent()' gives us exactly that.
+    tuple = SchemeHostPort(GURL(url.GetContent()));
+  } else {
+    tuple = SchemeHostPort(url);
+
+    // It's SchemeHostPort's responsibility to filter out unrecognized schemes;
+    // sanity check that this is happening.
+    DCHECK(!tuple.IsValid() || url.IsStandard() ||
+           base::Contains(GetLocalSchemes(), url.scheme_piece()) ||
+           AllowNonStandardSchemesForAndroidWebView());
+  }
+
+  if (!tuple.IsValid())
+    return Origin();  // Same fallback when no valid tuple could be built.
+  return Origin(std::move(tuple));
+}
+
+Origin Origin::Resolve(const GURL& url, const Origin& base_origin) {
+  if (url.is_empty() || url.SchemeIs(kAboutScheme))
+    return base_origin;  // Empty and about: URLs inherit the base origin.
+  Origin url_origin = Origin::Create(url);
+  if (url_origin.opaque())
+    return base_origin.DeriveNewOpaqueOrigin();  // Keep the base as precursor.
+  return url_origin;
+}
+
+Origin::Origin(const Origin&) = default;
+Origin& Origin::operator=(const Origin&) = default;
+Origin::Origin(Origin&&) noexcept = default;
+Origin& Origin::operator=(Origin&&) noexcept = default;
+Origin::~Origin() = default;
+
+// static
+absl::optional<Origin> Origin::UnsafelyCreateTupleOriginWithoutNormalization(
+    base::StringPiece scheme,
+    base::StringPiece host,
+    uint16_t port) {
+  SchemeHostPort candidate(std::string(scheme), std::string(host), port,
+                           SchemeHostPort::CHECK_CANONICALIZATION);
+  if (candidate.IsValid())
+    return Origin(std::move(candidate));
+  return absl::nullopt;  // Rejected: the tuple did not validate.
+}
+
+// static
+absl::optional<Origin> Origin::UnsafelyCreateOpaqueOriginWithoutNormalization(
+    base::StringPiece precursor_scheme,
+    base::StringPiece precursor_host,
+    uint16_t precursor_port,
+    const Origin::Nonce& nonce) {
+  SchemeHostPort precursor(std::string(precursor_scheme),
+                           std::string(precursor_host), precursor_port,
+                           SchemeHostPort::CHECK_CANONICALIZATION);
+  // An invalid precursor is acceptable for an opaque origin, but only when
+  // the arguments are the canonical representation of an invalid tuple:
+  // empty scheme, empty host and port 0. Anything else is rejected.
+  const bool is_canonical_empty_tuple = precursor_scheme.empty() &&
+                                        precursor_host.empty() &&
+                                        precursor_port == 0;
+  if (!precursor.IsValid() && !is_canonical_empty_tuple)
+    return absl::nullopt;
+  return Origin(nonce, std::move(precursor));  // |nonce| is const: copied.
+}
+
+// static
+Origin Origin::CreateFromNormalizedTuple(std::string scheme,
+                                         std::string host,
+                                         uint16_t port) {
+  // The inputs are passed through as ALREADY_CANONICALIZED; a tuple that is
+  // still not valid yields a default-constructed Origin.
+  SchemeHostPort tuple(std::move(scheme), std::move(host), port,
+                       SchemeHostPort::ALREADY_CANONICALIZED);
+  return tuple.IsValid() ? Origin(std::move(tuple)) : Origin();
+}
+
+// static
+Origin Origin::CreateOpaqueFromNormalizedPrecursorTuple(
+    std::string precursor_scheme,
+    std::string precursor_host,
+    uint16_t precursor_port,
+    const Origin::Nonce& nonce) {
+  SchemeHostPort precursor(std::move(precursor_scheme),
+                           std::move(precursor_host), precursor_port,
+                           SchemeHostPort::ALREADY_CANONICALIZED);
+  // An invalid precursor is fine for opaque origins. |nonce| is copied.
+  return Origin(nonce, std::move(precursor));
+}
+
+std::string Origin::Serialize() const {
+  // Opaque origins always serialize as "null".
+  if (opaque()) {
+    return "null";
+  }
+  // file: origins serialize to a fixed string; others defer to the tuple.
+  const bool is_file = scheme() == kFileScheme;
+  return is_file ? std::string("file://") : tuple_.Serialize();
+}
+
+GURL Origin::GetURL() const {
+  // Opaque origins yield a default-constructed GURL.
+  if (opaque()) {
+    return GURL();
+  }
+  // file: origins map to a fixed root URL; others defer to the tuple.
+  const bool is_file = scheme() == kFileScheme;
+  return is_file ? GURL("file:///") : tuple_.GetURL();
+}
+
+const base::UnguessableToken* Origin::GetNonceForSerialization() const {
+  return nonce_ ? &nonce_->token() : nullptr;
+}
+
+bool Origin::IsSameOriginWith(const Origin& other) const {
+  // scheme/host/port must match, even for opaque origins where |tuple_| holds
+  // the precursor origin.
+  return std::tie(tuple_, nonce_) == std::tie(other.tuple_, other.nonce_);
+}
+
+bool Origin::IsSameOriginWith(const GURL& url) const {
+  if (opaque())
+    return false;
+
+  // The `url::Origin::Create` call here preserves how IsSameOriginWith was used
+  // historically, even though in some scenarios it is not clearly correct:
+  // - Origin of about:blank and about:srcdoc cannot be correctly
+  //   computed/recovered.
+  // - Ideally passing an invalid `url` would be a caller error (e.g. a DCHECK).
+  // - The caller intent is not always clear wrt handling the outer-vs-inner
+  //   origins/URLs in blob: and filesystem: schemes.
+  return IsSameOriginWith(url::Origin::Create(url));
+}
+
+bool Origin::CanBeDerivedFrom(const GURL& url) const {
+  DCHECK(url.is_valid());
+
+  // For "no access" schemes, blink's SecurityOrigin will always create an
+  // opaque unique one. However, about: scheme is also registered as such but
+  // does not behave this way, therefore exclude it from this check.
+  if (base::Contains(url::GetNoAccessSchemes(), url.scheme()) &&
+      !url.SchemeIs(kAboutScheme)) {
+    // If |this| is not opaque, definitely return false as the expectation
+    // is for opaque origin.
+    if (!opaque())
+      return false;
+
+    // And if it is a unique opaque origin, it is definitely fine. But if
+    // there is a precursor stored, we should fall through to compare tuples.
+    if (!tuple_.IsValid())
+      return true;
+  }
+
+  SchemeHostPort url_tuple;
+
+  // Optimization for the common, success case: Scheme/Host/Port match on the
+  // precursor, and the URL is standard. Opaqueness does not matter as a tuple
+  // origin can always create an opaque tuple origin.
+  if (url.IsStandard()) {
+    // Note: if extra copies of the scheme and host are undesirable, this check
+    // can be implemented using StringPiece comparisons, but it would then have
+    // to explicitly account for checks on port numbers.
+    if (url.SchemeIsFileSystem()) {
+      url_tuple = SchemeHostPort(*url.inner_url());
+    } else {
+      url_tuple = SchemeHostPort(url);
+    }
+    return url_tuple == tuple_;
+
+    // Blob URLs still contain an inner origin, however it is not accessible
+    // through inner_url(), therefore it requires specific case to handle it.
+  } else if (url.SchemeIsBlob()) {
+    // If |this| doesn't contain any precursor information, it is a unique
+    // opaque origin. This is a valid case, as any browser-initiated
+    // navigation to about:blank or a data: URL will result in a document with
+    // such an origin, and it is valid for it to create blob: URLs.
+    if (!tuple_.IsValid())
+      return true;
+
+    url_tuple = SchemeHostPort(GURL(url.GetContent()));
+    return url_tuple == tuple_;
+  }
+
+  // At this point, the URL has a non-standard scheme.
+  DCHECK(!url.IsStandard());
+
+  // All about: URLs (about:blank, about:srcdoc) inherit their origin from
+  // the context which navigated them, which means that they can be in any
+  // type of origin.
+  if (url.SchemeIs(kAboutScheme))
+    return true;
+
+  // All data: URLs commit in opaque origins, therefore |this| must be opaque
+  // if |url| has data: scheme.
+  if (url.SchemeIs(kDataScheme))
+    return opaque();
+
+  // If |this| has no valid precursor tuple, it is a unique opaque origin,
+  // which is what we expect non-standard schemes to get.
+  if (!tuple_.IsValid())
+    return true;
+
+  // However, when a precursor is present, the schemes must match.
+  return url.scheme() == tuple_.scheme();
+}
+
+bool Origin::DomainIs(base::StringPiece canonical_domain) const {
+  return opaque() ? false : url::DomainIs(tuple_.host(), canonical_domain);
+}
+
+bool Origin::operator<(const Origin& other) const {
+  return std::tie(tuple_, nonce_) < std::tie(other.tuple_, other.nonce_);
+}
+
+Origin Origin::DeriveNewOpaqueOrigin() const {
+  return Origin(Nonce(), tuple_);  // New nonce; |tuple_| is the precursor.
+}
+
+std::string Origin::GetDebugString(bool include_nonce) const {
+  std::string debug = Serialize();
+
+  // Tuple origins are the simple case: only file origins get extra detail.
+  if (!opaque()) {
+    if (scheme() == kFileScheme)
+      base::StrAppend(&debug, {" [internally: ", tuple_.Serialize(), "]"});
+    return debug;
+  }
+
+  // Opaque origins all serialize to the same string, so log the nonce and
+  // the precursor; otherwise EXPECT_EQ failures between two opaque origins are
+  // nearly impossible to understand.
+  debug += " [internally:";
+  if (include_nonce) {
+    // raw_token() does not trigger lazy generation of the nonce.
+    const base::UnguessableToken& token = nonce_->raw_token();
+    debug += " (";
+    debug += token.is_empty() ? std::string("nonce TBD") : token.ToString();
+    debug += ")";
+  }
+  if (tuple_.IsValid())
+    base::StrAppend(&debug, {" derived from ", tuple_.Serialize(), "]"});
+  else
+    base::StrAppend(&debug, {" anonymous]"});
+  return debug;
+}
+
+Origin::Origin(SchemeHostPort tuple) : tuple_(std::move(tuple)) {
+  DCHECK(!opaque());         // |nonce_| is never set by this constructor.
+  DCHECK(tuple_.IsValid());  // A tuple origin requires a valid |tuple|.
+}
+
+// Constructs an opaque origin derived from |precursor|. |nonce| is copied.
+Origin::Origin(const Nonce& nonce, SchemeHostPort precursor)
+    : tuple_(std::move(precursor)), nonce_(nonce) {
+  DCHECK(opaque());
+  // |precursor| is retained, but not accessible via scheme()/host()/port().
+  DCHECK_EQ("", scheme());
+  DCHECK_EQ("", host());
+  DCHECK_EQ(0U, port());
+}
+
+absl::optional<std::string> Origin::SerializeWithNonce() const {
+  return SerializeWithNonceImpl();  // Leaves an ungenerated nonce as is.
+}
+
+absl::optional<std::string> Origin::SerializeWithNonceAndInitIfNeeded() {
+  GetNonceForSerialization();  // Result unused; presumably inits |nonce_|.
+  return SerializeWithNonceImpl();
+}
+
+// The pickle is saved in the following format, in order:
+// string - tuple_.Serialize().
+// uint64_t (if opaque) - high bits of nonce. 0 if not initialized.
+// uint64_t (if opaque) - low bits of nonce. 0 if not initialized.
+absl::optional<std::string> Origin::SerializeWithNonceImpl() const {
+  const bool is_opaque = opaque();
+  if (!is_opaque && !tuple_.IsValid())
+    return absl::nullopt;
+
+  base::Pickle pickle;
+  pickle.WriteString(tuple_.Serialize());
+  if (is_opaque) {
+    // A nonce that hasn't been initialized is written as two zeros.
+    // raw_token() is consulted first, so that token() is only reached once a
+    // value exists and serializing never triggers lazy generation.
+    const bool is_set = !nonce_->raw_token().is_empty();
+    pickle.WriteUInt64(is_set ? nonce_->token().GetHighForSerialization() : 0);
+    pickle.WriteUInt64(is_set ? nonce_->token().GetLowForSerialization() : 0);
+  }
+
+  // Base64 encode the data to make it nicer to play with.
+  return base::Base64Encode(base::span<const uint8_t>(
+      static_cast<const uint8_t*>(pickle.data()), pickle.size()));
+}
+
+// static
+absl::optional<Origin> Origin::Deserialize(const std::string& value) {
+  std::string data;
+  if (!base::Base64Decode(value, &data))
+    return absl::nullopt;
+  base::Pickle pickle(reinterpret_cast<char*>(&data[0]), data.size());
+  base::PickleIterator reader(pickle);
+
+  std::string pickled_url;
+  if (!reader.ReadString(&pickled_url))
+    return absl::nullopt;
+  GURL url(pickled_url);
+
+  // If only a tuple was serialized, then this origin is not opaque. For opaque
+  // origins, we expect two uint64's to be left in the pickle.
+  bool is_opaque = !reader.ReachedEnd();
+
+  // Opaque origins without a tuple are ok.
+  if (!is_opaque && !url.is_valid())
+    return absl::nullopt;
+  SchemeHostPort tuple(url);
+
+  // Possible successful early return if the pickled Origin was not opaque.
+  if (!is_opaque) {
+    // Origin(tuple) requires a valid tuple, which a valid URL need not yield.
+    if (!tuple.IsValid())
+      return absl::nullopt;
+    return Origin(std::move(tuple));
+  }
+
+  uint64_t nonce_high = 0;
+  if (!reader.ReadUInt64(&nonce_high))
+    return absl::nullopt;
+
+  uint64_t nonce_low = 0;
+  if (!reader.ReadUInt64(&nonce_low))
+    return absl::nullopt;
+
+  absl::optional<base::UnguessableToken> nonce_token =
+      base::UnguessableToken::Deserialize(nonce_high, nonce_low);
+
+  // If no token was deserialized, |nonce| stays uninitialized. The members are
+  // set directly because moving a Nonce, unlike copying, never generates one.
+  Origin::Nonce nonce;
+  if (nonce_token.has_value())
+    nonce = Origin::Nonce(nonce_token.value());
+  Origin origin;
+  origin.nonce_ = std::move(nonce);
+  origin.tuple_ = std::move(tuple);
+  return origin;
+}
+
+void Origin::WriteIntoTrace(perfetto::TracedValue context) const {
+  std::move(context).WriteString(GetDebugString());  // Nonce is included.
+}
+
+std::ostream& operator<<(std::ostream& out, const url::Origin& origin) {
+  // Streams the debug form, which exposes the nonce and the precursor.
+  return out << origin.GetDebugString();
+}
+
+std::ostream& operator<<(std::ostream& out, const url::Origin::Nonce& nonce) {
+  // Subtle: raw_token() keeps logging from lazily generating the token value.
+  const base::UnguessableToken& token = nonce.raw_token();
+  if (!token.is_empty())
+    return (out << token);
+  return (out << "(nonce TBD)");
+}
+
+bool IsSameOriginWith(const GURL& a, const GURL& b) {
+  return Origin::Create(a) == Origin::Create(b);
+}
+
+Origin::Nonce::Nonce() = default;  // |token_| is generated lazily by token().
+Origin::Nonce::Nonce(const base::UnguessableToken& token) : token_(token) {
+  CHECK(!token_.is_empty());  // An empty token is the "not generated" state.
+}
+
+const base::UnguessableToken& Origin::Nonce::token() const {
+  // Inspecting the value of a nonce triggers lazy-generation on first access.
+  // TODO(dcheng): UnguessableToken::is_empty should go away -- what sentinel
+  // value to use instead?
+  if (token_.is_empty())
+    token_ = base::UnguessableToken::Create();  // |token_| is mutable.
+  return token_;
+}
+
+const base::UnguessableToken& Origin::Nonce::raw_token() const {
+  return token_;  // Returned as is: may be empty, never generated here.
+}
+
+// Copying a Nonce triggers lazy-generation of the token via other.token().
+Origin::Nonce::Nonce(const Origin::Nonce& other) : token_(other.token()) {}
+
+Origin::Nonce& Origin::Nonce::operator=(const Origin::Nonce& other) {
+  // Reads other.token(), so copying triggers lazy-generation of the token.
+  token_ = other.token();
+  return *this;
+}
+
+// Moving a nonce does NOT trigger lazy-generation of the token. The
+// moved-from nonce |other| is reset to an empty token.
+Origin::Nonce::Nonce(Origin::Nonce&& other) noexcept
+    : token_(std::exchange(other.token_, base::UnguessableToken())) {}
+
+Origin::Nonce& Origin::Nonce::operator=(Origin::Nonce&& other) noexcept {
+  // Read before resetting |other|, so that a self-move keeps the token.
+  token_ = std::exchange(other.token_, base::UnguessableToken());
+  return *this;
+}
+
+bool Origin::Nonce::operator<(const Origin::Nonce& other) const {
+  // Ordering needs concrete values, so token() lazily generates both tokens
+  // before they are compared.
+  return token() < other.token();
+}
+
+bool Origin::Nonce::operator==(const Origin::Nonce& other) const {
+  // Comparing |token_| directly means neither token has to be generated.
+  // Two empty tokens compare equal by value, yet only count as equal here
+  // when both sides are the very same object.
+  return other.token_ == token_ && (!token_.is_empty() || &other == this);
+}
+
+bool Origin::Nonce::operator!=(const Origin::Nonce& other) const {
+  return !operator==(other);
+}
+
+namespace debug {
+
+ScopedOriginCrashKey::ScopedOriginCrashKey(
+    base::debug::CrashKeyString* crash_key,
+    const url::Origin* value)
+    : scoped_string_value_(
+          crash_key,
+          value == nullptr ? "nullptr"
+                           : value->GetDebugString(/*include_nonce=*/false)) {}
+
+ScopedOriginCrashKey::~ScopedOriginCrashKey() = default;
+
+}  // namespace debug
+
+}  // namespace url
diff --git a/origin.h b/origin.h
new file mode 100644
index 00000000000..a0575338ab8
--- /dev/null
+++ b/origin.h
@@ -0,0 +1,496 @@
+// Copyright 2015 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_ORIGIN_H_
+#define URL_ORIGIN_H_
+
+#include <stdint.h>
+
+#include <memory>
+#include <string>
+
+#include "base/component_export.h"
+#include "base/debug/alias.h"
+#include "base/debug/crash_logging.h"
+#include "base/gtest_prod_util.h"
+#include "base/strings/string_piece_forward.h"
+#include "base/strings/string_util.h"
+#include "base/trace_event/base_tracing_forward.h"
+#include "base/unguessable_token.h"
+#include "build/build_config.h"
+#include "build/buildflag.h"
+#include "third_party/abseil-cpp/absl/types/optional.h"
+#include "url/scheme_host_port.h"
+
+#if BUILDFLAG(IS_ANDROID)
+#include <jni.h>
+
+namespace base {
+namespace android {
+template <typename>
+class ScopedJavaLocalRef;
+template <typename>
+class JavaRef;
+}  // namespace android
+}  // namespace base
+#endif  // BUILDFLAG(IS_ANDROID)
+
+class GURL;
+
+namespace blink {
+class SecurityOrigin;
+class SecurityOriginTest;
+class StorageKey;
+class StorageKeyTest;
+}  // namespace blink
+
+namespace IPC {
+template <class P>
+struct ParamTraits;
+}  // namespace IPC
+
+namespace ipc_fuzzer {
+template <class T>
+struct FuzzTraits;
+}  // namespace ipc_fuzzer
+
+namespace mojo {
+template <typename DataViewType, typename T>
+struct StructTraits;
+struct UrlOriginAdapter;
+}  // namespace mojo
+
+namespace net {
+class SchemefulSite;
+}  // namespace net
+
+namespace url {
+
+namespace mojom {
+class OriginDataView;
+}  // namespace mojom
+
+// Per https://html.spec.whatwg.org/multipage/origin.html#origin, an origin is
+// either:
+// - a tuple origin of (scheme, host, port) as described in RFC 6454.
+// - an opaque origin with an internal value, and a memory of the tuple origin
+//   from which it was derived.
+//
+// TL;DR: If you need to make a security-relevant decision, use 'url::Origin'.
+// If you only need to extract the bits of a URL which are relevant for a
+// network connection, use 'url::SchemeHostPort'.
+//
+// STL;SDR: If you aren't making actual network connections, use 'url::Origin'.
+//
+// This class ought to be used when code needs to determine if two resources
+// are "same-origin", and when a canonical serialization of an origin is
+// required. Note that the canonical serialization of an origin *must not* be
+// used to determine if two resources are same-origin.
+//
+// A tuple origin, like 'SchemeHostPort', is composed of a tuple of (scheme,
+// host, port), but contains a number of additional concepts which make it
+// appropriate for use as a security boundary and access control mechanism
+// between contexts. Two tuple origins are same-origin if the tuples are equal.
+// A tuple origin may also be re-created from its serialization.
+//
+// An opaque origin has an internal globally unique identifier. When creating a
+// new opaque origin from a URL, a fresh globally unique identifier is
+// generated. However, if an opaque origin is copied or moved, the internal
+// globally unique identifier is preserved. Two opaque origins are same-origin
+// iff the globally unique identifiers match. Unlike tuple origins, an opaque
+// origin cannot be re-created from its serialization, which is always the
+// string "null".
+//
+// IMPORTANT: Since opaque origins always serialize as the string "null", it is
+// *never* safe to use the serialization for security checks!
+//
+// A tuple origin and an opaque origin are never same-origin.
+//
+// There are a few subtleties to note:
+//
+// * A default constructed Origin is opaque, with no precursor origin.
+//
+// * Invalid and non-standard GURLs are parsed as opaque origins. This includes
+//   non-hierarchical URLs like 'data:text/html,...' and 'javascript:alert(1)'.
+//
+// * GURLs with schemes of 'filesystem' or 'blob' parse the origin out of the
+//   internals of the URL. That is, 'filesystem:https://example.com/temporary/f'
+//   is parsed as ('https', 'example.com', 443).
+//
+// * GURLs with a 'file' scheme are tricky. They are parsed as ('file', '', 0),
+//   but their behavior may differ from embedder to embedder.
+//   TODO(dcheng): This behavior is not consistent with Blink's notion of file
+//   URLs, which always creates an opaque origin.
+//
+// * The host component of an IPv6 address includes brackets, just like the URL
+//   representation.
+//
+// * Constructing origins from GURLs (or from SchemeHostPort) is typically a red
+//   flag (this is true for `url::Origin::Create` but also to some extent for
+//   `url::Origin::Resolve`). See docs/security/origin-vs-url.md for more.
+//
+// * To answer the question "Are |this| and |that| "same-origin" with each
+//   other?", use |Origin::IsSameOriginWith|:
+//
+//     if (this.IsSameOriginWith(that)) {
+//       // Amazingness goes here.
+//     }
+class COMPONENT_EXPORT(URL) Origin {
+ public:
+  // Creates an opaque Origin with a nonce that is different from all previously
+  // existing origins.
+  Origin();
+
+  // WARNING: Converting an URL into an Origin is usually a red flag. See
+  // //docs/security/origin-vs-url.md for more details. Some discussion about
+  // deprecating the Create method can be found in https://crbug.com/1270878.
+  //
+  // Creates an Origin from `url`, as described at
+  // https://url.spec.whatwg.org/#origin, with the following additions:
+  // 1. If `url` is invalid or non-standard, an opaque Origin is constructed.
+  // 2. 'filesystem' URLs behave as 'blob' URLs (that is, the origin is parsed
+  //    out of everything in the URL which follows the scheme).
+  // 3. 'file' URLs all parse as ("file", "", 0).
+  //
+  // WARNING: `url::Origin::Create(url)` can give unexpected results if:
+  // 1) `url` is "about:blank", or "about:srcdoc" (returning unique, opaque
+  //    origin rather than the real origin of the frame)
+  // 2) `url` comes from a sandboxed frame (potentially returning a non-opaque
+  //    origin, when an opaque one is needed; see also
+  //    https://www.html5rocks.com/en/tutorials/security/sandboxed-iframes/)
+  // 3) Wrong `url` is used - e.g. in some navigations `base_url_for_data_url`
+  //    might need to be used instead of relying on
+  //    `content::NavigationHandle::GetURL`.
+  //
+  // WARNING: The returned Origin may have a different scheme and host from
+  // `url` (e.g. in case of blob URLs - see OriginTest.ConstructFromGURL).
+  //
+  // WARNING: data: URLs will correctly be translated into opaque origins,
+  // but the precursor origin will be lost (unlike with `url::Origin::Resolve`).
+  static Origin Create(const GURL& url);
+
+  // Creates an Origin for the resource `url` as if it were requested
+  // from the context of `base_origin`. If `url` is standard
+  // (in the sense that it embeds a complete origin, like http/https),
+  // this returns the same value as would Create().
+  //
+  // If `url` is "about:blank" or "about:srcdoc", this returns a copy of
+  // `base_origin`.
+  //
+  // Otherwise, returns a new opaque origin derived from `base_origin`.
+  // In this case, the resulting opaque origin will inherit the tuple
+  // (or precursor tuple) of `base_origin`, but will not be same origin
+  // with `base_origin`, even if `base_origin` is already opaque.
+  static Origin Resolve(const GURL& url, const Origin& base_origin);
+
+  // Copyable and movable.
+  Origin(const Origin&);
+  Origin& operator=(const Origin&);
+  Origin(Origin&&) noexcept;
+  Origin& operator=(Origin&&) noexcept;
+
+  // Creates an Origin from a |scheme|, |host|, and |port|. All the parameters
+  // must be valid and canonicalized. Returns nullopt if any parameter is not
+  // canonical, or if all the parameters are empty.
+  //
+  // This constructor should be used in order to pass 'Origin' objects back and
+  // forth over IPC (as transitioning through GURL would risk potentially
+  // dangerous recanonicalization); other potential callers should prefer the
+  // 'GURL'-based constructor.
+  static absl::optional<Origin> UnsafelyCreateTupleOriginWithoutNormalization(
+      base::StringPiece scheme,
+      base::StringPiece host,
+      uint16_t port);
+
+  // Creates an origin without sanity checking that the host is canonicalized.
+  // This should only be used when converting between already normalized types,
+  // and should NOT be used for IPC. Method takes std::strings for use with move
+  // operators to avoid copies.
+  static Origin CreateFromNormalizedTuple(std::string scheme,
+                                          std::string host,
+                                          uint16_t port);
+
+  ~Origin();
+
+  // Tuple accessors. For opaque origins, these return ("", "", 0).
+  const std::string& scheme() const {
+    return opaque() ? base::EmptyString() : tuple_.scheme();
+  }
+  const std::string& host() const {
+    return opaque() ? base::EmptyString() : tuple_.host();
+  }
+  uint16_t port() const { return opaque() ? 0 : tuple_.port(); }
+
+  bool opaque() const { return nonce_.has_value(); }
+
+  // An ASCII serialization of the Origin as per Section 6.2 of RFC 6454, with
+  // the addition that all Origins with a 'file' scheme serialize to "file://".
+  std::string Serialize() const;
+
+  // Two non-opaque Origins are "same-origin" if their schemes, hosts, and ports
+  // are exact matches. Two opaque origins are same-origin only if their
+  // internal nonce values match. A non-opaque origin is never same-origin with
+  // an opaque origin.
+  bool IsSameOriginWith(const Origin& other) const;
+  bool operator==(const Origin& other) const { return IsSameOriginWith(other); }
+  bool operator!=(const Origin& other) const {
+    return !IsSameOriginWith(other);
+  }
+
+  // Non-opaque origin is "same-origin" with `url` if their schemes, hosts, and
+  // ports are exact matches. Opaque origin is never "same-origin" with any
+  // `url`.  about:blank, about:srcdoc, and invalid GURLs are never
+  // "same-origin" with any origin. This method is a shorthand for
+  // `origin.IsSameOriginWith(url::Origin::Create(url))`.
+  //
+  // See also CanBeDerivedFrom.
+  bool IsSameOriginWith(const GURL& url) const;
+
+  // This method returns true for any |url| which if navigated to could result
+  // in an origin compatible with |this|.
+  bool CanBeDerivedFrom(const GURL& url) const;
+
+  // Get the scheme, host, and port from which this origin derives. For
+  // a tuple Origin, this gives the same values as calling scheme(), host()
+  // and port(). For an opaque Origin that was created by calling
+  // Origin::DeriveNewOpaqueOrigin() on a precursor or Origin::Resolve(),
+  // this returns the tuple inherited from the precursor.
+  //
+  // If this Origin is opaque and was created via the default constructor or
+  // Origin::Create(), the precursor origin is unknown.
+  //
+  // Use with great caution: opaque origins should generally not inherit
+  // privileges from the origins they derive from. However, in some cases
+  // (such as restrictions on process placement, or determining the http lock
+  // icon) this information may be relevant to ensure that entering an
+  // opaque origin does not grant privileges initially denied to the original
+  // non-opaque origin.
+  //
+  // This method has a deliberately obnoxious name to prompt caution in its use.
+  const SchemeHostPort& GetTupleOrPrecursorTupleIfOpaque() const {
+    return tuple_;
+  }
+
+  // Efficiently returns what GURL(Serialize()) would without re-parsing the
+  // URL. This can be used for the (rare) times a GURL representation is needed
+  // for an Origin.
+  // Note: The returned URL will not necessarily be serialized to the same value
+  // as the Origin would. The GURL will have an added "/" path for Origins with
+  // valid SchemeHostPorts and file Origins.
+  //
+  // Try not to use this method under normal circumstances, as it loses type
+  // information. Downstream consumers can mistake the returned GURL with a full
+  // URL (e.g. with a path component).
+  GURL GetURL() const;
+
+  // Same as GURL::DomainIs. If |this| origin is opaque, then returns false.
+  bool DomainIs(base::StringPiece canonical_domain) const;
+
+  // Allows Origin to be used as a key in STL (for example, a std::set or
+  // std::map).
+  bool operator<(const Origin& other) const;
+
+  // Creates a new opaque origin that is guaranteed to be cross-origin to all
+  // currently existing origins. An origin created by this method retains its
+  // identity across copies. Copies are guaranteed to be same-origin to each
+  // other, e.g.
+  //
+  //   url::Origin page = Origin::Create(GURL("http://example.com"))
+  //   url::Origin a = page.DeriveNewOpaqueOrigin();
+  //   url::Origin b = page.DeriveNewOpaqueOrigin();
+  //   url::Origin c = a;
+  //   url::Origin d = b;
+  //
+  // |a| and |c| are same-origin, since |c| was copied from |a|. |b| and |d| are
+  // same-origin as well, since |d| was copied from |b|. All other combinations
+  // of origins are considered cross-origin, e.g. |a| is cross-origin to |b| and
+  // |d|, |b| is cross-origin to |a| and |c|, |c| is cross-origin to |b| and
+  // |d|, and |d| is cross-origin to |a| and |c|.
+  Origin DeriveNewOpaqueOrigin() const;
+
+  // Creates a string representation of the object that can be used for logging
+  // and debugging. It serializes the internal state, such as the nonce value
+  // and precursor information.
+  std::string GetDebugString(bool include_nonce = true) const;
+
+#if BUILDFLAG(IS_ANDROID)
+  base::android::ScopedJavaLocalRef<jobject> CreateJavaObject() const;
+  static Origin FromJavaObject(
+      const base::android::JavaRef<jobject>& java_origin);
+  static jlong CreateNative(JNIEnv* env,
+                            const base::android::JavaRef<jstring>& java_scheme,
+                            const base::android::JavaRef<jstring>& java_host,
+                            uint16_t port,
+                            bool is_opaque,
+                            uint64_t tokenHighBits,
+                            uint64_t tokenLowBits);
+#endif  // BUILDFLAG(IS_ANDROID)
+
+  void WriteIntoTrace(perfetto::TracedValue context) const;
+
+ private:
+  friend class blink::SecurityOrigin;
+  friend class blink::SecurityOriginTest;
+  friend class blink::StorageKey;
+  // SchemefulSite needs access to the serialization/deserialization logic which
+  // includes the nonce.
+  friend class net::SchemefulSite;
+  friend class OriginTest;
+  friend struct mojo::UrlOriginAdapter;
+  friend struct ipc_fuzzer::FuzzTraits<Origin>;
+  friend struct mojo::StructTraits<url::mojom::OriginDataView, url::Origin>;
+  friend IPC::ParamTraits<url::Origin>;
+  friend COMPONENT_EXPORT(URL) std::ostream& operator<<(std::ostream& out,
+                                                        const Origin& origin);
+  friend class blink::StorageKeyTest;
+
+  // Origin::Nonce is a wrapper around base::UnguessableToken that generates
+  // the random value only when the value is first accessed. The lazy generation
+  // allows Origin to be default-constructed quickly, without spending time
+  // in random number generation.
+  //
+  // TODO(nick): Should this optimization move into UnguessableToken, once it no
+  // longer treats the Null case specially?
+  class COMPONENT_EXPORT(URL) Nonce {
+   public:
+    // Creates a nonce to hold a newly-generated UnguessableToken. The actual
+    // token value will be generated lazily.
+    Nonce();
+
+    // Creates a nonce to hold an already-generated UnguessableToken value. This
+    // constructor should only be used for IPC serialization and testing --
+    // regular code should never need to touch the UnguessableTokens directly,
+    // and the default constructor is faster.
+    explicit Nonce(const base::UnguessableToken& token);
+
+    // Accessor, which lazily initializes the underlying |token_| member.
+    const base::UnguessableToken& token() const;
+
+    // Do not use in cases where lazy initialization is expected! This
+    // accessor does not initialize the |token_| member.
+    const base::UnguessableToken& raw_token() const;
+
+    // Copyable and movable. Copying a Nonce triggers lazy-initialization,
+    // moving it does not.
+    Nonce(const Nonce&);
+    Nonce& operator=(const Nonce&);
+    Nonce(Nonce&&) noexcept;
+    Nonce& operator=(Nonce&&) noexcept;
+
+    // Note that operator<, used by maps type containers, will trigger |token_|
+    // lazy-initialization. Equality comparisons do not.
+    bool operator<(const Nonce& other) const;
+    bool operator==(const Nonce& other) const;
+    bool operator!=(const Nonce& other) const;
+
+   private:
+    friend class OriginTest;
+
+    // mutable to support lazy generation.
+    mutable base::UnguessableToken token_;
+  };
+
+  // This needs to be friended within Origin as well, since Nonce is a private
+  // nested class of Origin.
+  friend COMPONENT_EXPORT(URL) std::ostream& operator<<(std::ostream& out,
+                                                        const Nonce& nonce);
+
+  // Creates an origin without sanity checking that the host is canonicalized.
+  // This should only be used when converting between already normalized types,
+  // and should NOT be used for IPC. Method takes std::strings for use with move
+  // operators to avoid copies.
+  static Origin CreateOpaqueFromNormalizedPrecursorTuple(
+      std::string precursor_scheme,
+      std::string precursor_host,
+      uint16_t precursor_port,
+      const Nonce& nonce);
+
+  // Creates an opaque Origin with the identity given by |nonce|, and an
+  // optional precursor origin given by |precursor_scheme|, |precursor_host| and
+  // |precursor_port|. Returns nullopt if any parameter is not canonical. When
+  // the precursor is unknown, the precursor parameters should be ("", "", 0).
+  //
+  // This factory method should be used in order to pass opaque Origin objects
+  // back and forth over IPC (as transitioning through GURL would risk
+  // potentially dangerous recanonicalization).
+  static absl::optional<Origin> UnsafelyCreateOpaqueOriginWithoutNormalization(
+      base::StringPiece precursor_scheme,
+      base::StringPiece precursor_host,
+      uint16_t precursor_port,
+      const Nonce& nonce);
+
+  // Constructs a non-opaque tuple origin. |tuple| must be valid.
+  explicit Origin(SchemeHostPort tuple);
+
+  // Constructs an opaque origin derived from the |precursor| tuple, with the
+  // given |nonce|.
+  Origin(const Nonce& nonce, SchemeHostPort precursor);
+
+  // Get the nonce associated with this origin, if it is opaque, or nullptr
+  // otherwise. This should be used only when trying to send an Origin across an
+  // IPC pipe.
+  const base::UnguessableToken* GetNonceForSerialization() const;
+
+  // Serializes this Origin, including its nonce if it is opaque. If a
+  // non-opaque origin's |tuple_| is invalid, nullopt is returned. If the nonce
+  // is not initialized, a nonce of 0 is used. Use this method sparingly, as an
+  // opaque origin will never be matchable in future browser sessions.
+  absl::optional<std::string> SerializeWithNonce() const;
+
+  // Like SerializeWithNonce(), but forces |nonce_| to be initialized prior to
+  // serializing.
+  absl::optional<std::string> SerializeWithNonceAndInitIfNeeded();
+
+  absl::optional<std::string> SerializeWithNonceImpl() const;
+
+  // Deserializes an origin produced by SerializeWithNonce(). Returns nullopt
+  // if the value was invalid in any way.
+  static absl::optional<Origin> Deserialize(const std::string& value);
+
+  // The tuple is used for both tuple origins (e.g. https://example.com:80), as
+  // well as for opaque origins, where it tracks the tuple origin from which
+  // the opaque origin was initially derived (we call this the "precursor"
+  // origin).
+  SchemeHostPort tuple_;
+
+  // The nonce is used for maintaining identity of an opaque origin. This
+  // nonce is preserved when an opaque origin is copied or moved. An Origin
+  // is considered opaque if and only if |nonce_| holds a value.
+  absl::optional<Nonce> nonce_;
+};
+
+// Pretty-printers for logging. These expose the internal state of the nonce.
+COMPONENT_EXPORT(URL)
+std::ostream& operator<<(std::ostream& out, const Origin& origin);
+COMPONENT_EXPORT(URL)
+std::ostream& operator<<(std::ostream& out, const Origin::Nonce& nonce);
+
+COMPONENT_EXPORT(URL) bool IsSameOriginWith(const GURL& a, const GURL& b);
+
+// DEBUG_ALIAS_FOR_ORIGIN(var_name, origin) hands the serialization of `origin`
+// (not the Origin object itself) to DEBUG_ALIAS_FOR_CSTR under the name
+// `<var_name>`, with 128 as its size argument, to preserve it in crash dumps.
+#define DEBUG_ALIAS_FOR_ORIGIN(var_name, origin) \
+  DEBUG_ALIAS_FOR_CSTR(var_name, (origin).Serialize().c_str(), 128)
+
+namespace debug {
+
+class COMPONENT_EXPORT(URL) ScopedOriginCrashKey {
+ public:
+  ScopedOriginCrashKey(base::debug::CrashKeyString* crash_key,
+                       const url::Origin* value);  // |value| may be null.
+  ~ScopedOriginCrashKey();
+
+  ScopedOriginCrashKey(const ScopedOriginCrashKey&) = delete;  // Not copyable.
+  ScopedOriginCrashKey& operator=(const ScopedOriginCrashKey&) = delete;
+
+ private:
+  base::debug::ScopedCrashKeyString scoped_string_value_;
+};
+
+}  // namespace debug
+
+}  // namespace url
+
+#endif  // URL_ORIGIN_H_
diff --git a/origin_abstract_tests.cc b/origin_abstract_tests.cc
new file mode 100644
index 00000000000..1bc032e4eb0
--- /dev/null
+++ b/origin_abstract_tests.cc
@@ -0,0 +1,104 @@
+// Copyright 2021 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/origin_abstract_tests.h"
+
+namespace url {
+
+void ExpectParsedUrlsEqual(const GURL& a, const GURL& b) {
+  EXPECT_EQ(a, b);  // GURL equality first, then each parsed component below.
+  const Parsed& a_parsed = a.parsed_for_possibly_invalid_spec();
+  const Parsed& b_parsed = b.parsed_for_possibly_invalid_spec();
+  EXPECT_EQ(a_parsed.scheme.begin, b_parsed.scheme.begin);
+  EXPECT_EQ(a_parsed.scheme.len, b_parsed.scheme.len);
+  EXPECT_EQ(a_parsed.username.begin, b_parsed.username.begin);
+  EXPECT_EQ(a_parsed.username.len, b_parsed.username.len);
+  EXPECT_EQ(a_parsed.password.begin, b_parsed.password.begin);
+  EXPECT_EQ(a_parsed.password.len, b_parsed.password.len);
+  EXPECT_EQ(a_parsed.host.begin, b_parsed.host.begin);
+  EXPECT_EQ(a_parsed.host.len, b_parsed.host.len);
+  EXPECT_EQ(a_parsed.port.begin, b_parsed.port.begin);
+  EXPECT_EQ(a_parsed.port.len, b_parsed.port.len);
+  EXPECT_EQ(a_parsed.path.begin, b_parsed.path.begin);
+  EXPECT_EQ(a_parsed.path.len, b_parsed.path.len);
+  EXPECT_EQ(a_parsed.query.begin, b_parsed.query.begin);
+  EXPECT_EQ(a_parsed.query.len, b_parsed.query.len);
+  EXPECT_EQ(a_parsed.ref.begin, b_parsed.ref.begin);
+  EXPECT_EQ(a_parsed.ref.len, b_parsed.ref.len);
+}
+
+// static -- parses |s| as a GURL and passes it to Origin::Create().
+Origin UrlOriginTestTraits::CreateOriginFromString(base::StringPiece s) {
+  return Origin::Create(GURL(s));
+}
+
+// static -- returns a default-constructed Origin.
+Origin UrlOriginTestTraits::CreateUniqueOpaqueOrigin() {
+  return Origin();
+}
+
+// static -- resolves |url| via Origin::Resolve() against |reference_origin|.
+Origin UrlOriginTestTraits::CreateWithReferenceOrigin(
+    base::StringPiece url,
+    const Origin& reference_origin) {
+  return Origin::Resolve(GURL(url), reference_origin);
+}
+
+// static
+Origin UrlOriginTestTraits::DeriveNewOpaqueOrigin(
+    const Origin& reference_origin) {
+  return reference_origin.DeriveNewOpaqueOrigin();
+}
+
+// static
+bool UrlOriginTestTraits::IsOpaque(const Origin& origin) {
+  return origin.opaque();
+}
+
+// static -- returns a copy of origin.scheme().
+std::string UrlOriginTestTraits::GetScheme(const Origin& origin) {
+  return origin.scheme();
+}
+
+// static -- returns a copy of origin.host().
+std::string UrlOriginTestTraits::GetHost(const Origin& origin) {
+  return origin.host();
+}
+
+// static
+uint16_t UrlOriginTestTraits::GetPort(const Origin& origin) {
+  return origin.port();
+}
+
+// static -- returns a copy of the tuple held by |origin|.
+SchemeHostPort UrlOriginTestTraits::GetTupleOrPrecursorTupleIfOpaque(
+    const Origin& origin) {
+  return origin.GetTupleOrPrecursorTupleIfOpaque();
+}
+
+// static
+bool UrlOriginTestTraits::IsSameOrigin(const Origin& a, const Origin& b) {
+  return a.IsSameOriginWith(b);
+}
+
+// static
+std::string UrlOriginTestTraits::Serialize(const Origin& origin) {
+  std::string serialized = origin.Serialize();
+
+  // Extra test assertion: Origin::GetURL() (which has no equivalent in
+  // blink::SecurityOrigin) must parse the same as GURL(Serialize()).
+  ExpectParsedUrlsEqual(GURL(serialized), origin.GetURL());
+
+  return serialized;
+}
+
+// static
+bool UrlOriginTestTraits::IsValidUrl(base::StringPiece str) {
+  return GURL(str).is_valid();
+}
+
+// This is an abstract test suite which is instantiated by each implementation.
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AbstractOriginTest);
+
+}  // namespace url
diff --git a/origin_abstract_tests.h b/origin_abstract_tests.h
new file mode 100644
index 00000000000..63dded619f3
--- /dev/null
+++ b/origin_abstract_tests.h
@@ -0,0 +1,527 @@
+// Copyright 2020 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_ORIGIN_ABSTRACT_TESTS_H_
+#define URL_ORIGIN_ABSTRACT_TESTS_H_
+
+#include <string>
+#include <type_traits>
+
+#include "base/containers/contains.h"
+#include "base/strings/string_piece.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "url/gurl.h"
+#include "url/origin.h"
+#include "url/scheme_host_port.h"
+#include "url/url_util.h"
+
+namespace url {
+
+void ExpectParsedUrlsEqual(const GURL& a, const GURL& b);
+
+// AbstractOriginTest below abstracts away differences between url::Origin and
+// blink::SecurityOrigin by parametrizing the tests with a class that has to
+// expose the same public members as UrlOriginTestTraits below.
+class UrlOriginTestTraits {
+ public:
+  using OriginType = Origin;
+
+  // Constructing an origin.
+  static OriginType CreateOriginFromString(base::StringPiece s);
+  static OriginType CreateUniqueOpaqueOrigin();
+  static OriginType CreateWithReferenceOrigin(
+      base::StringPiece url,
+      const OriginType& reference_origin);
+  static OriginType DeriveNewOpaqueOrigin(const OriginType& reference_origin);
+
+  // Accessors for origin properties.
+  static bool IsOpaque(const OriginType& origin);
+  static std::string GetScheme(const OriginType& origin);
+  static std::string GetHost(const OriginType& origin);
+  static uint16_t GetPort(const OriginType& origin);
+  static SchemeHostPort GetTupleOrPrecursorTupleIfOpaque(
+      const OriginType& origin);
+
+  // Wrappers for other instance methods of OriginType.
+  static bool IsSameOrigin(const OriginType& a, const OriginType& b);
+  static std::string Serialize(const OriginType& origin);
+
+  // "Accessors" of URL properties.
+  //
+  // TODO(lukasza): Consider merging together OriginTraitsBase here and
+  // UrlTraitsBase in //url/gurl_abstract_tests.h.
+  static bool IsValidUrl(base::StringPiece str);
+
+  // Only static members = no constructors are needed.
+  UrlOriginTestTraits() = delete;
+};
+
+// Test suite for tests that cover both url::Origin and blink::SecurityOrigin.
+template <typename TOriginTraits>
+class AbstractOriginTest : public testing::Test {
+ public:
+  void SetUp() override {
+    const char* kSchemesToRegister[] = {
+        "noaccess",
+        "std-with-host",
+        "noaccess-std-with-host",
+        "local",
+        "local-noaccess",
+        "local-std-with-host",
+        "local-noaccess-std-with-host",
+        "also-local",
+        "sec",
+        "sec-std-with-host",
+        "sec-noaccess",
+    };
+    for (const char* kScheme : kSchemesToRegister) {
+      std::string scheme(kScheme);
+      if (base::Contains(scheme, "noaccess"))
+        AddNoAccessScheme(kScheme);
+      if (base::Contains(scheme, "std-with-host"))
+        AddStandardScheme(kScheme, SchemeType::SCHEME_WITH_HOST);
+      if (base::Contains(scheme, "local"))
+        AddLocalScheme(kScheme);
+      if (base::Contains(scheme, "sec"))
+        AddSecureScheme(kScheme);
+    }
+  }
+
+ protected:
+  // Wrappers that help elide away TOriginTraits.
+  //
+  // Note that calling the wrappers needs to be prefixed with `this->...` to
+  // avoid hitting: explicit qualification required to use member 'IsOpaque'
+  // from dependent base class.
+  using OriginType = typename TOriginTraits::OriginType;
+  OriginType CreateOriginFromString(base::StringPiece s) {
+    return TOriginTraits::CreateOriginFromString(s);
+  }
+  OriginType CreateUniqueOpaqueOrigin() {
+    return TOriginTraits::CreateUniqueOpaqueOrigin();
+  }
+  OriginType CreateWithReferenceOrigin(base::StringPiece url,
+                                       const OriginType& reference_origin) {
+    return TOriginTraits::CreateWithReferenceOrigin(url, reference_origin);
+  }
+  OriginType DeriveNewOpaqueOrigin(const OriginType& reference_origin) {
+    return TOriginTraits::DeriveNewOpaqueOrigin(reference_origin);
+  }
+  bool IsOpaque(const OriginType& origin) {
+    return TOriginTraits::IsOpaque(origin);
+  }
+  std::string GetScheme(const OriginType& origin) {
+    return TOriginTraits::GetScheme(origin);
+  }
+  std::string GetHost(const OriginType& origin) {
+    return TOriginTraits::GetHost(origin);
+  }
+  uint16_t GetPort(const OriginType& origin) {
+    return TOriginTraits::GetPort(origin);
+  }
+  SchemeHostPort GetTupleOrPrecursorTupleIfOpaque(const OriginType& origin) {
+    return TOriginTraits::GetTupleOrPrecursorTupleIfOpaque(origin);
+  }
+  bool IsSameOrigin(const OriginType& a, const OriginType& b) {
+    bool is_a_same_with_b = TOriginTraits::IsSameOrigin(a, b);
+    bool is_b_same_with_a = TOriginTraits::IsSameOrigin(b, a);
+    EXPECT_EQ(is_a_same_with_b, is_b_same_with_a);
+    return is_a_same_with_b;
+  }
+  std::string Serialize(const OriginType& origin) {
+    return TOriginTraits::Serialize(origin);
+  }
+  bool IsValidUrl(base::StringPiece str) {
+    return TOriginTraits::IsValidUrl(str);
+  }
+
+#define EXPECT_SAME_ORIGIN(a, b)                                 \
+  EXPECT_TRUE(this->IsSameOrigin((a), (b)))                      \
+      << "When checking if \"" << this->Serialize(a) << "\" is " \
+      << "same-origin with \"" << this->Serialize(b) << "\""
+
+#define EXPECT_CROSS_ORIGIN(a, b)                                \
+  EXPECT_FALSE(this->IsSameOrigin((a), (b)))                     \
+      << "When checking if \"" << this->Serialize(a) << "\" is " \
+      << "cross-origin from \"" << this->Serialize(b) << "\""
+
+  void VerifyOriginInvariants(const OriginType& origin) {
+    // An origin is always same-origin with itself.
+    EXPECT_SAME_ORIGIN(origin, origin);
+
+    // A copy of |origin| should be same-origin as well.
+    auto origin_copy = origin;
+    EXPECT_EQ(this->GetScheme(origin), this->GetScheme(origin_copy));
+    EXPECT_EQ(this->GetHost(origin), this->GetHost(origin_copy));
+    EXPECT_EQ(this->GetPort(origin), this->GetPort(origin_copy));
+    EXPECT_EQ(this->IsOpaque(origin), this->IsOpaque(origin_copy));
+    EXPECT_SAME_ORIGIN(origin, origin_copy);
+
+    // An origin is always cross-origin from another, unique, opaque origin.
+    EXPECT_CROSS_ORIGIN(origin, this->CreateUniqueOpaqueOrigin());
+
+    // An origin is always cross-origin from another tuple origin.
+    auto different_tuple_origin =
+        this->CreateOriginFromString("https://not-in-the-list.test/");
+    EXPECT_CROSS_ORIGIN(origin, different_tuple_origin);
+
+    // Deriving an origin for "about:blank".
+    auto about_blank_origin1 =
+        this->CreateWithReferenceOrigin("about:blank", origin);
+    auto about_blank_origin2 =
+        this->CreateWithReferenceOrigin("about:blank?bar#foo", origin);
+    EXPECT_SAME_ORIGIN(origin, about_blank_origin1);
+    EXPECT_SAME_ORIGIN(origin, about_blank_origin2);
+
+    // Derived opaque origins.
+    std::vector<OriginType> derived_origins = {
+        this->DeriveNewOpaqueOrigin(origin),
+        this->CreateWithReferenceOrigin("data:text/html,baz", origin),
+        this->DeriveNewOpaqueOrigin(about_blank_origin1),
+    };
+    for (size_t i = 0; i < derived_origins.size(); i++) {
+      SCOPED_TRACE(testing::Message() << "Derived origin #" << i);
+      const OriginType& derived_origin = derived_origins[i];
+      EXPECT_TRUE(this->IsOpaque(derived_origin));
+      EXPECT_SAME_ORIGIN(derived_origin, derived_origin);
+      EXPECT_CROSS_ORIGIN(origin, derived_origin);
+      EXPECT_EQ(this->GetTupleOrPrecursorTupleIfOpaque(origin),
+                this->GetTupleOrPrecursorTupleIfOpaque(derived_origin));
+    }
+  }
+
+  void VerifyUniqueOpaqueOriginInvariants(const OriginType& origin) {
+    if (!this->IsOpaque(origin)) {
+      ADD_FAILURE() << "Got unexpectedly non-opaque origin: "
+                    << this->Serialize(origin);
+      return;  // Skip other test assertions.
+    }
+
+    // Opaque origins should have an "empty" scheme, host and port.
+    EXPECT_EQ("", this->GetScheme(origin));
+    EXPECT_EQ("", this->GetHost(origin));
+    EXPECT_EQ(0, this->GetPort(origin));
+
+    // Unique opaque origins should have an empty precursor tuple.
+    EXPECT_EQ(SchemeHostPort(), this->GetTupleOrPrecursorTupleIfOpaque(origin));
+
+    // Serialization test.
+    EXPECT_EQ("null", this->Serialize(origin));
+
+    // Invariants that should hold for any origin.
+    VerifyOriginInvariants(origin);
+  }
+
+  void TestUniqueOpaqueOrigin(base::StringPiece test_input) {
+    auto origin = this->CreateOriginFromString(test_input);
+    this->VerifyUniqueOpaqueOriginInvariants(origin);
+
+    // Re-creating from the URL should be cross-origin.
+    auto origin_recreated_from_same_input =
+        this->CreateOriginFromString(test_input);
+    EXPECT_CROSS_ORIGIN(origin, origin_recreated_from_same_input);
+  }
+
+  void VerifyTupleOriginInvariants(const OriginType& origin,
+                                   const SchemeHostPort& expected_tuple) {
+    if (this->IsOpaque(origin)) {
+      ADD_FAILURE() << "Got unexpectedly opaque origin";
+      return;  // Skip other test assertions.
+    }
+    SCOPED_TRACE(testing::Message()
+                 << "Actual origin: " << this->Serialize(origin));
+
+    // Compare `origin` against the `expected_tuple`.
+    EXPECT_EQ(expected_tuple.scheme(), this->GetScheme(origin));
+    EXPECT_EQ(expected_tuple.host(), this->GetHost(origin));
+    EXPECT_EQ(expected_tuple.port(), this->GetPort(origin));
+    EXPECT_EQ(expected_tuple, this->GetTupleOrPrecursorTupleIfOpaque(origin));
+
+    // Serialization test.
+    //
+    // TODO(lukasza): Consider preserving the hostname when serializing file:
+    // URLs.  Dropping the hostname seems incompatible with section 6 of
+    // rfc6454.  Even though section 4 says that "the implementation MAY
+    // return an implementation-defined value", it seems that Chromium
+    // implementation *does* include the hostname in the origin SchemeHostPort
+    // tuple.
+    if (expected_tuple.scheme() != kFileScheme || expected_tuple.host() == "") {
+      EXPECT_SAME_ORIGIN(origin,
+                         this->CreateOriginFromString(this->Serialize(origin)));
+    }
+
+    // Invariants that should hold for any origin.
+    VerifyOriginInvariants(origin);
+  }
+
+ private:
+  ScopedSchemeRegistryForTests scoped_scheme_registry_;
+};
+
+TYPED_TEST_SUITE_P(AbstractOriginTest);
+
+TYPED_TEST_P(AbstractOriginTest, NonStandardSchemeWithAndroidWebViewHack) {
+  EnableNonStandardSchemesForAndroidWebView();
+
+  // Regression test for https://crbug.com/896059.
+  auto origin = this->CreateOriginFromString("unknown-scheme://");
+  EXPECT_FALSE(this->IsOpaque(origin));
+  EXPECT_EQ("unknown-scheme", this->GetScheme(origin));
+  EXPECT_EQ("", this->GetHost(origin));
+  EXPECT_EQ(0, this->GetPort(origin));
+
+  // about:blank translates into an opaque origin, even in presence of
+  // EnableNonStandardSchemesForAndroidWebView.
+  origin = this->CreateOriginFromString("about:blank");
+  EXPECT_TRUE(this->IsOpaque(origin));
+}
+
+TYPED_TEST_P(AbstractOriginTest, OpaqueOriginsFromValidUrls) {
+  const char* kTestCases[] = {
+      // Built-in noaccess schemes.
+      "data:text/html,Hello!",
+      "javascript:alert(1)",
+      "about:blank",
+
+      // Opaque blob URLs.
+      "blob:null/foo",        // blob:null (actually a valid URL)
+      "blob:data:foo",        // blob + data (which is nonstandard)
+      "blob:about://blank/",  // blob + about (which is nonstandard)
+      "blob:about:blank/",    // blob + about (which is nonstandard)
+      "blob:blob:http://www.example.com/guid-goes-here",
+      "blob:filesystem:ws:b/.",
+      "blob:filesystem:ftp://a/b",
+      "blob:blob:file://localhost/foo/bar",
+  };
+
+  for (const char* test_input : kTestCases) {
+    SCOPED_TRACE(testing::Message() << "Test input: " << test_input);
+
+    // Verify that `origin` is opaque not just because `test_input` results in
+    // an invalid URL (because of a typo in the scheme name, or because of a
+    // technicality like having no host in a noaccess-std-with-host: scheme).
+    EXPECT_TRUE(this->IsValidUrl(test_input));
+
+    this->TestUniqueOpaqueOrigin(test_input);
+  }
+}
+
+TYPED_TEST_P(AbstractOriginTest, OpaqueOriginsFromInvalidUrls) {
+  // TODO(lukasza): Consider moving those to GURL/KURL tests that verify what
+  // inputs are parsed as an invalid URL.
+
+  const char* kTestCases[] = {
+      // Invalid file: URLs.
+      "file://example.com:443/etc/passwd",  // No port expected.
+
+      // Invalid HTTP URLs.
+      "http",
+      "http:",
+      "http:/",
+      "http://",
+      "http://:",
+      "http://:1",
+      "http::///invalid.example.com/",
+      "http://example.com:65536/",                    // Port out of range.
+      "http://example.com:-1/",                       // Port out of range.
+      "http://example.com:18446744073709551616/",     // Port = 2^64.
+      "http://example.com:18446744073709551616999/",  // Lots of port digits.
+
+      // Invalid filesystem URLs.
+      "filesystem:http://example.com/",  // Missing /type/.
+      "filesystem:local:baz./type/",
+      "filesystem:local://hostname/type/",
+      "filesystem:unknown-scheme://hostname/type/",
+      "filesystem:filesystem:http://example.org:88/foo/bar",
+
+      // Invalid IP addresses
+      "http://[]/",
+      "http://[2001:0db8:0000:0000:0000:0000:0000:0000:0001]/",  // 9 groups.
+
+      // Unknown scheme without a colon character (":") gives an invalid URL.
+      "unknown-scheme",
+
+      // Standard schemes require a hostname (and result in an opaque origin if
+      // the hostname is missing).
+      "local-std-with-host:",
+      "noaccess-std-with-host:",
+  };
+
+  for (const char* test_input : kTestCases) {
+    SCOPED_TRACE(testing::Message() << "Test input: " << test_input);
+
+    // All testcases here are expected to represent invalid URLs (e.g.
+    // because of a malformed port or host, or because of a technicality like
+    // having no host in a noaccess-std-with-host: scheme).
+    EXPECT_FALSE(this->IsValidUrl(test_input));
+
+    // Invalid URLs should always result in an opaque origin.
+    this->TestUniqueOpaqueOrigin(test_input);
+  }
+}
+
+TYPED_TEST_P(AbstractOriginTest, TupleOrigins) {
+  struct TestCase {
+    const char* input;
+    SchemeHostPort expected_tuple;
+  } kTestCases[] = {
+      // file: URLs
+      {"file:///etc/passwd", {"file", "", 0}},
+      {"file://example.com/etc/passwd", {"file", "example.com", 0}},
+      {"file:///", {"file", "", 0}},
+      {"file://hostname/C:/dir/file.txt", {"file", "hostname", 0}},
+
+      // HTTP URLs
+      {"http://example.com/", {"http", "example.com", 80}},
+      {"http://example.com:80/", {"http", "example.com", 80}},
+      {"http://example.com:123/", {"http", "example.com", 123}},
+      {"http://example.com:0/", {"http", "example.com", 0}},
+      {"http://example.com:65535/", {"http", "example.com", 65535}},
+      {"https://example.com/", {"https", "example.com", 443}},
+      {"https://example.com:443/", {"https", "example.com", 443}},
+      {"https://example.com:123/", {"https", "example.com", 123}},
+      {"https://example.com:0/", {"https", "example.com", 0}},
+      {"https://example.com:65535/", {"https", "example.com", 65535}},
+      {"http://user:pass@example.com/", {"http", "example.com", 80}},
+      {"http://example.com:123/?query", {"http", "example.com", 123}},
+      {"https://example.com/#1234", {"https", "example.com", 443}},
+      {"https://u:p@example.com:123/?query#1234",
+       {"https", "example.com", 123}},
+      {"http://example/", {"http", "example", 80}},
+
+      // Blob URLs.
+      {"blob:http://example.com/guid-goes-here", {"http", "example.com", 80}},
+      {"blob:http://example.com:123/guid-goes-here",
+       {"http", "example.com", 123}},
+      {"blob:https://example.com/guid-goes-here",
+       {"https", "example.com", 443}},
+      {"blob:http://u:p@example.com/guid-goes-here",
+       {"http", "example.com", 80}},
+
+      // Filesystem URLs.
+      {"filesystem:http://example.com/type/", {"http", "example.com", 80}},
+      {"filesystem:http://example.com:123/type/", {"http", "example.com", 123}},
+      {"filesystem:https://example.com/type/", {"https", "example.com", 443}},
+      {"filesystem:https://example.com:123/type/",
+       {"https", "example.com", 123}},
+      {"filesystem:local-std-with-host:baz./type/",
+       {"local-std-with-host", "baz.", 0}},
+
+      // IP Addresses
+      {"http://192.168.9.1/", {"http", "192.168.9.1", 80}},
+      {"http://[2001:db8::1]/", {"http", "[2001:db8::1]", 80}},
+      {"http://[2001:0db8:0000:0000:0000:0000:0000:0001]/",
+       {"http", "[2001:db8::1]", 80}},
+      {"http://1/", {"http", "0.0.0.1", 80}},
+      {"http://1:1/", {"http", "0.0.0.1", 1}},
+      {"http://3232237825/", {"http", "192.168.9.1", 80}},
+
+      // Punycode
+      {"http://☃.net/", {"http", "xn--n3h.net", 80}},
+      {"blob:http://☃.net/", {"http", "xn--n3h.net", 80}},
+      {"local-std-with-host:↑↑↓↓←→←→ba.↑↑↓↓←→←→ba.0.bg",
+       {"local-std-with-host", "xn--ba-rzuadaibfa.xn--ba-rzuadaibfa.0.bg", 0}},
+
+      // Registered URLs
+      {"ftp://example.com/", {"ftp", "example.com", 21}},
+      {"ws://example.com/", {"ws", "example.com", 80}},
+      {"wss://example.com/", {"wss", "example.com", 443}},
+      {"wss://user:pass@example.com/", {"wss", "example.com", 443}},
+  };
+
+  for (const TestCase& test : kTestCases) {
+    SCOPED_TRACE(testing::Message() << "Test input: " << test.input);
+
+    // Only valid URLs should translate into valid, non-opaque origins.
+    EXPECT_TRUE(this->IsValidUrl(test.input));
+
+    auto origin = this->CreateOriginFromString(test.input);
+    this->VerifyTupleOriginInvariants(origin, test.expected_tuple);
+  }
+}
+
+TYPED_TEST_P(AbstractOriginTest, CustomSchemes_OpaqueOrigins) {
+  const char* kTestCases[] = {
+      // Unknown scheme
+      "unknown-scheme:foo",
+      "unknown-scheme://bar",
+
+      // Unknown scheme that is a prefix or suffix of a registered scheme.
+      "loca:foo",
+      "ocal:foo",
+      "local-suffix:foo",
+      "prefix-local:foo",
+
+      // Custom no-access schemes translate into an opaque origin (just like the
+      // built-in no-access schemes such as about:blank or data:).
+      "noaccess-std-with-host:foo",
+      "noaccess-std-with-host://bar",
+      "noaccess://host",
+      "local-noaccess://host",
+      "local-noaccess-std-with-host://host",
+  };
+
+  for (const char* test_input : kTestCases) {
+    SCOPED_TRACE(testing::Message() << "Test input: " << test_input);
+
+    // Verify that `origin` is opaque not just because `test_input` results in
+    // an invalid URL (because of a typo in the scheme name, or because of a
+    // technicality like having no host in a noaccess-std-with-host: scheme).
+    EXPECT_TRUE(this->IsValidUrl(test_input));
+
+    this->TestUniqueOpaqueOrigin(test_input);
+  }
+}
+
+TYPED_TEST_P(AbstractOriginTest, CustomSchemes_TupleOrigins) {
+  struct TestCase {
+    const char* input;
+    SchemeHostPort expected_tuple;
+  } kTestCases[] = {
+      // Scheme (registered in SetUp()) that's both local and standard.
+      // TODO: Is it really appropriate to do network-host canonicalization of
+      // schemes without ports?
+      {"local-std-with-host:20", {"local-std-with-host", "0.0.0.20", 0}},
+      {"local-std-with-host:20.", {"local-std-with-host", "0.0.0.20", 0}},
+      {"local-std-with-host:foo", {"local-std-with-host", "foo", 0}},
+      {"local-std-with-host://bar:20", {"local-std-with-host", "bar", 0}},
+      {"local-std-with-host:baz.", {"local-std-with-host", "baz.", 0}},
+      {"local-std-with-host:baz..", {"local-std-with-host", "baz..", 0}},
+      {"local-std-with-host:baz..bar", {"local-std-with-host", "baz..bar", 0}},
+      {"local-std-with-host:baz...", {"local-std-with-host", "baz...", 0}},
+
+      // Scheme (registered in SetUp()) that's local but nonstandard. These
+      // always have empty hostnames, but are allowed to be url::Origins.
+      {"local:", {"local", "", 0}},
+      {"local:foo", {"local", "", 0}},
+      {"local://bar", {"local", "", 0}},
+      {"also-local://bar", {"also-local", "", 0}},
+
+      {"std-with-host://host", {"std-with-host", "host", 0}},
+      {"local://host", {"local", "", 0}},
+      {"local-std-with-host://host", {"local-std-with-host", "host", 0}},
+  };
+
+  for (const TestCase& test : kTestCases) {
+    SCOPED_TRACE(testing::Message() << "Test input: " << test.input);
+
+    // Only valid URLs should translate into valid, non-opaque origins.
+    EXPECT_TRUE(this->IsValidUrl(test.input));
+
+    auto origin = this->CreateOriginFromString(test.input);
+    this->VerifyTupleOriginInvariants(origin, test.expected_tuple);
+  }
+}
+
+REGISTER_TYPED_TEST_SUITE_P(AbstractOriginTest,
+                            NonStandardSchemeWithAndroidWebViewHack,
+                            OpaqueOriginsFromValidUrls,
+                            OpaqueOriginsFromInvalidUrls,
+                            TupleOrigins,
+                            CustomSchemes_OpaqueOrigins,
+                            CustomSchemes_TupleOrigins);
+
+}  // namespace url
+
+#endif  // URL_ORIGIN_ABSTRACT_TESTS_H_
diff --git a/origin_unittest.cc b/origin_unittest.cc
new file mode 100644
index 00000000000..47cca812a65
--- /dev/null
+++ b/origin_unittest.cc
@@ -0,0 +1,777 @@
+// Copyright 2015 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "base/memory/raw_ptr.h"
+#include "testing/gmock/include/gmock/gmock.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "url/gurl.h"
+#include "url/origin.h"
+#include "url/origin_abstract_tests.h"
+#include "url/url_util.h"
+
+namespace url {
+
+class OriginTest : public ::testing::Test {
+ public:
+  void SetUp() override {
+    // Add two schemes which are local but nonstandard.
+    AddLocalScheme("local-but-nonstandard");
+    AddLocalScheme("also-local-but-nonstandard");
+
+    // Add a scheme that's both local and standard.
+    AddStandardScheme("local-and-standard", SchemeType::SCHEME_WITH_HOST);
+    AddLocalScheme("local-and-standard");
+
+    // Add a scheme that's standard but no-access. We still want these to
+    // form valid SchemeHostPorts, even though they always commit as opaque
+    // origins, so that they can represent the source of the resource even if
+    // it's not committable as a non-opaque origin.
+    AddStandardScheme("standard-but-noaccess", SchemeType::SCHEME_WITH_HOST);
+    AddNoAccessScheme("standard-but-noaccess");
+  }
+
+  ::testing::AssertionResult DoEqualityComparisons(const url::Origin& a,
+                                                   const url::Origin& b,
+                                                   bool should_compare_equal) {
+    ::testing::AssertionResult failure = ::testing::AssertionFailure();
+    failure << "DoEqualityComparisons failure. Expecting "
+            << (should_compare_equal ? "equality" : "inequality")
+            << " between:\n  a\n    Which is: " << a
+            << "\n  b\n    Which is: " << b << "\nThe following check failed: ";
+    if (a.IsSameOriginWith(b) != should_compare_equal)
+      return failure << "a.IsSameOriginWith(b)";
+    if (b.IsSameOriginWith(a) != should_compare_equal)
+      return failure << "b.IsSameOriginWith(a)";
+    if ((a == b) != should_compare_equal)
+      return failure << "(a == b)";
+    if ((b == a) != should_compare_equal)
+      return failure << "(b == a)";
+    if ((b != a) != !should_compare_equal)
+      return failure << "(b != a)";
+    if ((a != b) != !should_compare_equal)
+      return failure << "(a != b)";
+    return ::testing::AssertionSuccess();
+  }
+
+  bool HasNonceTokenBeenInitialized(const url::Origin& origin) {
+    EXPECT_TRUE(origin.opaque());
+    // Avoid calling nonce_.token() here, to not trigger lazy initialization.
+    return !origin.nonce_->token_.is_empty();
+  }
+
+  Origin::Nonce CreateNonce() { return Origin::Nonce(); }
+
+  Origin::Nonce CreateNonce(base::UnguessableToken nonce) {
+    return Origin::Nonce(nonce);
+  }
+
+  const base::UnguessableToken* GetNonce(const Origin& origin) {
+    return origin.GetNonceForSerialization();
+  }
+
+  // Wrappers around url::Origin methods to expose it to tests.
+
+  absl::optional<Origin> UnsafelyCreateOpaqueOriginWithoutNormalization(
+      base::StringPiece precursor_scheme,
+      base::StringPiece precursor_host,
+      uint16_t precursor_port,
+      const Origin::Nonce& nonce) {
+    return Origin::UnsafelyCreateOpaqueOriginWithoutNormalization(
+        precursor_scheme, precursor_host, precursor_port, nonce);
+  }
+
+  absl::optional<std::string> SerializeWithNonce(const Origin& origin) {
+    return origin.SerializeWithNonce();
+  }
+
+  absl::optional<std::string> SerializeWithNonceAndInitIfNeeded(
+      Origin& origin) {
+    return origin.SerializeWithNonceAndInitIfNeeded();
+  }
+
+  absl::optional<Origin> Deserialize(const std::string& value) {
+    return Origin::Deserialize(value);
+  }
+
+ private:
+  ScopedSchemeRegistryForTests scoped_registry_;
+};
+
+TEST_F(OriginTest, OpaqueOriginComparison) {
+  // A default-constructed Origin should be cross-origin to everything but
+  // itself.
+  url::Origin opaque_a, opaque_b;
+  EXPECT_TRUE(opaque_a.opaque());
+  EXPECT_EQ("", opaque_a.scheme());
+  EXPECT_EQ("", opaque_a.host());
+  EXPECT_EQ(0, opaque_a.port());
+  EXPECT_EQ(SchemeHostPort(), opaque_a.GetTupleOrPrecursorTupleIfOpaque());
+  EXPECT_FALSE(opaque_a.GetTupleOrPrecursorTupleIfOpaque().IsValid());
+
+  EXPECT_TRUE(opaque_b.opaque());
+  EXPECT_EQ("", opaque_b.scheme());
+  EXPECT_EQ("", opaque_b.host());
+  EXPECT_EQ(0, opaque_b.port());
+  EXPECT_EQ(SchemeHostPort(), opaque_b.GetTupleOrPrecursorTupleIfOpaque());
+  EXPECT_FALSE(opaque_b.GetTupleOrPrecursorTupleIfOpaque().IsValid());
+
+  // Two default-constructed Origins should always be cross origin to each
+  // other.
+  EXPECT_TRUE(DoEqualityComparisons(opaque_a, opaque_b, false));
+  EXPECT_TRUE(DoEqualityComparisons(opaque_b, opaque_b, true));
+  EXPECT_TRUE(DoEqualityComparisons(opaque_a, opaque_a, true));
+
+  // The streaming operator should not trigger lazy initialization to the token.
+  std::ostringstream stream;
+  stream << opaque_a;
+  EXPECT_STREQ("null [internally: (nonce TBD) anonymous]",
+               stream.str().c_str());
+  EXPECT_FALSE(HasNonceTokenBeenInitialized(opaque_a));
+
+  // None of the operations thus far should have triggered lazy-generation of
+  // the UnguessableToken. Copying an origin, however, should trigger this.
+  EXPECT_FALSE(HasNonceTokenBeenInitialized(opaque_a));
+  EXPECT_FALSE(HasNonceTokenBeenInitialized(opaque_b));
+  opaque_b = opaque_a;
+
+  EXPECT_TRUE(HasNonceTokenBeenInitialized(opaque_a));
+  EXPECT_TRUE(HasNonceTokenBeenInitialized(opaque_b));
+  EXPECT_TRUE(DoEqualityComparisons(opaque_a, opaque_b, true));
+  EXPECT_TRUE(DoEqualityComparisons(opaque_b, opaque_b, true));
+  EXPECT_TRUE(DoEqualityComparisons(opaque_a, opaque_a, true));
+
+  // Move-assigning from a fresh Origin should restore the lazy initialization.
+  opaque_a = url::Origin();
+  EXPECT_FALSE(HasNonceTokenBeenInitialized(opaque_a));
+  EXPECT_TRUE(HasNonceTokenBeenInitialized(opaque_b));
+  EXPECT_TRUE(DoEqualityComparisons(opaque_a, opaque_b, false));
+  EXPECT_TRUE(DoEqualityComparisons(opaque_b, opaque_b, true));
+  EXPECT_TRUE(DoEqualityComparisons(opaque_a, opaque_a, true));
+
+  // Comparing two opaque Origins with matching SchemeHostPorts should trigger
+  // lazy initialization.
+  EXPECT_FALSE(HasNonceTokenBeenInitialized(opaque_a));
+  EXPECT_TRUE(HasNonceTokenBeenInitialized(opaque_b));
+  bool should_swap = opaque_b < opaque_a;
+  EXPECT_TRUE(HasNonceTokenBeenInitialized(opaque_a));
+  EXPECT_TRUE(HasNonceTokenBeenInitialized(opaque_b));
+
+  if (should_swap)
+    std::swap(opaque_a, opaque_b);
+  EXPECT_LT(opaque_a, opaque_b);
+  EXPECT_FALSE(opaque_b < opaque_a);
+
+  EXPECT_TRUE(DoEqualityComparisons(opaque_a, opaque_b, false));
+  EXPECT_TRUE(DoEqualityComparisons(opaque_b, opaque_b, true));
+  EXPECT_TRUE(DoEqualityComparisons(opaque_a, opaque_a, true));
+
+  EXPECT_LT(opaque_a, url::Origin::Create(GURL("http://www.google.com")));
+  EXPECT_LT(opaque_b, url::Origin::Create(GURL("http://www.google.com")));
+
+  EXPECT_EQ(opaque_b, url::Origin::Resolve(GURL(), opaque_b));
+  EXPECT_EQ(opaque_b, url::Origin::Resolve(GURL("about:blank"), opaque_b));
+  EXPECT_EQ(opaque_b, url::Origin::Resolve(GURL("about:srcdoc"), opaque_b));
+  EXPECT_EQ(opaque_b,
+            url::Origin::Resolve(GURL("about:blank?hello#whee"), opaque_b));
+}
+
+TEST_F(OriginTest, ConstructFromTuple) {
+  struct TestCases {
+    const char* const scheme;
+    const char* const host;
+    const uint16_t port;
+  } cases[] = {
+      {"http", "example.com", 80},
+      {"http", "example.com", 123},
+      {"https", "example.com", 443},
+  };
+
+  for (const auto& test_case : cases) {
+    testing::Message scope_message;
+    scope_message << test_case.scheme << "://" << test_case.host << ":"
+                  << test_case.port;
+    SCOPED_TRACE(scope_message);
+    Origin origin = Origin::CreateFromNormalizedTuple(
+        test_case.scheme, test_case.host, test_case.port);
+
+    EXPECT_EQ(test_case.scheme, origin.scheme());
+    EXPECT_EQ(test_case.host, origin.host());
+    EXPECT_EQ(test_case.port, origin.port());
+  }
+}
+
+TEST_F(OriginTest, Serialization) {
+  struct TestCases {
+    const char* const url;
+    const char* const expected;
+    const char* const expected_log;
+  } cases[] = {
+      {"http://192.168.9.1/", "http://192.168.9.1"},
+      {"http://[2001:db8::1]/", "http://[2001:db8::1]"},
+      {"http://☃.net/", "http://xn--n3h.net"},
+      {"http://example.com/", "http://example.com"},
+      {"http://example.com:123/", "http://example.com:123"},
+      {"https://example.com/", "https://example.com"},
+      {"https://example.com:123/", "https://example.com:123"},
+      {"file:///etc/passwd", "file://", "file:// [internally: file://]"},
+      {"file://example.com/etc/passwd", "file://",
+       "file:// [internally: file://example.com]"},
+      {"data:,", "null", "null [internally: (nonce TBD) anonymous]"},
+  };
+
+  for (const auto& test_case : cases) {
+    SCOPED_TRACE(test_case.url);
+    GURL url(test_case.url);
+    EXPECT_TRUE(url.is_valid());
+    Origin origin = Origin::Create(url);
+    std::string serialized = origin.Serialize();
+    ExpectParsedUrlsEqual(GURL(serialized), origin.GetURL());
+
+    EXPECT_EQ(test_case.expected, serialized);
+
+    // The '<<' operator sometimes produces additional information.
+    std::stringstream out;
+    out << origin;
+    if (test_case.expected_log)
+      EXPECT_EQ(test_case.expected_log, out.str());
+    else
+      EXPECT_EQ(test_case.expected, out.str());
+  }
+}
+
+TEST_F(OriginTest, Comparison) {
+  // Listed in strictly ascending order according to Origin's operator<.
+  const char* const kAscendingUrls[] = {
+      "data:uniqueness", "http://a:80",  "http://b:80",
+      "https://a:80",    "https://b:80", "http://a:81",
+      "http://b:81",     "https://a:81", "https://b:81",
+  };
+  // Validate the comparison logic still works when creating a canonical origin,
+  // when any created opaque origins contain a nonce.
+  {
+    // Build every Origin once up front: an opaque origin gets a different
+    // internal nonce each time it is freshly constructed (rather than copied).
+    std::vector<Origin> sorted_origins;
+    for (const char* spec : kAscendingUrls)
+      sorted_origins.push_back(Origin::Create(GURL(spec)));
+    for (size_t lo = 0; lo < sorted_origins.size(); ++lo) {
+      const Origin& lhs = sorted_origins[lo];
+      for (size_t hi = lo; hi < sorted_origins.size(); ++hi) {
+        const Origin& rhs = sorted_origins[hi];
+        EXPECT_EQ(lo < hi, lhs < rhs) << lo << " < " << hi;
+        EXPECT_EQ(hi < lo, rhs < lhs) << hi << " < " << lo;
+      }
+    }
+  }
+}
+
+TEST_F(OriginTest, UnsafelyCreate) {
+  struct TupleInput {
+    const char* scheme;
+    const char* host;
+    uint16_t port;
+  } valid_tuples[] = {
+      {"http", "example.com", 80},
+      {"http", "example.com", 123},
+      {"https", "example.com", 443},
+      {"https", "example.com", 123},
+      {"http", "example.com", 0},  // 0 is a valid port for http.
+      {"file", "", 0},             // 0 indicates "no port" for file: scheme.
+      {"file", "example.com", 0},
+  };
+
+  for (const TupleInput& input : valid_tuples) {
+    SCOPED_TRACE(testing::Message()
+                 << input.scheme << "://" << input.host << ":" << input.port);
+    absl::optional<url::Origin> created =
+        url::Origin::UnsafelyCreateTupleOriginWithoutNormalization(
+            input.scheme, input.host, input.port);
+    ASSERT_TRUE(created.has_value());
+    EXPECT_EQ(input.scheme, created->scheme());
+    EXPECT_EQ(input.host, created->host());
+    EXPECT_EQ(input.port, created->port());
+    EXPECT_FALSE(created->opaque());
+    EXPECT_TRUE(created->IsSameOriginWith(*created));
+
+    ExpectParsedUrlsEqual(GURL(created->Serialize()), created->GetURL());
+
+    base::UnguessableToken token = base::UnguessableToken::Create();
+    absl::optional<url::Origin> opaque_variant =
+        UnsafelyCreateOpaqueOriginWithoutNormalization(
+            input.scheme, input.host, input.port, CreateNonce(token));
+    ASSERT_TRUE(opaque_variant.has_value());
+    EXPECT_TRUE(opaque_variant->opaque());
+    EXPECT_FALSE(*opaque_variant == created);
+    EXPECT_EQ(opaque_variant->GetTupleOrPrecursorTupleIfOpaque(),
+              created->GetTupleOrPrecursorTupleIfOpaque());
+    EXPECT_EQ(opaque_variant,
+              UnsafelyCreateOpaqueOriginWithoutNormalization(
+                  input.scheme, input.host, input.port, CreateNonce(token)));
+    EXPECT_FALSE(*opaque_variant == created->DeriveNewOpaqueOrigin());
+  }
+}
+
+TEST_F(OriginTest, UnsafelyCreateUniqueOnInvalidInput) {
+  url::AddStandardScheme("host-only", url::SCHEME_WITH_HOST);
+  url::AddStandardScheme("host-port-only", url::SCHEME_WITH_HOST_AND_PORT);
+  struct BadInput {
+    const char* scheme;
+    const char* host;
+    uint16_t port = 80;
+  } bad_inputs[] = {{"", "", 33},
+                    {"data", "", 0},
+                    {"blob", "", 0},
+                    {"filesystem", "", 0},
+                    {"data", "example.com"},
+                    {"http", "☃.net"},
+                    {"http\nmore", "example.com"},
+                    {"http\rmore", "example.com"},
+                    {"http\n", "example.com"},
+                    {"http\r", "example.com"},
+                    {"http", "example.com\nnot-example.com"},
+                    {"http", "example.com\rnot-example.com"},
+                    {"http", "example.com\n"},
+                    {"http", "example.com\r"},
+                    {"unknown-scheme", "example.com"},
+                    {"host-only", "\r", 0},
+                    {"host-only", "example.com", 22},
+                    {"file", "", 123}};  // file: shouldn't have a port.
+
+  for (const BadInput& input : bad_inputs) {
+    SCOPED_TRACE(testing::Message()
+                 << input.scheme << "://" << input.host << ":" << input.port);
+    EXPECT_FALSE(UnsafelyCreateOpaqueOriginWithoutNormalization(
+        input.scheme, input.host, input.port, CreateNonce()));
+    EXPECT_FALSE(url::Origin::UnsafelyCreateTupleOriginWithoutNormalization(
+        input.scheme, input.host, input.port));
+  }
+
+  // The all-empty scheme/host/port tuple cannot form a tuple origin...
+  EXPECT_FALSE(
+      url::Origin::UnsafelyCreateTupleOriginWithoutNormalization("", "", 0));
+
+  // ...but it is accepted as the (unknown) precursor of an opaque origin.
+  base::UnguessableToken nonce_token = base::UnguessableToken::Create();
+  absl::optional<url::Origin> precursorless =
+      UnsafelyCreateOpaqueOriginWithoutNormalization("", "", 0,
+                                                     CreateNonce(nonce_token));
+  ASSERT_TRUE(precursorless.has_value())
+      << "An invalid tuple is a valid input to "
+      << "UnsafelyCreateOpaqueOriginWithoutNormalization, so long as it is "
+      << "the canonical form of the invalid tuple.";
+  EXPECT_TRUE(precursorless->opaque());
+  EXPECT_EQ(*GetNonce(*precursorless), nonce_token);
+  EXPECT_EQ(precursorless->GetTupleOrPrecursorTupleIfOpaque(),
+            url::SchemeHostPort());
+}
+
+TEST_F(OriginTest, UnsafelyCreateUniqueViaEmbeddedNulls) {
+  struct NulInput {
+    base::StringPiece scheme;
+    base::StringPiece host;
+    uint16_t port = 80;
+  } nul_inputs[] = {{{"http\0more", 9}, {"example.com", 11}},
+                    {{"http\0", 5}, {"example.com", 11}},
+                    {{"\0http", 5}, {"example.com", 11}},
+                    {{"http"}, {"example.com\0not-example.com", 27}},
+                    {{"http"}, {"example.com\0", 12}},
+                    {{"http"}, {"\0example.com", 12}},
+                    {{""}, {"\0", 1}, 0},
+                    {{"\0", 1}, {""}, 0}};
+
+  for (const NulInput& input : nul_inputs) {
+    SCOPED_TRACE(testing::Message()
+                 << input.scheme << "://" << input.host << ":" << input.port);
+    EXPECT_FALSE(url::Origin::UnsafelyCreateTupleOriginWithoutNormalization(
+        input.scheme, input.host, input.port));
+    EXPECT_FALSE(UnsafelyCreateOpaqueOriginWithoutNormalization(
+        input.scheme, input.host, input.port, CreateNonce()));
+  }
+}
+
+TEST_F(OriginTest, DomainIs) {
+  const struct {
+    const char* url;
+    const char* lower_ascii_domain;
+    bool expected_domain_is;
+  } kDomainCases[] = {
+      {"http://google.com/foo", "google.com", true},
+      {"http://www.google.com:99/foo", "google.com", true},
+      {"http://www.google.com.cn/foo", "google.com", false},
+      {"http://www.google.comm", "google.com", false},
+      {"http://www.iamnotgoogle.com/foo", "google.com", false},
+      {"http://www.google.com/foo", "Google.com", false},
+
+      // A host with a trailing dot matches domains with or without the dot.
+      {"http://www.google.com./foo", "google.com", true},
+      {"http://www.google.com./foo", "google.com.", true},
+      {"http://www.google.com./foo", ".com", true},
+      {"http://www.google.com./foo", ".com.", true},
+
+      // The reverse does not hold: a dotted input domain never matches a host
+      // that lacks the trailing dot.
+      {"http://google.com/foo", "google.com.", false},
+
+      // A host ending in two dots doesn't match.
+      {"http://www.google.com../foo", "google.com", false},
+
+      // Filesystem scheme.
+      {"filesystem:http://www.google.com:99/foo/", "google.com", true},
+      {"filesystem:http://www.iamnotgoogle.com/foo/", "google.com", false},
+
+      // File scheme.
+      {"file:///home/user/text.txt", "", false},
+      {"file:///home/user/text.txt", "txt", false},
+  };
+
+  for (const auto& entry : kDomainCases) {
+    SCOPED_TRACE(testing::Message()
+                 << "(url, domain): (" << entry.url << ", "
+                 << entry.lower_ascii_domain << ")");
+    const GURL parsed_url(entry.url);
+    ASSERT_TRUE(parsed_url.is_valid());
+    Origin created = Origin::Create(parsed_url);
+
+    EXPECT_EQ(entry.expected_domain_is,
+              created.DomainIs(entry.lower_ascii_domain));
+    EXPECT_FALSE(
+        created.DeriveNewOpaqueOrigin().DomainIs(entry.lower_ascii_domain));
+  }
+
+  // An origin created from an invalid URL never matches any domain.
+  const GURL schemeless_url("google.com");
+  ASSERT_FALSE(schemeless_url.is_valid());
+  EXPECT_FALSE(Origin::Create(schemeless_url).DomainIs("google.com"));
+
+  // Neither does a default-constructed (unique) origin.
+  EXPECT_FALSE(Origin().DomainIs(""));
+  EXPECT_FALSE(Origin().DomainIs("com"));
+}
+
+TEST_F(OriginTest, DebugAlias) {
+  Origin foo_origin = Origin::Create(GURL("https://foo.com/bar"));
+  DEBUG_ALIAS_FOR_ORIGIN(foo_origin_alias, foo_origin);
+  EXPECT_STREQ("https://foo.com", foo_origin_alias);
+}
+
+TEST_F(OriginTest, CanBeDerivedFrom) {
+  AddStandardScheme("new-standard", SchemeType::SCHEME_WITH_HOST);
+  Origin opaque_unique_origin = Origin();
+
+  Origin regular_origin = Origin::Create(GURL("https://a.com/"));
+  Origin opaque_precursor_origin = regular_origin.DeriveNewOpaqueOrigin();
+
+  Origin file_origin = Origin::Create(GURL("file:///foo/bar"));
+  Origin file_opaque_precursor_origin = file_origin.DeriveNewOpaqueOrigin();
+  Origin file_host_origin = Origin::Create(GURL("file://a.com/foo/bar"));
+  Origin file_host_opaque_precursor_origin =
+      file_host_origin.DeriveNewOpaqueOrigin();
+
+  Origin non_standard_scheme_origin =
+      Origin::Create(GURL("non-standard-scheme:foo"));
+  Origin non_standard_opaque_precursor_origin =
+      non_standard_scheme_origin.DeriveNewOpaqueOrigin();
+
+  // Also cover the "new-standard" scheme registered at the top of this test.
+  Origin new_standard_origin = Origin::Create(GURL("new-standard://host/"));
+  Origin new_standard_opaque_precursor_origin =
+      new_standard_origin.DeriveNewOpaqueOrigin();
+
+  // No access schemes always get unique opaque origins.
+  Origin no_access_origin =
+      Origin::Create(GURL("standard-but-noaccess://b.com"));
+  Origin no_access_opaque_precursor_origin =
+      no_access_origin.DeriveNewOpaqueOrigin();
+
+  Origin local_non_standard_origin =
+      Origin::Create(GURL("local-but-nonstandard://a.com"));
+  Origin local_non_standard_opaque_precursor_origin =
+      local_non_standard_origin.DeriveNewOpaqueOrigin();
+
+  // Call origin.CanBeDerivedFrom(url) for each of the following test cases
+  // and ensure that it returns |expected_value|.
+  const struct {
+    const char* url;
+    raw_ptr<Origin> origin;
+    bool expected_value;
+  } kTestCases[] = {
+      {"https://a.com", &regular_origin, true},
+      // Web URL can commit in an opaque origin with precursor information.
+      // Example: iframe sandbox navigated to a.com.
+      {"https://a.com", &opaque_precursor_origin, true},
+      // URL that comes from the web can never commit in an opaque unique
+      // origin. It must have precursor information.
+      {"https://a.com", &opaque_unique_origin, false},
+
+      // Cross-origin URLs should never work.
+      {"https://b.com", &regular_origin, false},
+      {"https://b.com", &opaque_precursor_origin, false},
+
+      // data: URL can never commit in a regular, non-opaque origin.
+      {"data:text/html,foo", &regular_origin, false},
+      // This is the default case: data: URLs commit in opaque origin carrying
+      // precursor information for the origin that created them.
+      {"data:text/html,foo", &opaque_precursor_origin, true},
+      // Browser-initiated navigations can result in data: URL committing in
+      // opaque unique origin.
+      {"data:text/html,foo", &opaque_unique_origin, true},
+
+      // about:blank can commit in regular origin (default case for iframes).
+      {"about:blank", &regular_origin, true},
+      // This can happen if data: URL that originated at a.com creates an
+      // about:blank iframe.
+      {"about:blank", &opaque_precursor_origin, true},
+      // Browser-initiated navigations can result in about:blank URL committing
+      // in opaque unique origin.
+      {"about:blank", &opaque_unique_origin, true},
+
+      // Default behavior of srcdoc is to inherit the origin of the parent
+      // document.
+      {"about:srcdoc", &regular_origin, true},
+      // This happens for sandboxed srcdoc iframe.
+      {"about:srcdoc", &opaque_precursor_origin, true},
+      // This can happen with browser-initiated navigation to about:blank or
+      // data: URL, which in turn add srcdoc iframe.
+      {"about:srcdoc", &opaque_unique_origin, true},
+
+      // Just like srcdoc, blob: URLs can be created in all the cases.
+      {"blob:https://a.com/foo", &regular_origin, true},
+      {"blob:https://a.com/foo", &opaque_precursor_origin, true},
+      {"blob:https://a.com/foo", &opaque_unique_origin, true},
+
+      {"filesystem:https://a.com/foo", &regular_origin, true},
+      {"filesystem:https://a.com/foo", &opaque_precursor_origin, true},
+      // Unlike blob: URLs, filesystem: ones cannot be created in a unique
+      // opaque origin.
+      {"filesystem:https://a.com/foo", &opaque_unique_origin, false},
+
+      // file: URLs cannot result in regular web origins, regardless of
+      // opaqueness.
+      {"file:///etc/passwd", &regular_origin, false},
+      {"file:///etc/passwd", &opaque_precursor_origin, false},
+      // However, they can result in regular file: origin and an opaque one
+      // containing another file: origin as precursor.
+      {"file:///etc/passwd", &file_origin, true},
+      {"file:///etc/passwd", &file_opaque_precursor_origin, true},
+      // It should not be possible to get an opaque unique origin for file:
+      // as it is a standard scheme and will always result in a tuple origin
+      // or will always be derived from another origin.
+      // Note: file:// URLs should become unique opaque origins at some point.
+      {"file:///etc/passwd", &opaque_unique_origin, false},
+
+      // The same set as above, but including a host.
+      {"file://a.com/etc/passwd", &regular_origin, false},
+      {"file://a.com/etc/passwd", &opaque_precursor_origin, false},
+      {"file://a.com/etc/passwd", &file_host_origin, true},
+      {"file://a.com/etc/passwd", &file_host_opaque_precursor_origin, true},
+      {"file://a.com/etc/passwd", &opaque_unique_origin, false},
+
+      // Locally registered standard scheme should behave the same way
+      // as built-in standard schemes.
+      {"new-standard://host/foo", &new_standard_origin, true},
+      {"new-standard://host/foo", &new_standard_opaque_precursor_origin, true},
+      {"new-standard://host/foo", &opaque_unique_origin, false},
+      {"new-standard://host2/foo", &new_standard_origin, false},
+      {"new-standard://host2/foo", &new_standard_opaque_precursor_origin,
+       false},
+
+      // A non-standard scheme should never commit in a standard origin or
+      // opaque origin with standard precursor information.
+      {"non-standard-scheme://a.com/foo", &regular_origin, false},
+      {"non-standard-scheme://a.com/foo", &opaque_precursor_origin, false},
+      // However, it should be fine to commit in unique opaque origins or in its
+      // own origin.
+      // Note: since non-standard scheme URLs don't parse out anything
+      // but the scheme, using a random different hostname here would work.
+      {"non-standard-scheme://b.com/foo2", &opaque_unique_origin, true},
+      {"non-standard-scheme://b.com/foo3", &non_standard_scheme_origin, true},
+      {"non-standard-scheme://b.com/foo4",
+       &non_standard_opaque_precursor_origin, true},
+
+      // No access scheme can only commit in opaque origin.
+      {"standard-but-noaccess://a.com/foo", &regular_origin, false},
+      {"standard-but-noaccess://a.com/foo", &opaque_precursor_origin, false},
+      {"standard-but-noaccess://a.com/foo", &opaque_unique_origin, true},
+      {"standard-but-noaccess://a.com/foo", &no_access_origin, true},
+      {"standard-but-noaccess://a.com/foo", &no_access_opaque_precursor_origin,
+       true},
+      {"standard-but-noaccess://b.com/foo", &no_access_origin, true},
+      {"standard-but-noaccess://b.com/foo", &no_access_opaque_precursor_origin,
+       true},
+
+      // Local schemes can be non-standard, verify they also work as expected.
+      {"local-but-nonstandard://a.com", &regular_origin, false},
+      {"local-but-nonstandard://a.com", &opaque_precursor_origin, false},
+      {"local-but-nonstandard://a.com", &opaque_unique_origin, true},
+      {"local-but-nonstandard://a.com", &local_non_standard_origin, true},
+      {"local-but-nonstandard://a.com",
+       &local_non_standard_opaque_precursor_origin, true},
+  };
+
+  for (const auto& test_case : kTestCases) {
+    SCOPED_TRACE(testing::Message() << "(origin, url): (" << *test_case.origin
+                                    << ", " << test_case.url << ")");
+    EXPECT_EQ(test_case.expected_value,
+              test_case.origin->CanBeDerivedFrom(GURL(test_case.url)));
+  }
+}
+
+TEST_F(OriginTest, GetDebugString) {
+  Origin ip_origin = Origin::Create(GURL("http://192.168.9.1"));
+  EXPECT_STREQ(ip_origin.GetDebugString().c_str(), "http://192.168.9.1");
+
+  Origin ip_derived_opaque = ip_origin.DeriveNewOpaqueOrigin();
+  EXPECT_THAT(
+      ip_derived_opaque.GetDebugString().c_str(),
+      ::testing::MatchesRegex(
+          "null \\[internally: \\(\\w*\\) derived from http://192.168.9.1\\]"));
+  EXPECT_THAT(
+      ip_derived_opaque.GetDebugString(/*include_nonce=*/false).c_str(),
+      ::testing::MatchesRegex(
+          "null \\[internally: derived from http://192.168.9.1\\]"));
+
+  Origin data_url_origin = Origin::Create(GURL("data:"));
+  EXPECT_STREQ(data_url_origin.GetDebugString().c_str(),
+               "null [internally: (nonce TBD) anonymous]");
+
+  // Deriving a new opaque origin produces one whose nonce is initialized, so
+  // the debug string no longer reports "nonce TBD".
+  Origin data_derived_opaque = data_url_origin.DeriveNewOpaqueOrigin();
+  EXPECT_THAT(
+      data_derived_opaque.GetDebugString().c_str(),
+      ::testing::MatchesRegex("null \\[internally: \\(\\w*\\) anonymous\\]"));
+  EXPECT_THAT(
+      data_derived_opaque.GetDebugString(/*include_nonce=*/false).c_str(),
+      ::testing::MatchesRegex("null \\[internally: anonymous\\]"));
+
+  Origin local_file_origin = Origin::Create(GURL("file:///etc/passwd"));
+  EXPECT_STREQ(local_file_origin.GetDebugString().c_str(),
+               "file:// [internally: file://]");
+
+  Origin hosted_file_origin =
+      Origin::Create(GURL("file://example.com/etc/passwd"));
+  EXPECT_STREQ(hosted_file_origin.GetDebugString().c_str(),
+               "file:// [internally: file://example.com]");
+}
+
+TEST_F(OriginTest, Deserialize) {
+  const std::vector<GURL> tuple_urls = {
+      GURL("https://a.com"),         GURL("http://a"),
+      GURL("http://a:80"),           GURL("file://a.com/etc/passwd"),
+      GURL("file:///etc/passwd"),    GURL("http://192.168.1.1"),
+      GURL("http://[2001:db8::1]/"),
+  };
+  for (const GURL& tuple_url : tuple_urls) {
+    SCOPED_TRACE(tuple_url.spec());
+    Origin original = Origin::Create(tuple_url);
+    absl::optional<std::string> wire_form = SerializeWithNonce(original);
+    ASSERT_TRUE(wire_form.has_value());
+
+    absl::optional<Origin> round_tripped = Deserialize(std::move(*wire_form));
+    ASSERT_TRUE(round_tripped.has_value());
+
+    EXPECT_TRUE(DoEqualityComparisons(original, *round_tripped, true));
+    EXPECT_EQ(original.GetDebugString(), round_tripped->GetDebugString());
+  }
+}
+
+TEST_F(OriginTest, DeserializeInvalid) {
+  EXPECT_FALSE(Deserialize(std::string()).has_value());
+  EXPECT_FALSE(Deserialize("deadbeef").has_value());
+  EXPECT_FALSE(Deserialize("0123456789").has_value());
+  EXPECT_FALSE(Deserialize("https://a.com").has_value());
+  EXPECT_FALSE(Deserialize("https://192.168.1.1").has_value());
+}
+
+TEST_F(OriginTest, SerializeTBDNonce) {
+  std::vector<GURL> invalid_urls = {
+      GURL("data:uniqueness"),       GURL("data:,"),
+      GURL("data:text/html,Hello!"), GURL("javascript:alert(1)"),
+      GURL("about:blank"),           GURL("google.com"),
+  };
+  for (const GURL& url : invalid_urls) {
+    SCOPED_TRACE(url.spec());
+    Origin origin = Origin::Create(url);
+    absl::optional<std::string> serialized = SerializeWithNonce(origin);
+    ASSERT_TRUE(serialized);  // Fail cleanly instead of dereferencing nullopt.
+    absl::optional<Origin> deserialized = Deserialize(std::move(*serialized));
+    ASSERT_TRUE(deserialized.has_value());
+
+    // DoEqualityComparisons is unusable: empty nonces only == the same object.
+    EXPECT_EQ(origin.GetDebugString(), deserialized.value().GetDebugString());
+  }
+
+  {
+    // Same basic test as above, but without a GURL to create tuple_.
+    Origin opaque;
+    absl::optional<std::string> serialized = SerializeWithNonce(opaque);
+    ASSERT_TRUE(serialized);
+
+    absl::optional<Origin> deserialized = Deserialize(std::move(*serialized));
+    ASSERT_TRUE(deserialized.has_value());
+
+    // Can't use DoEqualityComparisons here since empty nonces are never ==
+    // unless they are the same object.
+    EXPECT_EQ(opaque.GetDebugString(), deserialized.value().GetDebugString());
+  }
+
+  // Now force initialization of the nonce prior to serialization.
+  for (const GURL& url : invalid_urls) {
+    SCOPED_TRACE(url.spec());
+    Origin origin = Origin::Create(url);
+    absl::optional<std::string> serialized =
+        SerializeWithNonceAndInitIfNeeded(origin);
+    ASSERT_TRUE(serialized);  // Fail cleanly instead of dereferencing nullopt.
+    absl::optional<Origin> deserialized = Deserialize(std::move(*serialized));
+    ASSERT_TRUE(deserialized.has_value());
+    // The nonce should have been initialized prior to Serialization().
+    EXPECT_EQ(origin, deserialized.value());
+  }
+}
+
+TEST_F(OriginTest, DeserializeValidNonce) {
+  Origin opaque_source;
+  GetNonce(opaque_source);  // Presumably forces the nonce into existence.
+
+  absl::optional<std::string> wire_form = SerializeWithNonce(opaque_source);
+  ASSERT_TRUE(wire_form.has_value());
+
+  absl::optional<Origin> round_tripped = Deserialize(std::move(*wire_form));
+  ASSERT_TRUE(round_tripped.has_value());
+
+  EXPECT_TRUE(DoEqualityComparisons(opaque_source, *round_tripped, true));
+  EXPECT_EQ(opaque_source.GetDebugString(), round_tripped->GetDebugString());
+}
+
+TEST_F(OriginTest, IsSameOriginWith) {
+  url::Origin fresh_opaque;
+  const GURL foo_page("https://foo.com/path");
+  const url::Origin foo = url::Origin::Create(foo_page);
+  const GURL bar_page("https://bar.com/path");
+  const url::Origin bar = url::Origin::Create(bar_page);
+
+  EXPECT_FALSE(fresh_opaque.IsSameOriginWith(foo));
+  EXPECT_FALSE(fresh_opaque.IsSameOriginWith(foo_page));
+
+  EXPECT_TRUE(foo.IsSameOriginWith(foo));
+  EXPECT_TRUE(foo.IsSameOriginWith(foo_page));
+
+  EXPECT_FALSE(foo.IsSameOriginWith(bar));
+  EXPECT_FALSE(foo.IsSameOriginWith(bar_page));
+
+  // The expectations below pin down legacy behavior; that is not a claim that
+  // the legacy behavior is correct (or desirable in the long-term).
+  EXPECT_FALSE(foo.IsSameOriginWith(GURL("about:blank")));
+  EXPECT_FALSE(foo.IsSameOriginWith(GURL()));  // Invalid GURL.
+  EXPECT_TRUE(foo.IsSameOriginWith(GURL("blob:https://foo.com/guid")));
+}
+
+INSTANTIATE_TYPED_TEST_SUITE_P(UrlOrigin,
+                               AbstractOriginTest,
+                               UrlOriginTestTraits);
+
+}  // namespace url
diff --git a/run_all_perftests.cc b/run_all_perftests.cc
new file mode 100644
index 00000000000..f11fd29ac00
--- /dev/null
+++ b/run_all_perftests.cc
@@ -0,0 +1,14 @@
+// Copyright 2019 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/functional/bind.h"
+#include "base/test/launcher/unit_test_launcher.h"
+#include "base/test/perf_test_suite.h"
+
+int main(int argc, char** argv) {
+  base::PerfTestSuite perf_suite(argc, argv);
+  return base::LaunchUnitTestsSerially(
+      argc, argv,
+      base::BindOnce(&base::TestSuite::Run, base::Unretained(&perf_suite)));
+}
diff --git a/run_all_unittests.cc b/run_all_unittests.cc
new file mode 100644
index 00000000000..91f5613401e
--- /dev/null
+++ b/run_all_unittests.cc
@@ -0,0 +1,27 @@
+// Copyright 2016 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <memory>
+
+#include "base/functional/bind.h"
+#include "base/test/launcher/unit_test_launcher.h"
+#include "base/test/test_io_thread.h"
+#include "base/test/test_suite.h"
+#include "build/build_config.h"
+
+#if !BUILDFLAG(IS_IOS)
+#include "mojo/core/embedder/embedder.h"  // nogncheck
+#endif
+
+int main(int argc, char** argv) {
+  base::TestSuite unit_suite(argc, argv);
+
+  // Every platform except iOS initializes Mojo core before launching tests.
+#if !BUILDFLAG(IS_IOS)
+  mojo::core::Init();
+#endif
+  return base::LaunchUnitTests(
+      argc, argv,
+      base::BindOnce(&base::TestSuite::Run, base::Unretained(&unit_suite)));
+}
diff --git a/scheme_host_port.cc b/scheme_host_port.cc
new file mode 100644
index 00000000000..490ae9a78c4
--- /dev/null
+++ b/scheme_host_port.cc
@@ -0,0 +1,278 @@
+// Copyright 2015 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/scheme_host_port.h"
+
+#include <stdint.h>
+#include <string.h>
+
+#include <ostream>
+#include <tuple>
+
+#include "base/check_op.h"
+#include "base/containers/contains.h"
+#include "base/notreached.h"
+#include "base/numerics/safe_conversions.h"
+#include "base/strings/string_number_conversions.h"
+#include "base/strings/string_piece.h"
+#include "url/gurl.h"
+#include "url/third_party/mozilla/url_parse.h"
+#include "url/url_canon.h"
+#include "url/url_canon_stdstring.h"
+#include "url/url_constants.h"
+#include "url/url_util.h"
+
+namespace url {
+
+namespace {
+
+bool IsCanonicalHost(const base::StringPiece& host) {
+  // Canonicalize |host| into a scratch string, then compare it with the input
+  // (the canonicalization steps are copy/pasted from net/base. :( ).
+  std::string canonical;
+  StdStringCanonOutput canonical_sink(&canonical);
+  CanonHostInfo info;
+  const int host_length = base::checked_cast<int>(host.length());
+  CanonicalizeHostVerbose(host.data(), Component(0, host_length),
+                          &canonical_sink, &info);
+
+  const bool canonicalized = info.out_host.is_nonempty() &&
+                             info.family != CanonHostInfo::BROKEN;
+  if (!canonicalized) {
+    // Empty host, or canonicalization failed.
+    canonical.clear();
+  } else {
+    // Success!  Assert that there's no extra garbage.
+    canonical_sink.Complete();
+    DCHECK_EQ(info.out_host.len, static_cast<int>(canonical.length()));
+  }
+
+  return host == canonical;
+}
+
+// Returns true if (|scheme|, |host|, |port|) may be stored as a valid
+// SchemeHostPort tuple:
+//  * |scheme| must be non-empty and must not be a no-access scheme.
+//  * A non-standard scheme is accepted if it is a local scheme with an empty
+//    host and port 0, or if the Android WebView workaround is enabled.
+//  * A standard scheme is checked against the requirements of its SchemeType
+//    (see the switch below).
+// |policy| says whether |host| still has to be verified as canonical
+// (CHECK_CANONICALIZATION) or is trusted, and only DCHECKed, to be canonical
+// already.
+//
+// Note: When changing IsValidInput, consider also updating
+// ShouldTreatAsOpaqueOrigin in Blink (there might be existing differences in
+// behavior between these 2 layers, but we should avoid introducing new
+// differences).
+bool IsValidInput(const base::StringPiece& scheme,
+                  const base::StringPiece& host,
+                  uint16_t port,
+                  SchemeHostPort::ConstructPolicy policy) {
+  // A tuple without a scheme can never be valid.
+  if (scheme.empty())
+    return false;
+
+  // about:blank and other no-access schemes translate into an opaque origin.
+  // This helps consistency with ShouldTreatAsOpaqueOrigin in Blink.
+  if (base::Contains(GetNoAccessSchemes(), scheme))
+    return false;
+
+  // Ask the scheme registry whether |scheme| is standard and, if so, which
+  // authority components it carries.
+  const Component whole_scheme(0, base::checked_cast<int>(scheme.length()));
+  SchemeType standard_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
+  if (!GetStandardSchemeType(scheme.data(), whole_scheme, &standard_type)) {
+    // To be consistent with ShouldTreatAsOpaqueOrigin in Blink, local
+    // non-standard schemes are currently allowed to be tuple origins.
+    // Nonstandard schemes don't have hostnames, so their tuple is just
+    // ("protocol", "", 0).
+    //
+    // TODO: Migrate "content:" and "externalfile:" to be standard schemes, and
+    // remove this local scheme exception.
+    const bool is_bare_local_scheme =
+        base::Contains(GetLocalSchemes(), scheme) && host.empty() && port == 0;
+
+    // Otherwise, allow non-standard schemes only if the Android WebView
+    // workaround is enabled.
+    return is_bare_local_scheme || AllowNonStandardSchemesForAndroidWebView();
+  }
+
+  // With CHECK_CANONICALIZATION the host must be verified here; any other
+  // policy promises an already-canonical host, so skip the expensive check.
+  const bool must_verify_host =
+      policy == SchemeHostPort::CHECK_CANONICALIZATION;
+
+  switch (standard_type) {
+    case SCHEME_WITH_HOST_AND_PORT:
+    case SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION:
+      // A URL with |scheme| is required to have the host and port, so return an
+      // invalid instance if host is not given.  Note that a valid port is
+      // always provided by SchemeHostPort(const GURL&) constructor (a missing
+      // port is replaced with a default port if needed by
+      // GURL::EffectiveIntPort()).
+      if (host.empty())
+        return false;
+
+      // Debug builds still confirm that a host claimed to be canonical is.
+      DCHECK(must_verify_host || IsCanonicalHost(host));
+      return !must_verify_host || IsCanonicalHost(host);
+
+    case SCHEME_WITH_HOST:
+      // Return an invalid object if a URL with the scheme never represents
+      // the port data but the given |port| is non-zero.
+      if (port != 0)
+        return false;
+
+      // Debug builds still confirm that a host claimed to be canonical is.
+      DCHECK(must_verify_host || IsCanonicalHost(host));
+      return !must_verify_host || IsCanonicalHost(host);
+
+    case SCHEME_WITHOUT_AUTHORITY:
+      return false;
+
+    default:
+      NOTREACHED();
+      return false;
+  }
+}
+
+}  // namespace
+
+SchemeHostPort::SchemeHostPort() = default;
+
+SchemeHostPort::SchemeHostPort(std::string scheme,
+                               std::string host,
+                               uint16_t port,
+                               ConstructPolicy policy) {
+  if (IsValidInput(scheme, host, port, policy)) {
+    scheme_ = std::move(scheme);
+    host_ = std::move(host);
+    port_ = port;
+    DCHECK(IsValid()) << "Scheme: " << scheme_ << " Host: " << host_
+                      << " Port: " << port;
+  } else {
+    // Rejected input: nothing is stored, so the tuple must still be invalid.
+    DCHECK(!IsValid());
+  }
+}
+
+// Copies |scheme| and |host| into owned strings and delegates to the
+// std::string constructor, always requesting CHECK_CANONICALIZATION.
+SchemeHostPort::SchemeHostPort(base::StringPiece scheme,
+                               base::StringPiece host,
+                               uint16_t port)
+    : SchemeHostPort(std::string(scheme), std::string(host), port,
+                     CHECK_CANONICALIZATION) {}
+
+SchemeHostPort::SchemeHostPort(const GURL& url) {
+  if (!url.is_valid())
+    return;
+
+  const base::StringPiece scheme_piece = url.scheme_piece();
+  const base::StringPiece host_piece = url.host_piece();
+
+  // A valid GURL never returns PORT_INVALID.
+  int effective_port = url.EffectiveIntPort();
+  if (effective_port != PORT_UNSPECIFIED) {
+    DCHECK_GE(effective_port, 0);
+    DCHECK_LE(effective_port, 65535);
+  } else {
+    effective_port = 0;  // "No port" is stored as 0.
+  }
+
+  if (IsValidInput(scheme_piece, host_piece, effective_port,
+                   ALREADY_CANONICALIZED)) {
+    scheme_ = std::string(scheme_piece);
+    host_ = std::string(host_piece);
+    port_ = effective_port;
+  }
+}
+
+SchemeHostPort::~SchemeHostPort() = default;
+
+bool SchemeHostPort::IsValid() const {
+  // |scheme_| alone decides validity; host/port are never set without it.
+  const bool has_scheme = !scheme_.empty();
+  DCHECK(has_scheme || host_.empty());
+  DCHECK(has_scheme || port_ == 0);
+  return has_scheme;
+}
+
+std::string SchemeHostPort::Serialize() const {
+  // SerializeInternal writes through |parsed| unconditionally; a throwaway
+  // Parsed is probably cheaper than adding null checks there.
+  url::Parsed scratch_parsed;
+  return SerializeInternal(&scratch_parsed);
+}
+
+GURL SchemeHostPort::GetURL() const {
+  url::Parsed components;
+  std::string spec = SerializeInternal(&components);
+
+  if (!IsValid())
+    return GURL(std::move(spec), components, false);
+
+  // SchemeHostPort does not have enough information to determine if an empty
+  // host is valid or not for the given scheme. Force re-parsing.
+  DCHECK(!scheme_.empty());
+  if (host_.empty())
+    return GURL(spec);
+
+  // Parsing the serialized string through GURL would append an empty path
+  // "/", so add it by hand. Note: per RFC 6454 this cannot be done for normal
+  // Origin serialization.
+  DCHECK(!components.path.is_valid());
+  components.path = Component(spec.length(), 1);
+  spec.push_back('/');
+  return GURL(std::move(spec), components, true);
+}
+
+bool SchemeHostPort::operator<(const SchemeHostPort& other) const {
+  const auto lhs_key = std::tie(port_, scheme_, host_);
+  return lhs_key < std::tie(other.port_, other.scheme_, other.host_);
+}
+
+std::string SchemeHostPort::SerializeInternal(url::Parsed* parsed) const {
+  std::string serialized;
+  if (!IsValid())
+    return serialized;
+
+  // Reserve enough space for the "normal" case of scheme://host/.
+  serialized.reserve(scheme_.size() + host_.size() + 4);
+
+  if (!scheme_.empty()) {
+    parsed->scheme = Component(0, scheme_.length());
+    serialized += scheme_;
+  }
+
+  serialized += kStandardSchemeSeparator;
+
+  if (!host_.empty()) {
+    parsed->host = Component(serialized.length(), host_.length());
+    serialized += host_;
+  }
+
+  // Schemes without a default port never get a port component; otherwise the
+  // port is omitted when it equals the scheme's default.
+  const int scheme_default_port = DefaultPortForScheme(
+      scheme_.data(), static_cast<int>(scheme_.length()));
+  const bool has_explicit_port = scheme_default_port != PORT_UNSPECIFIED &&
+                                 port_ != scheme_default_port;
+  if (has_explicit_port) {
+    serialized.push_back(':');
+    const std::string port_digits = base::NumberToString(port_);
+    parsed->port = Component(serialized.length(), port_digits.length());
+    serialized += port_digits;
+  }
+
+  return serialized;
+}
+
+std::ostream& operator<<(std::ostream& out,
+                         const SchemeHostPort& scheme_host_port) {
+  return out << scheme_host_port.Serialize();
+}
+
+}  // namespace url
diff --git a/scheme_host_port.h b/scheme_host_port.h
new file mode 100644
index 00000000000..a98e7affdb6
--- /dev/null
+++ b/scheme_host_port.h
@@ -0,0 +1,173 @@
+// Copyright 2015 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_SCHEME_HOST_PORT_H_
+#define URL_SCHEME_HOST_PORT_H_
+
+#include <stdint.h>
+
+#include <string>
+
+#include "base/component_export.h"
+#include "base/strings/string_piece.h"
+
+class GURL;
+
+namespace url {
+
+struct Parsed;
+
+// This class represents a (scheme, host, port) tuple extracted from a URL.
+//
+// The primary purpose of this class is to represent relevant network-authority
+// information for a URL. It is _not_ an Origin, as described in RFC 6454. In
+// particular, it is generally NOT the right thing to use for security
+// decisions.
+//
+// Instead, this class is a mechanism for simplifying URLs with standard schemes
+// (that is, those which follow the generic syntax of RFC 3986) down to the
+// uniquely identifying information necessary for network fetches. This makes it
+// suitable as a cache key for a collection of active connections, for instance.
+// It may, however, be inappropriate to use as a cache key for persistent
+// storage associated with a host.
+//
+// In particular, note that:
+//
+// * SchemeHostPort can only represent schemes which follow the RFC 3986 syntax
+//   (e.g. those registered with GURL as "standard schemes"). Non-standard
+//   schemes such as "blob", "filesystem", "data", and "javascript" can only be
+//   represented as invalid SchemeHostPort objects.
+//
+// * For example, the "file" scheme follows the standard syntax, but it is
+//   important to note that the authority portion (host, port) is optional.
+//   URLs without an authority portion will be represented with an empty string
+//   for the host, and a port of 0 (e.g. "file:///etc/hosts" =>
+//   ("file", "", 0)), and URLs with a host-only authority portion will be
+//   represented with a port of 0 (e.g. "file://example.com/etc/hosts" =>
+//   ("file", "example.com", 0)). See Section 3 of RFC 3986 to better understand
+//   these constructs.
+//
+// * SchemeHostPort has no notion of the Origin concept (RFC 6454), and in
+//   particular, it has no notion of an opaque Origin. If you need to take
+//   opaque origins into account (and, if you're making security-relevant
+//   decisions then you absolutely do), please use 'url::Origin' instead.
+//
+// Usage:
+//
+// * SchemeHostPort objects are commonly created from GURL objects:
+//
+//     GURL url("https://example.com/");
+//     url::SchemeHostPort tuple(url);
+//     tuple.scheme(); // "https"
+//     tuple.host(); // "example.com"
+//     tuple.port(); // 443
+//
+// * Objects may also be explicitly created and compared:
+//
+//     url::SchemeHostPort tuple(url::kHttpsScheme, "example.com", 443);
+//     tuple.scheme(); // "https"
+//     tuple.host(); // "example.com"
+//     tuple.port(); // 443
+//
+//     GURL url("https://example.com/");
+//     tuple == url::SchemeHostPort(url); // true
+class COMPONENT_EXPORT(URL) SchemeHostPort {
+ public:
+  // Creates an invalid (scheme, host, port) tuple, which represents an invalid
+  // or non-standard URL.
+  SchemeHostPort();
+
+  // Creates a (scheme, host, port) tuple. |host| must be a canonicalized
+  // A-label (that is, '☃.net' must be provided as 'xn--n3h.net'). |scheme|
+  // must be a standard scheme. |port| must be 0 if |scheme| does not support
+  // ports (e.g. 'file').
+  //
+  // Copies the data in |scheme| and |host|.
+  SchemeHostPort(base::StringPiece scheme,
+                 base::StringPiece host,
+                 uint16_t port);
+
+  // Metadata influencing whether or not the constructor should sanity check
+  // host canonicalization.
+  enum ConstructPolicy { CHECK_CANONICALIZATION, ALREADY_CANONICALIZED };
+
+  // Creates a (scheme, host, port) tuple without performing sanity checking
+  // that the host and port are canonicalized. This should only be used when
+  // converting between already normalized types, and should NOT be used for
+  // IPC.
+  SchemeHostPort(std::string scheme,
+                 std::string host,
+                 uint16_t port,
+                 ConstructPolicy policy);
+
+  // Creates a (scheme, host, port) tuple from |url|, as described at
+  // https://tools.ietf.org/html/rfc6454#section-4
+  //
+  // If |url| is invalid or non-standard, the result will be an invalid
+  // SchemeHostPort object.
+  explicit SchemeHostPort(const GURL& url);
+
+  // Copyable and movable.
+  SchemeHostPort(const SchemeHostPort&) = default;
+  SchemeHostPort& operator=(const SchemeHostPort&) = default;
+  SchemeHostPort(SchemeHostPort&&) noexcept = default;
+  SchemeHostPort& operator=(SchemeHostPort&&) noexcept = default;
+
+  ~SchemeHostPort();
+
+  // Returns the host component, in URL form. That is all IDN domain names will
+  // be expressed as A-Labels ('☃.net' will be returned as 'xn--n3h.net'), and
+  // all IPv6 addresses will be enclosed in brackets ("[2001:db8::1]").
+  const std::string& host() const { return host_; }
+  const std::string& scheme() const { return scheme_; }
+  uint16_t port() const { return port_; }
+  bool IsValid() const;
+
+  // Serializes the SchemeHostPort tuple to a canonical form.
+  //
+  // While this string form resembles the Origin serialization specified in
+  // Section 6.2 of RFC 6454, it is important to note that invalid
+  // SchemeHostPort tuples serialize to the empty string, rather than being
+  // serialized as would an opaque Origin.
+  std::string Serialize() const;
+
+  // Efficiently returns what GURL(Serialize()) would return, without needing to
+  // re-parse the URL. Note: this still performs allocations to copy data into
+  // GURL, so please avoid using this method if you only need to work on
+  // schemes, hosts, or ports individually.
+  // For example, see crrev.com/c/3637099/comments/782360d0_e14757be.
+  GURL GetURL() const;
+
+  // Two SchemeHostPort objects are "equal" iff their schemes, hosts, and ports
+  // are exact matches.
+  //
+  // Note that this comparison is _not_ the same as an origin-based comparison.
+  // In particular, invalid SchemeHostPort objects match each other (and
+  // themselves). Opaque origins, on the other hand, would not.
+  bool operator==(const SchemeHostPort& other) const {
+    return port_ == other.port() && scheme_ == other.scheme() &&
+           host_ == other.host();
+  }
+  bool operator!=(const SchemeHostPort& other) const {
+    return !(*this == other);
+  }
+  // Allows SchemeHostPort to be used as a key in STL (for example, a std::set
+  // or std::map).
+  bool operator<(const SchemeHostPort& other) const;
+
+ private:
+  std::string SerializeInternal(url::Parsed* parsed) const;
+
+  std::string scheme_;
+  std::string host_;
+  uint16_t port_ = 0;
+};
+
+COMPONENT_EXPORT(URL)
+std::ostream& operator<<(std::ostream& out,
+                         const SchemeHostPort& scheme_host_port);
+
+}  // namespace url
+
+#endif  // URL_SCHEME_HOST_PORT_H_
diff --git a/scheme_host_port_unittest.cc b/scheme_host_port_unittest.cc
new file mode 100644
index 00000000000..49bcf25362e
--- /dev/null
+++ b/scheme_host_port_unittest.cc
@@ -0,0 +1,294 @@
+// Copyright 2015 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/scheme_host_port.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "testing/gtest/include/gtest/gtest.h"
+#include "url/gurl.h"
+#include "url/url_util.h"
+
+namespace {
+
+class SchemeHostPortTest : public testing::Test {
+ public:
+  SchemeHostPortTest() = default;
+
+  SchemeHostPortTest(const SchemeHostPortTest&) = delete;
+  SchemeHostPortTest& operator=(const SchemeHostPortTest&) = delete;
+
+  ~SchemeHostPortTest() override = default;
+
+ private:
+  url::ScopedSchemeRegistryForTests scoped_registry_;
+};
+
+void ExpectParsedUrlsEqual(const GURL& a, const GURL& b) {
+  EXPECT_EQ(a, b);
+  const url::Parsed& a_parsed = a.parsed_for_possibly_invalid_spec();
+  const url::Parsed& b_parsed = b.parsed_for_possibly_invalid_spec();
+  EXPECT_EQ(a_parsed.scheme.begin, b_parsed.scheme.begin);
+  EXPECT_EQ(a_parsed.scheme.len, b_parsed.scheme.len);
+  EXPECT_EQ(a_parsed.username.begin, b_parsed.username.begin);
+  EXPECT_EQ(a_parsed.username.len, b_parsed.username.len);
+  EXPECT_EQ(a_parsed.password.begin, b_parsed.password.begin);
+  EXPECT_EQ(a_parsed.password.len, b_parsed.password.len);
+  EXPECT_EQ(a_parsed.host.begin, b_parsed.host.begin);
+  EXPECT_EQ(a_parsed.host.len, b_parsed.host.len);
+  EXPECT_EQ(a_parsed.port.begin, b_parsed.port.begin);
+  EXPECT_EQ(a_parsed.port.len, b_parsed.port.len);
+  EXPECT_EQ(a_parsed.path.begin, b_parsed.path.begin);
+  EXPECT_EQ(a_parsed.path.len, b_parsed.path.len);
+  EXPECT_EQ(a_parsed.query.begin, b_parsed.query.begin);
+  EXPECT_EQ(a_parsed.query.len, b_parsed.query.len);
+  EXPECT_EQ(a_parsed.ref.begin, b_parsed.ref.begin);
+  EXPECT_EQ(a_parsed.ref.len, b_parsed.ref.len);
+}
+
+TEST_F(SchemeHostPortTest, Invalid) {
+  url::SchemeHostPort invalid;
+  EXPECT_EQ("", invalid.scheme());
+  EXPECT_EQ("", invalid.host());
+  EXPECT_EQ(0, invalid.port());
+  EXPECT_FALSE(invalid.IsValid());
+  EXPECT_EQ(invalid, invalid);
+
+  const char* urls[] = {
+      // about:, data:, javascript: and other no-access schemes translate into
+      // an invalid SchemeHostPort
+      "about:blank", "about:blank#ref", "about:blank?query=123", "about:srcdoc",
+      "about:srcdoc#ref", "about:srcdoc?query=123", "data:text/html,Hello!",
+      "javascript:alert(1)",
+
+      // GURLs where GURL::is_valid returns false translate into an invalid
+      // SchemeHostPort.
+      "file://example.com:443/etc/passwd", "#!^%!$!&*",
+
+      // These schemes do not follow the generic URL syntax, so make sure we
+      // treat them as invalid (scheme, host, port) tuples (even though such
+      // URLs' _Origin_ might have a (scheme, host, port) tuple, they themselves
+      // do not). This is only *implicitly* checked in the code, by means of
+      // blob schemes not being standard, and filesystem schemes having type
+      // SCHEME_WITHOUT_AUTHORITY. If conditions change such that the implicit
+      // checks no longer hold, this policy should be made explicit.
+      "blob:https://example.com/uuid-goes-here",
+      "filesystem:https://example.com/temporary/yay.png"};
+
+  for (auto* test : urls) {
+    SCOPED_TRACE(test);
+    GURL url(test);
+    url::SchemeHostPort tuple(url);
+    EXPECT_EQ("", tuple.scheme());
+    EXPECT_EQ("", tuple.host());
+    EXPECT_EQ(0, tuple.port());
+    EXPECT_FALSE(tuple.IsValid());
+    EXPECT_EQ(tuple, tuple);
+    EXPECT_EQ(tuple, invalid);
+    EXPECT_EQ(invalid, tuple);
+    ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL());
+  }
+}
+
+TEST_F(SchemeHostPortTest, ExplicitConstruction) {
+  struct TestCases {
+    const char* scheme;
+    const char* host;
+    uint16_t port;
+  } cases[] = {
+      {"http", "example.com", 80},
+      {"http", "example.com", 123},
+      {"http", "example.com", 0},  // 0 is a valid port for http.
+      {"https", "example.com", 443},
+      {"https", "example.com", 123},
+      {"file", "", 0},  // 0 indicates "no port" for file: scheme.
+      {"file", "example.com", 0},
+  };
+
+  for (const auto& test : cases) {
+    SCOPED_TRACE(testing::Message() << test.scheme << "://" << test.host << ":"
+                                    << test.port);
+    url::SchemeHostPort tuple(test.scheme, test.host, test.port);
+    EXPECT_EQ(test.scheme, tuple.scheme());
+    EXPECT_EQ(test.host, tuple.host());
+    EXPECT_EQ(test.port, tuple.port());
+    EXPECT_TRUE(tuple.IsValid());
+    EXPECT_EQ(tuple, tuple);
+    ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL());
+  }
+}
+
+TEST_F(SchemeHostPortTest, InvalidConstruction) {
+  struct TestCases {
+    const char* scheme;
+    const char* host;
+    uint16_t port;
+  } cases[] = {{"", "", 0},
+               {"data", "", 0},
+               {"blob", "", 0},
+               {"filesystem", "", 0},
+               {"http", "", 80},
+               {"data", "example.com", 80},
+               {"http", "☃.net", 80},
+               {"http\nmore", "example.com", 80},
+               {"http\rmore", "example.com", 80},
+               {"http\n", "example.com", 80},
+               {"http\r", "example.com", 80},
+               {"http", "example.com\nnot-example.com", 80},
+               {"http", "example.com\rnot-example.com", 80},
+               {"http", "example.com\n", 80},
+               {"http", "example.com\r", 80},
+               {"file", "", 80}};  // Can't have a port for file: scheme.
+
+  for (const auto& test : cases) {
+    SCOPED_TRACE(testing::Message() << test.scheme << "://" << test.host << ":"
+                                    << test.port);
+    url::SchemeHostPort tuple(test.scheme, test.host, test.port);
+    EXPECT_EQ("", tuple.scheme());
+    EXPECT_EQ("", tuple.host());
+    EXPECT_EQ(0, tuple.port());
+    EXPECT_FALSE(tuple.IsValid());
+    EXPECT_EQ(tuple, tuple);
+    ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL());
+  }
+}
+
+TEST_F(SchemeHostPortTest, InvalidConstructionWithEmbeddedNulls) {
+  struct TestCases {
+    const char* scheme;
+    size_t scheme_length;
+    const char* host;
+    size_t host_length;
+    uint16_t port;
+  } cases[] = {{"http\0more", 9, "example.com", 11, 80},
+               {"http\0", 5, "example.com", 11, 80},
+               {"\0http", 5, "example.com", 11, 80},
+               {"http", 4, "example.com\0not-example.com", 27, 80},
+               {"http", 4, "example.com\0", 12, 80},
+               {"http", 4, "\0example.com", 12, 80}};
+
+  for (const auto& test : cases) {
+    SCOPED_TRACE(testing::Message() << test.scheme << "://" << test.host << ":"
+                                    << test.port);
+    url::SchemeHostPort tuple(std::string(test.scheme, test.scheme_length),
+                              std::string(test.host, test.host_length),
+                              test.port);
+    EXPECT_EQ("", tuple.scheme());
+    EXPECT_EQ("", tuple.host());
+    EXPECT_EQ(0, tuple.port());
+    EXPECT_FALSE(tuple.IsValid());
+    ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL());
+  }
+}
+
+TEST_F(SchemeHostPortTest, GURLConstruction) {
+  struct TestCases {
+    const char* url;
+    const char* scheme;
+    const char* host;
+    uint16_t port;
+  } cases[] = {
+      {"http://192.168.9.1/", "http", "192.168.9.1", 80},
+      {"http://[2001:db8::1]/", "http", "[2001:db8::1]", 80},
+      {"http://☃.net/", "http", "xn--n3h.net", 80},
+      {"http://example.com/", "http", "example.com", 80},
+      {"http://example.com:123/", "http", "example.com", 123},
+      {"https://example.com/", "https", "example.com", 443},
+      {"https://example.com:123/", "https", "example.com", 123},
+      {"file:///etc/passwd", "file", "", 0},
+      {"file://example.com/etc/passwd", "file", "example.com", 0},
+      {"http://u:p@example.com/", "http", "example.com", 80},
+      {"http://u:p@example.com/path", "http", "example.com", 80},
+      {"http://u:p@example.com/path?123", "http", "example.com", 80},
+      {"http://u:p@example.com/path?123#hash", "http", "example.com", 80},
+  };
+
+  for (const auto& test : cases) {
+    SCOPED_TRACE(test.url);
+    GURL url(test.url);
+    EXPECT_TRUE(url.is_valid());
+    url::SchemeHostPort tuple(url);
+    EXPECT_EQ(test.scheme, tuple.scheme());
+    EXPECT_EQ(test.host, tuple.host());
+    EXPECT_EQ(test.port, tuple.port());
+    EXPECT_TRUE(tuple.IsValid());
+    EXPECT_EQ(tuple, tuple);
+    ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL());
+  }
+}
+
+TEST_F(SchemeHostPortTest, Serialization) {
+  struct TestCases {
+    const char* url;
+    const char* expected;
+  } cases[] = {
+      {"http://192.168.9.1/", "http://192.168.9.1"},
+      {"http://[2001:db8::1]/", "http://[2001:db8::1]"},
+      {"http://☃.net/", "http://xn--n3h.net"},
+      {"http://example.com/", "http://example.com"},
+      {"http://example.com:123/", "http://example.com:123"},
+      {"https://example.com/", "https://example.com"},
+      {"https://example.com:123/", "https://example.com:123"},
+      {"file:///etc/passwd", "file://"},
+      {"file://example.com/etc/passwd", "file://example.com"},
+      {"https://example.com:0/", "https://example.com:0"},
+  };
+
+  for (const auto& test : cases) {
+    SCOPED_TRACE(test.url);
+    GURL url(test.url);
+    url::SchemeHostPort tuple(url);
+    EXPECT_EQ(test.expected, tuple.Serialize());
+    ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL());
+  }
+}
+
+TEST_F(SchemeHostPortTest, Comparison) {
+  // These tuples are arranged in increasing order:
+  struct SchemeHostPorts {
+    const char* scheme;
+    const char* host;
+    uint16_t port;
+  } tuples[] = {
+      {"http", "a", 80},
+      {"http", "b", 80},
+      {"https", "a", 80},
+      {"https", "b", 80},
+      {"http", "a", 81},
+      {"http", "b", 81},
+      {"https", "a", 81},
+      {"https", "b", 81},
+  };
+
+  for (size_t i = 0; i < std::size(tuples); i++) {
+    url::SchemeHostPort current(tuples[i].scheme, tuples[i].host,
+                                tuples[i].port);
+    for (size_t j = i; j < std::size(tuples); j++) {
+      url::SchemeHostPort to_compare(tuples[j].scheme, tuples[j].host,
+                                     tuples[j].port);
+      EXPECT_EQ(i < j, current < to_compare) << i << " < " << j;
+      EXPECT_EQ(j < i, to_compare < current) << j << " < " << i;
+    }
+  }
+}
+
+// Some schemes have optional authority. Make sure that GURL conversion from
+// SchemeHostPort is not opinionated in that regard. For more info, see
+// crbug.com/820194, where we considered all SchemeHostPorts with
+// SCHEME_WITH_HOST (i.e., without ports) as valid with empty hosts, even though
+// most are not (e.g. chrome URLs).
+TEST_F(SchemeHostPortTest, EmptyHostGurlConversion) {
+  url::AddStandardScheme("chrome", url::SCHEME_WITH_HOST);
+
+  GURL chrome_url("chrome:");
+  EXPECT_FALSE(chrome_url.is_valid());
+
+  url::SchemeHostPort chrome_tuple("chrome", "", 0);
+  EXPECT_FALSE(chrome_tuple.GetURL().is_valid());
+  ExpectParsedUrlsEqual(GURL(chrome_tuple.Serialize()), chrome_tuple.GetURL());
+  ExpectParsedUrlsEqual(chrome_url, chrome_tuple.GetURL());
+}
+
+}  // namespace
diff --git a/third_party/mozilla/LICENSE.txt b/third_party/mozilla/LICENSE.txt
new file mode 100644
index 00000000000..ac40837824a
--- /dev/null
+++ b/third_party/mozilla/LICENSE.txt
@@ -0,0 +1,65 @@
+Copyright 2007, Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+    * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+-------------------------------------------------------------------------------
+
+The file url_parse.cc is based on nsURLParsers.cc from Mozilla. This file is
+licensed separately as follows:
+
+The contents of this file are subject to the Mozilla Public License Version
+1.1 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+http://www.mozilla.org/MPL/
+
+Software distributed under the License is distributed on an "AS IS" basis,
+WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+for the specific language governing rights and limitations under the
+License.
+
+The Original Code is mozilla.org code.
+
+The Initial Developer of the Original Code is
+Netscape Communications Corporation.
+Portions created by the Initial Developer are Copyright (C) 1998
+the Initial Developer. All Rights Reserved.
+
+Contributor(s):
+  Darin Fisher (original author)
+
+Alternatively, the contents of this file may be used under the terms of
+either the GNU General Public License Version 2 or later (the "GPL"), or
+the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+in which case the provisions of the GPL or the LGPL are applicable instead
+of those above. If you wish to allow use of your version of this file only
+under the terms of either the GPL or the LGPL, and not to allow others to
+use your version of this file under the terms of the MPL, indicate your
+decision by deleting the provisions above and replace them with the notice
+and other provisions required by the GPL or the LGPL. If you do not delete
+the provisions above, a recipient may use your version of this file under
+the terms of any one of the MPL, the GPL or the LGPL.
diff --git a/third_party/mozilla/README.chromium b/third_party/mozilla/README.chromium
new file mode 100644
index 00000000000..ef396d3d1dd
--- /dev/null
+++ b/third_party/mozilla/README.chromium
@@ -0,0 +1,8 @@
+Name: url_parse
+URL: http://mxr.mozilla.org/comm-central/source/mozilla/netwerk/base/src/nsURLParsers.cpp
+License: BSD and MPL 1.1/GPL 2.0/LGPL 2.1
+License File: LICENSE.txt
+
+Description:
+
+The file url_parse.cc is based on nsURLParsers.cc from Mozilla.
diff --git a/third_party/mozilla/url_parse.cc b/third_party/mozilla/url_parse.cc
new file mode 100644
index 00000000000..61fb94e98ff
--- /dev/null
+++ b/third_party/mozilla/url_parse.cc
@@ -0,0 +1,963 @@
+/* Based on nsURLParsers.cc from Mozilla
+ * -------------------------------------
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is mozilla.org code.
+ *
+ * The Initial Developer of the Original Code is
+ * Netscape Communications Corporation.
+ * Portions created by the Initial Developer are Copyright (C) 1998
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Darin Fisher (original author)
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#include "url/third_party/mozilla/url_parse.h"
+
+#include <stdlib.h>
+
+#include <ostream>
+
+#include "base/check_op.h"
+#include "url/url_parse_internal.h"
+#include "url/url_util.h"
+#include "url/url_util_internal.h"
+
+namespace url {
+
+namespace {
+
+// Returns true if the given character is a valid digit to use in a port.
+inline bool IsPortDigit(char16_t ch) {
+  return ch >= '0' && ch <= '9';
+}
+
+// Returns the offset of the next authority terminator in the input starting
+// from start_offset. If no terminator is found, the return value will be equal
+// to spec_len.
+template <typename CHAR>
+int FindNextAuthorityTerminator(const CHAR* spec,
+                                int start_offset,
+                                int spec_len) {
+  for (int i = start_offset; i < spec_len; i++) {
+    if (IsAuthorityTerminator(spec[i]))
+      return i;
+  }
+  return spec_len;  // Not found.
+}
+
+template <typename CHAR>
+void ParseUserInfo(const CHAR* spec,
+                   const Component& user,
+                   Component* username,
+                   Component* password) {
+  // Find the first colon in the user section, which separates the username and
+  // password.
+  int colon_offset = 0;
+  while (colon_offset < user.len && spec[user.begin + colon_offset] != ':')
+    colon_offset++;
+
+  if (colon_offset < user.len) {
+    // Found separator: <username>:<password>
+    *username = Component(user.begin, colon_offset);
+    *password = MakeRange(user.begin + colon_offset + 1, user.begin + user.len);
+  } else {
+    // No separator, treat everything as the username
+    *username = user;
+    *password = Component();
+  }
+}
+
+template <typename CHAR>
+void ParseServerInfo(const CHAR* spec,
+                     const Component& serverinfo,
+                     Component* hostname,
+                     Component* port_num) {
+  if (serverinfo.len == 0) {
+    // No server info, host name is empty.
+    hostname->reset();
+    port_num->reset();
+    return;
+  }
+
+  // If the host starts with a left-bracket, assume the entire host is an
+  // IPv6 literal.  Otherwise, assume none of the host is an IPv6 literal.
+  // This assumption will be overridden if we find a right-bracket.
+  //
+  // Our IPv6 address canonicalization code requires both brackets to exist,
+  // but the ability to locate an incomplete address can still be useful.
+  int ipv6_terminator = spec[serverinfo.begin] == '[' ? serverinfo.end() : -1;
+  int colon = -1;
+
+  // Find the last right-bracket, and the last colon.
+  for (int i = serverinfo.begin; i < serverinfo.end(); i++) {
+    switch (spec[i]) {
+      case ']':
+        ipv6_terminator = i;
+        break;
+      case ':':
+        colon = i;
+        break;
+    }
+  }
+
+  if (colon > ipv6_terminator) {
+    // Found a port number: <hostname>:<port>
+    *hostname = MakeRange(serverinfo.begin, colon);
+    if (hostname->len == 0)
+      hostname->reset();
+    *port_num = MakeRange(colon + 1, serverinfo.end());
+  } else {
+    // No port: <hostname>
+    *hostname = serverinfo;
+    port_num->reset();
+  }
+}
+
+// Given an already-identified auth section, breaks it into its constituent
+// parts. The port is not converted to an integer here: |port_num| receives
+// the range of the port text, or is reset when the authority contains no
+// port separator.
+template <typename CHAR>
+void DoParseAuthority(const CHAR* spec,
+                      const Component& auth,
+                      Component* username,
+                      Component* password,
+                      Component* hostname,
+                      Component* port_num) {
+  DCHECK(auth.is_valid()) << "We should always get an authority";
+  if (auth.len == 0) {
+    username->reset();
+    password->reset();
+    hostname->reset();
+    port_num->reset();
+    return;
+  }
+
+  // Search backwards for @, which is the separator between the user info and
+  // the server info.
+  int i = auth.begin + auth.len - 1;
+  while (i > auth.begin && spec[i] != '@')
+    i--;
+
+  if (spec[i] == '@') {
+    // Found user info: <user-info>@<server-info>
+    ParseUserInfo(spec, Component(auth.begin, i - auth.begin), username,
+                  password);
+    ParseServerInfo(spec, MakeRange(i + 1, auth.begin + auth.len), hostname,
+                    port_num);
+  } else {
+    // No user info, everything is server info.
+    username->reset();
+    password->reset();
+    ParseServerInfo(spec, auth, hostname, port_num);
+  }
+}
+
+template <typename CHAR>
+inline void FindQueryAndRefParts(const CHAR* spec,
+                                 const Component& path,
+                                 int* query_separator,
+                                 int* ref_separator) {
+  if constexpr (sizeof(*spec) == 1) {
+    // memchr is much faster than any scalar code we can write.
+    const CHAR* ptr = spec + path.begin;
+    const CHAR* first_hash =
+        reinterpret_cast<const CHAR*>(memchr(ptr, '#', path.len));
+    size_t len_before_fragment =
+        first_hash == nullptr ? path.len : first_hash - ptr;
+    const CHAR* first_question =
+        reinterpret_cast<const CHAR*>(memchr(ptr, '?', len_before_fragment));
+    if (first_hash != nullptr) {
+      *ref_separator = first_hash - spec;
+    }
+    if (first_question != nullptr) {
+      *query_separator = first_question - spec;
+    }
+  } else {
+    int path_end = path.begin + path.len;
+    for (int i = path.begin; i < path_end; i++) {
+      switch (spec[i]) {
+        case '?':
+          // Only match the query string if it precedes the reference fragment
+          // and when we haven't found one already.
+          if (*query_separator < 0)
+            *query_separator = i;
+          break;
+        case '#':
+          // Record the first # sign only.
+          if (*ref_separator < 0) {
+            *ref_separator = i;
+            return;
+          }
+          break;
+      }
+    }
+  }
+}
+
+template <typename CHAR>
+void ParsePath(const CHAR* spec,
+               const Component& path,
+               Component* filepath,
+               Component* query,
+               Component* ref) {
+  // path = [/]<segment1>/<segment2>/<...>/<segmentN>;<param>?<query>#<ref>
+
+  // Special case when there is no path.
+  if (path.len == -1) {
+    filepath->reset();
+    query->reset();
+    ref->reset();
+    return;
+  }
+  DCHECK(path.is_nonempty()) << "We should never have 0 length paths";
+
+  // Search for first occurrence of either ? or #.
+  int query_separator = -1;  // Index of the '?'
+  int ref_separator = -1;    // Index of the '#'
+  FindQueryAndRefParts(spec, path, &query_separator, &ref_separator);
+
+  // Markers pointing to the character after each of these corresponding
+  // components. The code below works from the end back to the beginning,
+  // and will update these indices as it finds components that exist.
+  int file_end, query_end;
+
+  // Ref fragment: from the # to the end of the path.
+  int path_end = path.begin + path.len;
+  if (ref_separator >= 0) {
+    file_end = query_end = ref_separator;
+    *ref = MakeRange(ref_separator + 1, path_end);
+  } else {
+    file_end = query_end = path_end;
+    ref->reset();
+  }
+
+  // Query fragment: everything from the ? to the next boundary (either the end
+  // of the path or the ref fragment).
+  if (query_separator >= 0) {
+    file_end = query_separator;
+    *query = MakeRange(query_separator + 1, query_end);
+  } else {
+    query->reset();
+  }
+
+  // File path: treat an empty file path as no file path.
+  if (file_end != path.begin)
+    *filepath = MakeRange(path.begin, file_end);
+  else
+    filepath->reset();
+}
+
+template <typename CHAR>
+bool DoExtractScheme(const CHAR* url, int url_len, Component* scheme) {
+  // Skip the leading characters that ShouldTrimFromURL() flags for removal.
+  int start = 0;
+  while (start < url_len && ShouldTrimFromURL(url[start]))
+    ++start;
+  if (start == url_len)
+    return false;  // Nothing is left once the leading characters are skipped.
+
+  // The scheme runs from |start| up to, but not including, the first colon.
+  int colon = start;
+  while (colon < url_len && url[colon] != ':')
+    ++colon;
+  if (colon >= url_len)
+    return false;  // No colon found: no scheme, and |scheme| is left as is.
+  *scheme = MakeRange(start, colon);
+  return true;
+}
+
+// Fills in all members of the Parsed structure except for the scheme.
+//
+// |spec| is the full spec being parsed, of length |spec_len|.
+// |after_scheme| is the index of the character immediately following the
+//   scheme's colon, which is where parsing begins.
+//
+// Compatibility data points. Each cell lists the "host", "path" extracted:
+// Input                IE6             Firefox                Us
+// -----                --------------  --------------         --------------
+// http://foo.com/      "foo.com", "/"  "foo.com", "/"         "foo.com", "/"
+// http:foo.com/        "foo.com", "/"  "foo.com", "/"         "foo.com", "/"
+// http:/foo.com/       fail(*)         "foo.com", "/"         "foo.com", "/"
+// http:\foo.com/       fail(*)         "\foo.com", "/"(fail)  "foo.com", "/"
+// http:////foo.com/    "foo.com", "/"  "foo.com", "/"         "foo.com", "/"
+//
+// (*) Interestingly, although IE fails to load these URLs, its history
+// canonicalizer handles them, meaning if you've been to the corresponding
+// "http://foo.com/" link, it will be colored.
+template <typename CHAR>
+void DoParseAfterScheme(const CHAR* spec,
+                        int spec_len,
+                        int after_scheme,
+                        Parsed* parsed) {
+  // However many slashes follow the scheme, all of them are skipped and what
+  // comes next is treated as the authority section.
+  const int authority_begin =
+      after_scheme + CountConsecutiveSlashes(spec, after_scheme, spec_len);
+
+  // Split the rest into two main parts: the authority (username, password,
+  // host, and port), which runs up to the next authority terminator or the
+  // end of the spec, and the full path (path, query, and reference), which is
+  // everything from that terminator onwards.
+  const int authority_end =
+      FindNextAuthorityTerminator(spec, authority_begin, spec_len);
+  const Component authority = MakeRange(authority_begin, authority_end);
+
+  // When the authority runs to the end of the spec no path was found, so the
+  // full path is left unspecified.
+  const Component full_path = (authority_end == spec_len)
+                                  ? Component()
+                                  : MakeRange(authority_end, spec_len);
+
+  // Now parse those two sub-parts.
+  DoParseAuthority(spec, authority, &parsed->username, &parsed->password,
+                   &parsed->host, &parsed->port);
+  ParsePath(spec, full_path, &parsed->path, &parsed->query, &parsed->ref);
+}
+
+// The main parsing function for standard URLs. Standard URLs have a scheme,
+// host, path, etc.
+template <typename CHAR>
+void DoParseStandardURL(const CHAR* spec, int spec_len, Parsed* parsed) {
+  DCHECK(spec_len >= 0);
+
+  // Strip leading & trailing spaces and control characters.
+  int trimmed_begin = 0;
+  TrimURL(spec, &trimmed_begin, &spec_len);
+
+  // Without a colon there is no scheme. Calling the whole input the scheme
+  // would be the alternative; both produce an invalid URL, but this way seems
+  // less wrong in more cases. Parsing then resumes at the trimmed start.
+  int after_scheme = trimmed_begin;
+  if (!DoExtractScheme(spec, spec_len, &parsed->scheme)) {
+    parsed->scheme.reset();
+  } else {
+    // Resume right behind the colon that terminates the scheme.
+    after_scheme = parsed->scheme.end() + 1;
+  }
+  DoParseAfterScheme(spec, spec_len, after_scheme, parsed);
+}
+
+// Parses a filesystem: URL. The nested URL's components are stored in
+// parsed->inner_parsed(); the outer path, query and ref go on |parsed|.
+template <typename CHAR>
+void DoParseFileSystemURL(const CHAR* spec, int spec_len, Parsed* parsed) {
+  DCHECK(spec_len >= 0);
+
+  // Get the unused parts of the URL out of the way.
+  parsed->username.reset();
+  parsed->password.reset();
+  parsed->host.reset();
+  parsed->port.reset();
+  parsed->path.reset();          // May use this; reset for convenience.
+  parsed->ref.reset();           // May use this; reset for convenience.
+  parsed->query.reset();         // May use this; reset for convenience.
+  parsed->clear_inner_parsed();  // May use this; reset for convenience.
+
+  // Strip leading & trailing spaces and control characters.
+  int begin = 0;
+  TrimURL(spec, &begin, &spec_len);
+
+  // Handle empty specs or ones that contain only whitespace or control chars.
+  if (begin == spec_len) {
+    parsed->scheme.reset();
+    return;
+  }
+
+  // Extract the outer scheme; a filesystem URL without one is not valid.
+  if (!DoExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {
+    parsed->scheme.reset();
+    return;
+  }
+  // Offset the results since we gave DoExtractScheme a substring.
+  parsed->scheme.begin += begin;
+
+  // Nothing follows the scheme's colon, so there is no inner URL to parse.
+  if (parsed->scheme.end() == spec_len - 1)
+    return;
+  const int inner_start = parsed->scheme.end() + 1;
+
+  Component inner_scheme;
+  const CHAR* inner_spec = &spec[inner_start];
+  int inner_spec_len = spec_len - inner_start;
+
+  if (DoExtractScheme(inner_spec, inner_spec_len, &inner_scheme)) {
+    // Offset the results since we gave DoExtractScheme a substring.
+    inner_scheme.begin += inner_start;
+
+    if (inner_scheme.end() == spec_len - 1)
+      return;
+  } else {
+    // No inner scheme; the best we can do is return "filesystem://".
+    return;
+  }
+
+  Parsed inner_parsed;
+
+  if (CompareSchemeComponent(spec, inner_scheme, kFileScheme)) {
+    // File URLs are special.
+    ParseFileURL(inner_spec, inner_spec_len, &inner_parsed);
+  } else if (CompareSchemeComponent(spec, inner_scheme, kFileSystemScheme)) {
+    // Filesystem URLs don't nest.
+    return;
+  } else if (IsStandard(spec, inner_scheme)) {
+    // All "normal" URLs.
+    DoParseStandardURL(inner_spec, inner_spec_len, &inner_parsed);
+  } else {
+    return;
+  }
+
+  // All members of inner_parsed need to be offset by inner_start.
+  // If we had any scheme that supported nesting more than one level deep,
+  // we'd have to recurse into the inner_parsed's inner_parsed when
+  // adjusting by inner_start.
+  inner_parsed.scheme.begin += inner_start;
+  inner_parsed.username.begin += inner_start;
+  inner_parsed.password.begin += inner_start;
+  inner_parsed.host.begin += inner_start;
+  inner_parsed.port.begin += inner_start;
+  inner_parsed.query.begin += inner_start;
+  inner_parsed.ref.begin += inner_start;
+  inner_parsed.path.begin += inner_start;
+
+  // Query and ref move from inner_parsed to parsed.
+  parsed->query = inner_parsed.query;
+  inner_parsed.query.reset();
+  parsed->ref = inner_parsed.ref;
+  inner_parsed.ref.reset();
+
+  parsed->set_inner_parsed(inner_parsed);
+  if (!inner_parsed.scheme.is_valid() || !inner_parsed.path.is_valid() ||
+      inner_parsed.inner_parsed()) {
+    return;
+  }
+
+  // The path in inner_parsed should start with a slash, then have a filesystem
+  // type followed by a slash.  From the first slash up to but excluding the
+  // second should be what it keeps; the rest goes to parsed.  If the path ends
+  // before the second slash, it's still pretty clear what the user meant, so
+  // we'll let that through.
+  if (!IsURLSlash(spec[inner_parsed.path.begin]))
+    return;
+  // The search for the second slash must stay inside the inner path. Bounding
+  // it by spec_len instead would let it run on into the query or ref, which
+  // were already split off above, and produce a negative outer path length.
+  const int path_limit = inner_parsed.path.end();
+  int inner_path_end = inner_parsed.path.begin + 1;  // skip the leading slash
+  while (inner_path_end < path_limit && !IsURLSlash(spec[inner_path_end]))
+    ++inner_path_end;
+  parsed->path.begin = inner_path_end;
+  int new_inner_path_length = inner_path_end - inner_parsed.path.begin;
+  parsed->path.len = inner_parsed.path.len - new_inner_path_length;
+  parsed->inner_parsed()->path.len = new_inner_path_length;
+}
+
+// Initializes a path URL which is merely a scheme followed by a path. Examples
+// include "about:foo" and "javascript:alert('bar');"
+template <typename CHAR>
+void DoParsePathURL(const CHAR* spec,
+                    int spec_len,
+                    bool trim_path_end,
+                    Parsed* parsed) {
+  // Only the scheme, path, query and ref are ever used for path URLs. Start
+  // with everything but the scheme unspecified; path, query and ref are filled
+  // in at the very end, provided something follows the scheme.
+  parsed->username.reset();
+  parsed->password.reset();
+  parsed->host.reset();
+  parsed->port.reset();
+  parsed->path.reset();
+  parsed->query.reset();
+  parsed->ref.reset();
+
+  // Strip spaces and control characters; TrimURL also gets |trim_path_end|.
+  int trimmed_begin = 0;
+  TrimURL(spec, &trimmed_begin, &spec_len, trim_path_end);
+
+  // Empty, or only whitespace/control chars: the path was reset above already.
+  if (trimmed_begin == spec_len) {
+    parsed->scheme.reset();
+    return;
+  }
+
+  // Pull off the scheme when there is one. Whatever follows it (or the whole
+  // trimmed spec, if there is no scheme) is handed to ParsePath below.
+  int rest_begin = trimmed_begin;
+  const bool has_scheme = ExtractScheme(
+      &spec[trimmed_begin], spec_len - trimmed_begin, &parsed->scheme);
+  if (has_scheme) {
+    // Offset the results since we gave ExtractScheme a substring.
+    parsed->scheme.begin += trimmed_begin;
+    rest_begin = parsed->scheme.end() + 1;
+  } else {
+    parsed->scheme.reset();
+  }
+
+  // Nothing after the scheme: path, query and ref stay unspecified.
+  if (rest_begin == spec_len)
+    return;
+  DCHECK_LT(rest_begin, spec_len);
+
+  ParsePath(spec, MakeRange(rest_begin, spec_len), &parsed->path,
+            &parsed->query, &parsed->ref);
+}
+
+// Parses a mailto: URL into a scheme, a path and an optional query. All the
+// other components are reset to unspecified.
+template <typename CHAR>
+void DoParseMailtoURL(const CHAR* spec, int spec_len, Parsed* parsed) {
+  DCHECK(spec_len >= 0);
+
+  // Get the non-path and non-scheme parts of the URL out of the way, we never
+  // use them.
+  parsed->username.reset();
+  parsed->password.reset();
+  parsed->host.reset();
+  parsed->port.reset();
+  parsed->ref.reset();
+  parsed->query.reset();  // May use this; reset for convenience.
+
+  // Strip leading & trailing spaces and control characters.
+  int trimmed_begin = 0;
+  TrimURL(spec, &trimmed_begin, &spec_len);
+
+  // Handle empty specs or ones that contain only whitespace or control chars.
+  if (trimmed_begin == spec_len) {
+    parsed->scheme.reset();
+    parsed->path.reset();
+    return;
+  }
+
+  // [path_begin, path_end) is whatever follows the scheme. Both stay at -1
+  // (an empty range) when the spec ends right after the scheme's colon.
+  int path_begin = -1;
+  int path_end = -1;
+  if (!ExtractScheme(&spec[trimmed_begin], spec_len - trimmed_begin,
+                     &parsed->scheme)) {
+    // No scheme found, just path.
+    parsed->scheme.reset();
+    path_begin = trimmed_begin;
+    path_end = spec_len;
+  } else {
+    // Offset the results since we gave ExtractScheme a substring.
+    parsed->scheme.begin += trimmed_begin;
+    if (parsed->scheme.end() != spec_len - 1) {
+      path_begin = parsed->scheme.end() + 1;
+      path_end = spec_len;
+    }
+  }
+
+  // The first '?' splits [path_begin, path_end) into a path and a query.
+  int question_pos = path_begin;
+  while (question_pos < path_end && spec[question_pos] != '?')
+    ++question_pos;
+  if (question_pos < path_end) {
+    parsed->query = MakeRange(question_pos + 1, path_end);
+    path_end = question_pos;
+  }
+
+  // For compatibility with the standard URL parser, treat no path as
+  // -1, rather than having a length of 0
+  if (path_begin != path_end)
+    parsed->path = MakeRange(path_begin, path_end);
+  else
+    parsed->path.reset();
+}
+
+// Converts a port number in a string to an integer. The input is not
+// NUL-terminated, so the digits are first copied into a small stack buffer
+// (the maximum number of digits in a valid port is known) that can be
+// terminated and handed to atoi.
+// Returns PORT_UNSPECIFIED for an empty component, and PORT_INVALID when it
+// holds too many digits, a non-digit, or a value above 65535.
+template <typename CHAR>
+int DoParsePort(const CHAR* spec, const Component& component) {
+  const int kMaxDigits = 5;  // Digits in the largest valid port, 65535.
+
+  // Easy success case when there is no port.
+  if (component.is_empty())
+    return PORT_UNSPECIFIED;
+
+  // Skip over any leading 0s.
+  const int port_end = component.end();
+  int first_digit = component.begin;
+  while (first_digit < port_end && spec[first_digit] == '0')
+    ++first_digit;
+  if (first_digit == port_end)
+    return 0;  // All digits were 0.
+
+  // Verify we don't have too many digits (we'll be copying to our buffer so
+  // we need to double-check).
+  const int num_digits = port_end - first_digit;
+  if (num_digits > kMaxDigits)
+    return PORT_INVALID;
+
+  // Copy the digits to the buffer, failing on anything that is not one.
+  char digits[kMaxDigits + 1];  // +1 for null terminator
+  for (int i = 0; i < num_digits; ++i) {
+    const CHAR ch = spec[first_digit + i];
+    if (!IsPortDigit(ch)) {
+      return PORT_INVALID;
+    }
+    digits[i] = static_cast<char>(ch);
+  }
+
+  // Null-terminate the string and convert to integer. Since we guarantee
+  // only digits, atoi's lack of error handling is OK.
+  digits[num_digits] = 0;
+  const int port = atoi(digits);
+  if (port > 65535)
+    return PORT_INVALID;  // Out of range.
+  return port;
+}
+
+template <typename CHAR>
+void DoExtractFileName(const CHAR* spec,
+                       const Component& path,
+                       Component* file_name) {
+  // Handle empty paths: they have no file names.
+  if (path.is_empty()) {
+    file_name->reset();
+    return;
+  }
+
+  // Walk backwards from the end of the path. Each ';' met on the way pulls
+  // the end of the name in, so the name finishes at the first ';' following
+  // the last slash; reaching that slash ends the walk.
+  int name_end = path.end();
+  // If no slash turns up the input was degenerate (paths generally start
+  // with a slash); everything up to |name_end| is then called the file name.
+  int name_begin = path.begin;
+  for (int pos = path.end(); pos > path.begin; --pos) {
+    const CHAR ch = spec[pos - 1];
+    if (ch == ';') {
+      name_end = pos - 1;
+    } else if (IsURLSlash(ch)) {
+      name_begin = pos;
+      break;
+    }
+  }
+  *file_name = MakeRange(name_begin, name_end);
+}
+
+// Pops the first "key=value" pair off the front of |query|. Returns false
+// once |query| has nothing left; otherwise fills |key| and |value| and
+// shrinks |query| to what follows the pair.
+template <typename CHAR>
+bool DoExtractQueryKeyValue(const CHAR* spec,
+                            Component* query,
+                            Component* key,
+                            Component* value) {
+  if (query->is_empty())
+    return false;
+
+  const int query_end = query->end();
+  int pos = query->begin;
+
+  // The key starts right at the front and runs up to the first '=' or '&'.
+  const int key_begin = pos;
+  while (pos < query_end && spec[pos] != '=' && spec[pos] != '&')
+    ++pos;
+  *key = MakeRange(key_begin, pos);
+
+  // An '=' separates the key from its value; step over it when present.
+  if (pos < query_end && spec[pos] == '=')
+    ++pos;
+
+  // The value runs up to the next '&' (or the end of the query). It is empty
+  // when the key was not followed by '='.
+  const int value_begin = pos;
+  while (pos < query_end && spec[pos] != '&')
+    ++pos;
+  *value = MakeRange(value_begin, pos);
+
+  // Step over the '&' that ends this pair, if any, and hand back the rest.
+  if (pos < query_end && spec[pos] == '&')
+    ++pos;
+  *query = MakeRange(pos, query_end);
+  return true;
+}
+
+}  // namespace
+
+COMPONENT_EXPORT(URL)  // Prints |component| in the form "{begin, len}".
+std::ostream& operator<<(std::ostream& os, const Component& component) {
+  return os << '{' << component.begin << ", " << component.len << "}";
+}
+
+// Default construction: every component unspecified, no nested Parsed.
+Parsed::Parsed() : potentially_dangling_markup(false), inner_parsed_(nullptr) {}
+
+// Copying duplicates the nested Parsed (if any) so each object owns its own.
+Parsed::Parsed(const Parsed& other)
+    : scheme(other.scheme),
+      username(other.username),
+      password(other.password),
+      host(other.host),
+      port(other.port),
+      path(other.path),
+      query(other.query),
+      ref(other.ref),
+      potentially_dangling_markup(other.potentially_dangling_markup),
+      inner_parsed_(other.inner_parsed_ ? new Parsed(*other.inner_parsed_)
+                                        : nullptr) {}
+
+Parsed& Parsed::operator=(const Parsed& other) {
+  if (this == &other)
+    return *this;  // Self-assignment leaves everything as it is.
+  scheme = other.scheme;
+  username = other.username;
+  password = other.password;
+  host = other.host;
+  port = other.port;
+  path = other.path;
+  query = other.query;
+  ref = other.ref;
+  potentially_dangling_markup = other.potentially_dangling_markup;
+  if (!other.inner_parsed_)
+    clear_inner_parsed();
+  else
+    set_inner_parsed(*other.inner_parsed_);  // Copies; nothing is shared.
+  return *this;
+}
+
+Parsed::~Parsed() {
+  delete inner_parsed_;  // Owned by this struct; a null pointer is a no-op.
+}
+
+int Parsed::Length() const {
+  // A ref, when present, is the last component, so the URL ends where it
+  // does. Otherwise the position where a ref would begin marks the end.
+  return ref.is_valid() ? ref.end() : CountCharactersBefore(REF, false);
+}
+
+int Parsed::CountCharactersBefore(ComponentType type,
+                                  bool include_delimiter) const {
+  if (type == SCHEME)
+    return scheme.begin;
+
+  // There will be some characters after the scheme like "://" and we don't
+  // know how many. Search forwards for the next thing until we find one.
+  int cur = 0;
+  if (scheme.is_valid())
+    cur = scheme.end() + 1;  // Advance over the ':' at the end of the scheme.
+
+  if (username.is_valid()) {
+    if (type <= USERNAME)
+      return username.begin;
+    cur = username.end() + 1;  // Advance over the '@' or ':' at the end.
+  }
+
+  if (password.is_valid()) {
+    if (type <= PASSWORD)
+      return password.begin;
+    cur = password.end() + 1;  // Advance over the '@' at the end.
+  }
+
+  if (host.is_valid()) {
+    if (type <= HOST)
+      return host.begin;
+    cur = host.end();
+  }
+
+  if (port.is_valid()) {
+    if (type < PORT || (type == PORT && include_delimiter))
+      return port.begin - 1;  // Back over delimiter.
+    if (type == PORT)
+      return port.begin;  // Don't want delimiter counted.
+    cur = port.end();
+  }
+
+  if (path.is_valid()) {
+    if (type <= PATH)
+      return path.begin;
+    cur = path.end();
+  }
+
+  if (query.is_valid()) {
+    if (type < QUERY || (type == QUERY && include_delimiter))
+      return query.begin - 1;  // Back over delimiter.
+    if (type == QUERY)
+      return query.begin;  // Don't want delimiter counted.
+    cur = query.end();
+  }
+
+  if (ref.is_valid()) {
+    if (type == REF && !include_delimiter)
+      return ref.begin;  // Don't want delimiter counted.
+
+    // Otherwise either the ref's delimiter was asked for, or the component we
+    // wanted comes before the ref and was not found; both start at the '#'.
+    return ref.begin - 1;  // Back over delimiter.
+  }
+
+  return cur;
+}
+
+Component Parsed::GetContent() const {
+  // Content runs from where the username would start to the end of the URL.
+  const int first = CountCharactersBefore(USERNAME, false);
+  const int count = Length() - first;
+  // For compatibility with the standard URL parser, no content is reported as
+  // an unspecified component (length -1) rather than one of length 0.
+  return count == 0 ? Component() : Component(first, count);
+}
+
+bool ExtractScheme(const char* url, int url_len, Component* scheme) {
+  return DoExtractScheme(url, url_len, scheme);  // char overload.
+}
+
+bool ExtractScheme(const char16_t* url, int url_len, Component* scheme) {
+  return DoExtractScheme(url, url_len, scheme);  // char16_t overload.
+}
+
+// Returns true for every character that may terminate the authority,
+// backslash included. For special backslash handling see DoParseAfterScheme.
+bool IsAuthorityTerminator(char16_t ch) {
+  return ch == '#' || ch == '?' || IsURLSlash(ch);
+}
+
+void ExtractFileName(const char* url,
+                     const Component& path,
+                     Component* file_name) {  // char overload.
+  DoExtractFileName(url, path, file_name);
+}
+
+void ExtractFileName(const char16_t* url,
+                     const Component& path,
+                     Component* file_name) {  // char16_t overload.
+  DoExtractFileName(url, path, file_name);
+}
+
+bool ExtractQueryKeyValue(const char* url,
+                          Component* query,
+                          Component* key,
+                          Component* value) {  // char overload.
+  return DoExtractQueryKeyValue(url, query, key, value);
+}
+
+bool ExtractQueryKeyValue(const char16_t* url,
+                          Component* query,
+                          Component* key,
+                          Component* value) {  // char16_t overload.
+  return DoExtractQueryKeyValue(url, query, key, value);
+}
+
+void ParseAuthority(const char* spec,
+                    const Component& auth,
+                    Component* username,
+                    Component* password,
+                    Component* hostname,
+                    Component* port_num) {  // char overload.
+  DoParseAuthority(spec, auth, username, password, hostname, port_num);
+}
+
+void ParseAuthority(const char16_t* spec,
+                    const Component& auth,
+                    Component* username,
+                    Component* password,
+                    Component* hostname,
+                    Component* port_num) {  // char16_t overload.
+  DoParseAuthority(spec, auth, username, password, hostname, port_num);
+}
+
+int ParsePort(const char* url, const Component& port) {
+  return DoParsePort(url, port);  // char overload.
+}
+
+int ParsePort(const char16_t* url, const Component& port) {
+  return DoParsePort(url, port);  // char16_t overload.
+}
+
+void ParseStandardURL(const char* url, int url_len, Parsed* parsed) {
+  DoParseStandardURL(url, url_len, parsed);  // char overload.
+}
+
+void ParseStandardURL(const char16_t* url, int url_len, Parsed* parsed) {
+  DoParseStandardURL(url, url_len, parsed);  // char16_t overload.
+}
+
+void ParsePathURL(const char* url,
+                  int url_len,
+                  bool trim_path_end,
+                  Parsed* parsed) {  // char overload.
+  DoParsePathURL(url, url_len, trim_path_end, parsed);
+}
+
+void ParsePathURL(const char16_t* url,
+                  int url_len,
+                  bool trim_path_end,
+                  Parsed* parsed) {  // char16_t overload.
+  DoParsePathURL(url, url_len, trim_path_end, parsed);
+}
+
+void ParseFileSystemURL(const char* url, int url_len, Parsed* parsed) {
+  DoParseFileSystemURL(url, url_len, parsed);  // char overload.
+}
+
+void ParseFileSystemURL(const char16_t* url, int url_len, Parsed* parsed) {
+  DoParseFileSystemURL(url, url_len, parsed);  // char16_t overload.
+}
+
+void ParseMailtoURL(const char* url, int url_len, Parsed* parsed) {
+  DoParseMailtoURL(url, url_len, parsed);  // char overload.
+}
+
+void ParseMailtoURL(const char16_t* url, int url_len, Parsed* parsed) {
+  DoParseMailtoURL(url, url_len, parsed);  // char16_t overload.
+}
+
+void ParsePathInternal(const char* spec,
+                       const Component& path,
+                       Component* filepath,
+                       Component* query,
+                       Component* ref) {  // char overload of ParsePath.
+  ParsePath(spec, path, filepath, query, ref);
+}
+
+void ParsePathInternal(const char16_t* spec,
+                       const Component& path,
+                       Component* filepath,
+                       Component* query,
+                       Component* ref) {  // char16_t overload of ParsePath.
+  ParsePath(spec, path, filepath, query, ref);
+}
+
+void ParseAfterScheme(const char* spec,
+                      int spec_len,
+                      int after_scheme,
+                      Parsed* parsed) {  // char overload.
+  DoParseAfterScheme(spec, spec_len, after_scheme, parsed);
+}
+
+void ParseAfterScheme(const char16_t* spec,
+                      int spec_len,
+                      int after_scheme,
+                      Parsed* parsed) {  // char16_t overload.
+  DoParseAfterScheme(spec, spec_len, after_scheme, parsed);
+}
+
+}  // namespace url
diff --git a/third_party/mozilla/url_parse.h b/third_party/mozilla/url_parse.h
new file mode 100644
index 00000000000..9e824bae201
--- /dev/null
+++ b/third_party/mozilla/url_parse.h
@@ -0,0 +1,377 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_THIRD_PARTY_MOZILLA_URL_PARSE_H_
+#define URL_THIRD_PARTY_MOZILLA_URL_PARSE_H_
+
+#include <iosfwd>
+
+#include "base/component_export.h"
+
+namespace url {
+
+// Component ------------------------------------------------------------------
+
+// Represents a substring for URL parsing.
+struct Component {
+  // Default constructor: an unspecified component (offset 0, length -1).
+  Component() : begin(0), len(-1) {}
+  // Normal constructor: takes an offset and a length.
+  Component(int b, int l) : begin(b), len(l) {}
+
+  // Offset one past the last character, i.e. begin + len.
+  int end() const { return begin + len; }
+
+  // A component is valid when a length was given. A valid component may still
+  // be empty (length zero), which records that it exists but has no content.
+  bool is_valid() const { return !(len < 0); }
+
+  // Empty means "nothing in it": either zero length or not valid at all.
+  // is_nonempty() is the exact opposite.
+  bool is_empty() const { return !is_nonempty(); }
+  bool is_nonempty() const { return len > 0; }
+
+  // Puts the component back into the unspecified state.
+  void reset() {
+    len = -1;
+    begin = 0;
+  }
+
+  bool operator==(const Component& other) const {
+    return len == other.len && begin == other.begin;
+  }
+
+  int begin;  // Byte offset in the string of this component.
+  int len;    // Will be -1 if the component is unspecified.
+};
+
+// Permit printing Components by CHECK macros.
+COMPONENT_EXPORT(URL)
+std::ostream& operator<<(std::ostream& os, const Component& component);
+
+// Helper that builds the component spanning [begin, end); |end| is exclusive.
+inline Component MakeRange(int begin, int end) {
+  const int length = end - begin;
+  return Component(begin, length);
+}
+
+// Parsed ---------------------------------------------------------------------
+
+// A structure that holds the identified parts of an input URL. This structure
+// does NOT store the URL itself. The caller will have to store the URL text
+// and its corresponding Parsed structure separately.
+//
+// Typical usage would be:
+//
+//    Parsed parsed;
+//    Component scheme;
+//    if (!ExtractScheme(url, url_len, &scheme))
+//      return I_CAN_NOT_FIND_THE_SCHEME_DUDE;
+//
+//    if (IsStandardScheme(url, scheme))  // Not provided by this component
+//      ParseStandardURL(url, url_len, &parsed);
+//    else if (IsFileURL(url, scheme))    // Not provided by this component
+//      ParseFileURL(url, url_len, &parsed);
+//    else
+//      ParsePathURL(url, url_len, trim_path_end, &parsed);
+//
+struct COMPONENT_EXPORT(URL) Parsed {
+  // Identifies different components.
+  enum ComponentType {
+    SCHEME,
+    USERNAME,
+    PASSWORD,
+    HOST,
+    PORT,
+    PATH,
+    QUERY,
+    REF,
+  };
+
+  // The default constructor is sufficient for the components, but inner_parsed_
+  // requires special handling.
+  Parsed();
+  Parsed(const Parsed&);
+  Parsed& operator=(const Parsed&);
+  ~Parsed();
+
+  // Returns the length of the URL (the end of the last component).
+  //
+  // Note that for some invalid, non-canonical URLs, this may not be the length
+  // of the string. For example "http://": the parsed structure will only
+  // contain an entry for the four-character scheme, and it doesn't know about
+  // the "://". For all other last-components, it will return the real length.
+  int Length() const;
+
+  // Returns the number of characters before the given component if it exists,
+  // or where the component would be if it did exist. This will return the
+  // string length if the component would be appended to the end.
+  //
+  // Note that this can get a little funny for the port, query, and ref
+  // components which have a delimiter that is not counted as part of the
+  // component. The |include_delimiter| flag controls if you want this counted
+  // as part of the component or not when the component exists.
+  //
+  // This example shows the difference between the two flags for two of these
+  // delimited components that are present (the port and query) and one that
+  // isn't (the reference). The components that this flag affects are marked
+  // with a *.
+  //                 0         1         2
+  //                 012345678901234567890
+  // Example input:  http://foo:80/?query
+  //              include_delim=true,  ...=false  ("<-" indicates different)
+  //      SCHEME: 0                    0
+  //    USERNAME: 5                    5
+  //    PASSWORD: 5                    5
+  //        HOST: 7                    7
+  //       *PORT: 10                   11 <-
+  //        PATH: 13                   13
+  //      *QUERY: 14                   15 <-
+  //        *REF: 20                   20
+  //
+  int CountCharactersBefore(ComponentType type, bool include_delimiter) const;
+
+  // Scheme without the colon: "http://foo/" would have a scheme of "http".
+  // The length will be -1 if no scheme is specified ("foo.com"), or 0 if there
+  // is a colon but no scheme (":foo"). Note that the scheme is not guaranteed
+  // to start at the beginning of the string if there are preceding whitespace
+  // or control characters.
+  Component scheme;
+
+  // Username. Specified in URLs with an @ sign before the host. See |password|
+  Component username;
+
+  // Password. The length will be -1 if unspecified, 0 if specified but empty.
+  // Not all URLs with a username have a password, as in "http://me@host/".
+  // The password is separated from the username with a colon, as in
+  // "http://me:secret@host/"
+  Component password;
+
+  // Host name.
+  Component host;
+
+  // Port number.
+  Component port;
+
+  // Path, this is everything following the host name, stopping at the query or
+  // ref delimiter (if any). Length will be -1 if unspecified. This includes
+  // the preceding slash, so the path on "http://www.google.com/asdf" is
+  // "/asdf". As a result, it is impossible to have a 0 length path, it will
+  // be -1 in cases like "http://host?foo".
+  // Note that we treat backslashes the same as slashes.
+  Component path;
+
+  // Stuff between the ? and the # after the path. This does not include the
+  // preceding ? character. Length will be -1 if unspecified, 0 if there is
+  // a question mark but no query string.
+  Component query;
+
+  // Indicated by a #, this is everything following the hash sign (not
+  // including it). If there are multiple hash signs, the first one is used.
+  // Length will be -1 if there is no hash sign, or 0 if there is one but
+  // nothing follows it.
+  Component ref;
+
+  // The URL spec from the character after the scheme: until the end of the
+  // URL, regardless of the scheme. This is mostly useful for 'opaque' non-
+  // hierarchical schemes like data: and javascript: as a convenient way to get
+  // the string with the scheme stripped off.
+  Component GetContent() const;
+
+  // True if the URL's source contained a raw `<` character, and whitespace was
+  // removed from the URL during parsing
+  //
+  // TODO(mkwst): Link this to something in a spec if
+  // https://github.com/whatwg/url/pull/284 lands.
+  bool potentially_dangling_markup;
+
+  // This is used for nested URL types, currently only filesystem.  If you
+  // parse a filesystem URL, the resulting Parsed will have a nested
+  // inner_parsed_ to hold the parsed inner URL's component information.
+  // For all other url types [including the inner URL], it will be NULL.
+  Parsed* inner_parsed() const {
+    return inner_parsed_;
+  }
+
+  void set_inner_parsed(const Parsed& inner_parsed) {
+    if (!inner_parsed_)
+      inner_parsed_ = new Parsed(inner_parsed);
+    else
+      *inner_parsed_ = inner_parsed;
+  }
+
+  void clear_inner_parsed() {
+    if (inner_parsed_) {
+      delete inner_parsed_;
+      inner_parsed_ = nullptr;
+    }
+  }
+
+ private:
+  Parsed* inner_parsed_;  // This object is owned and managed by this struct.
+};
+
+// Initialization functions ---------------------------------------------------
+//
+// These functions parse the given URL, filling in all of the structure's
+// components. These functions can not fail, they will always do their best
+// at interpreting the input given.
+//
+// The string length of the URL MUST be specified, we do not check for NULLs
+// at any point in the process, and will actually handle embedded NULLs.
+//
+// IMPORTANT: These functions do NOT hang on to the given pointer or copy it
+// in any way. See the comment above the struct.
+//
+// The 8-bit versions require UTF-8 encoding.
+
+// StandardURL is for when the scheme is known to be one that has an
+// authority (host) like "http". This function will not handle weird ones
+// like "about:" and "javascript:", or do the right thing for "file:" URLs.
+COMPONENT_EXPORT(URL)
+void ParseStandardURL(const char* url, int url_len, Parsed* parsed);
+COMPONENT_EXPORT(URL)
+void ParseStandardURL(const char16_t* url, int url_len, Parsed* parsed);
+
+// PathURL is for URLs whose scheme is known not to have an authority (host)
+// section but that aren't file URLs either. The scheme is parsed, and
+// everything after the scheme is considered as the path. This is used for
+// things like "about:" and "javascript:".
+COMPONENT_EXPORT(URL)
+void ParsePathURL(const char* url,
+                  int url_len,
+                  bool trim_path_end,
+                  Parsed* parsed);
+COMPONENT_EXPORT(URL)
+void ParsePathURL(const char16_t* url,
+                  int url_len,
+                  bool trim_path_end,
+                  Parsed* parsed);
+
+// FileURL is for file URLs. There are some special rules for interpreting
+// these.
+COMPONENT_EXPORT(URL)
+void ParseFileURL(const char* url, int url_len, Parsed* parsed);
+COMPONENT_EXPORT(URL)
+void ParseFileURL(const char16_t* url, int url_len, Parsed* parsed);
+
+// Filesystem URLs are structured differently than other URLs.
+COMPONENT_EXPORT(URL)
+void ParseFileSystemURL(const char* url, int url_len, Parsed* parsed);
+COMPONENT_EXPORT(URL)
+void ParseFileSystemURL(const char16_t* url, int url_len, Parsed* parsed);
+
+// MailtoURL is for mailto: urls. They are made up of scheme, path, and query.
+COMPONENT_EXPORT(URL)
+void ParseMailtoURL(const char* url, int url_len, Parsed* parsed);
+COMPONENT_EXPORT(URL)
+void ParseMailtoURL(const char16_t* url, int url_len, Parsed* parsed);
+
+// Helper functions -----------------------------------------------------------
+
+// Locates the scheme according to the URL parser's rules. This function is
+// designed so the caller can find the scheme and call the correct Parse*
+// function according to their known scheme types.
+//
+// It also does not perform any validation on the scheme.
+//
+// This function will return true if the scheme is found and will put the
+// scheme's range into *scheme. False means no scheme could be found. Note
+// that a URL beginning with a colon has a scheme, but it is empty, so this
+// function will return true but *scheme will = (0,0).
+//
+// The scheme is found by skipping spaces and control characters at the
+// beginning, and taking everything from there to the first colon to be the
+// scheme. The character at scheme.end() will be the colon (we may enhance
+// this to handle full width colons or something, so don't count on the
+// actual character value). The character at scheme.end()+1 will be the
+// beginning of the rest of the URL, be it the authority or the path (or the
+// end of the string).
+//
+// The 8-bit version requires UTF-8 encoding.
+COMPONENT_EXPORT(URL)
+bool ExtractScheme(const char* url, int url_len, Component* scheme);
+COMPONENT_EXPORT(URL)
+bool ExtractScheme(const char16_t* url, int url_len, Component* scheme);
+
+// Returns true if ch is a character that terminates the authority segment
+// of a URL.
+COMPONENT_EXPORT(URL) bool IsAuthorityTerminator(char16_t ch);
+
+// Does a best effort parse of input |spec|, in range |auth|. If a particular
+// component is not found, it will be set to invalid.
+COMPONENT_EXPORT(URL)
+void ParseAuthority(const char* spec,
+                    const Component& auth,
+                    Component* username,
+                    Component* password,
+                    Component* hostname,
+                    Component* port_num);
+COMPONENT_EXPORT(URL)
+void ParseAuthority(const char16_t* spec,
+                    const Component& auth,
+                    Component* username,
+                    Component* password,
+                    Component* hostname,
+                    Component* port_num);
+
+// Computes the integer port value from the given port component. The port
+// component should have been identified by one of the init functions on
+// |Parsed| for the given input url.
+//
+// The return value will be a non-negative integer between 0 and 64K, or one
+// of the two negative SpecialPort values below.
+enum SpecialPort { PORT_UNSPECIFIED = -1, PORT_INVALID = -2 };
+COMPONENT_EXPORT(URL) int ParsePort(const char* url, const Component& port);
+COMPONENT_EXPORT(URL)
+int ParsePort(const char16_t* url, const Component& port);
+
+// Extracts the range of the file name in the given url. The path must
+// already have been computed by the parse function, and the matching URL
+// and extracted path are provided to this function. The filename is
+// defined as being everything from the last slash/backslash of the path
+// to the end of the path.
+//
+// The file name will be empty if the path is empty or there is nothing
+// following the last slash.
+//
+// The 8-bit version requires UTF-8 encoding.
+COMPONENT_EXPORT(URL)
+void ExtractFileName(const char* url,
+                     const Component& path,
+                     Component* file_name);
+COMPONENT_EXPORT(URL)
+void ExtractFileName(const char16_t* url,
+                     const Component& path,
+                     Component* file_name);
+
+// Extract the first key/value from the range defined by |*query|. Updates
+// |*query| to start at the end of the extracted key/value pair. This is
+// designed for use in a loop: you can keep calling it with the same query
+// object and it will iterate over all items in the query.
+//
+// Some key/value pairs may have the key, the value, or both be empty (for
+// example, the query string "?&"). These will be returned. Note that an empty
+// last parameter "foo.com?" or "foo.com?a&" will not be returned; this case
+// is the same as "done."
+//
+// The initial query component should not include the '?' (this is the default
+// for parsed URLs).
+//
+// If no key/value are found |*key| and |*value| will be unchanged and it will
+// return false.
+COMPONENT_EXPORT(URL)
+bool ExtractQueryKeyValue(const char* url,
+                          Component* query,
+                          Component* key,
+                          Component* value);
+COMPONENT_EXPORT(URL)
+bool ExtractQueryKeyValue(const char16_t* url,
+                          Component* query,
+                          Component* key,
+                          Component* value);
+
+}  // namespace url
+
+#endif  // URL_THIRD_PARTY_MOZILLA_URL_PARSE_H_
diff --git a/url_canon.cc b/url_canon.cc
new file mode 100644
index 00000000000..bbacaa7cdc6
--- /dev/null
+++ b/url_canon.cc
@@ -0,0 +1,15 @@
+// Copyright 2017 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/url_canon.h"
+
+#include "base/component_export.h"
+
+namespace url {
+
+template class EXPORT_TEMPLATE_DEFINE(COMPONENT_EXPORT(URL)) CanonOutputT<char>;
+template class EXPORT_TEMPLATE_DEFINE(COMPONENT_EXPORT(URL))
+    CanonOutputT<char16_t>;  // Paired with `extern template` in url_canon.h.
+
+}  // namespace url
diff --git a/url_canon.h b/url_canon.h
new file mode 100644
index 00000000000..94b44426fa3
--- /dev/null
+++ b/url_canon.h
@@ -0,0 +1,1037 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_URL_CANON_H_
+#define URL_URL_CANON_H_
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "base/component_export.h"
+#include "base/export_template.h"
+#include "base/memory/raw_ptr_exclusion.h"
+#include "base/numerics/clamped_math.h"
+#include "url/third_party/mozilla/url_parse.h"
+
+namespace url {
+
+// Canonicalizer output -------------------------------------------------------
+
+// Base class for the canonicalizer output, this maintains a buffer and
+// supports simple resizing and append operations on it.
+//
+// It is VERY IMPORTANT that no virtual function calls be made on the common
+// code path. We only have two virtual function calls, the destructor and a
+// resize function that is called when the existing buffer is not big enough.
+// The derived class is then in charge of setting up our buffer which we will
+// manage.
+template <typename T>
+class CanonOutputT {
+ public:
+  CanonOutputT() = default;
+  virtual ~CanonOutputT() = default;
+
+  // Implemented to resize the buffer. This function should update the buffer
+  // pointer to point to the new buffer, and any old data up to |cur_len_| in
+  // the buffer must be copied over.
+  //
+  // The new size |sz| must be larger than buffer_len_.
+  virtual void Resize(size_t sz) = 0;
+
+  // Accessor for returning a character at a given position. The input offset
+  // must be in the valid range; it is not bounds-checked.
+  inline T at(size_t offset) const { return buffer_[offset]; }
+
+  // Sets the character at the given position. The given position MUST be less
+  // than the length().
+  inline void set(size_t offset, T ch) { buffer_[offset] = ch; }
+
+  // Returns the number of characters currently in the buffer.
+  inline size_t length() const { return cur_len_; }
+
+  // Returns the current capacity of the buffer. The length() is the number of
+  // characters that have been declared to be written, but the capacity() is
+  // the number that can be written without reallocation. If the caller must
+  // write many characters at once, it can make sure there is enough capacity,
+  // write the data, then use set_length() to declare the new length().
+  size_t capacity() const { return buffer_len_; }
+
+  // Called by the user of this class to get the output. The output will NOT
+  // be NULL-terminated, so always pair the pointer with length() to know how
+  // many characters are valid.
+  const T* data() const { return buffer_; }
+  T* data() { return buffer_; }
+
+  // Shortens the URL to the new length. Used for "backing up" when processing
+  // relative paths. This can also be used if an external function writes a lot
+  // of data to the buffer (when using the "Raw" version below) beyond the end,
+  // to declare the new length.
+  //
+  // This MUST NOT be used to expand the size of the buffer beyond capacity().
+  void set_length(size_t new_len) { cur_len_ = new_len; }
+
+  // This is the most performance critical function, since it is called for
+  // every character.
+  void push_back(T ch) {
+    // In VC2005, putting this common case first speeds up execution
+    // dramatically because this branch is predicted as taken.
+    if (cur_len_ < buffer_len_) {
+      buffer_[cur_len_] = ch;
+      cur_len_++;
+      return;
+    }
+
+    // Grow the buffer to hold at least one more item. Hopefully we won't have
+    // to do this very often.
+    if (!Grow(1))
+      return;  // Growth refused: |ch| is dropped.
+
+    // Actually do the insertion.
+    buffer_[cur_len_] = ch;
+    cur_len_++;
+  }
+
+  // Appends |str_len| characters from |str|; a no-op if the buffer can't grow.
+  void Append(const T* str, size_t str_len) {
+    if (str_len > buffer_len_ - cur_len_) {
+      if (!Grow(str_len - (buffer_len_ - cur_len_)))
+        return;  // Growth refused: nothing is appended.
+    }
+    memcpy(buffer_ + cur_len_, str, str_len * sizeof(T));
+    cur_len_ += str_len;
+  }
+
+  void ReserveSizeIfNeeded(size_t estimated_size) {
+    // Reserve a bit extra to account for escaped chars.
+    if (estimated_size > buffer_len_)
+      Resize((base::ClampedNumeric<size_t>(estimated_size) + 8).RawValue());
+  }
+
+ protected:
+  // Doubles the capacity until |min_additional| more characters fit. Returns
+  // false, without resizing, at the 2^30 cap. The loop test cannot overflow.
+  bool Grow(size_t min_additional) {
+    static const size_t kMinBufferLen = 16;
+    size_t new_len = (buffer_len_ == 0) ? kMinBufferLen : buffer_len_;
+    do {
+      if (new_len >= (1 << 30))  // Prevent overflow below.
+        return false;
+      new_len *= 2;
+    } while (new_len - buffer_len_ < min_additional);  // new_len >= buffer_len_
+    Resize(new_len);
+    return true;
+  }
+
+  // `buffer_` is not a raw_ptr<...> for performance reasons (based on analysis
+  // of sampling profiler data).
+  RAW_PTR_EXCLUSION T* buffer_ = nullptr;
+  size_t buffer_len_ = 0;
+
+  // Used characters in the buffer.
+  size_t cur_len_ = 0;
+};
+
+// Simple implementation of the CanonOutput using new[]. It also embeds a
+// fixed buffer, so a stack instance canonicalizes most URLs heap-free.
+// NOTE(review): copy operations are not declared; copies would alias buffer_.
+template <typename T, int fixed_capacity = 1024>
+class RawCanonOutputT : public CanonOutputT<T> {
+ public:
+  RawCanonOutputT() : CanonOutputT<T>() {
+    this->buffer_ = fixed_buffer_;  // Start out in the embedded storage.
+    this->buffer_len_ = fixed_capacity;
+  }
+  ~RawCanonOutputT() override {
+    if (this->buffer_ != fixed_buffer_)  // Only a new[] buffer is freed.
+      delete[] this->buffer_;
+  }
+
+  void Resize(size_t sz) override {
+    T* new_buf = new T[sz];
+    memcpy(new_buf, this->buffer_,  // Copy only what fits in the new buffer.
+           sizeof(T) * (this->cur_len_ < sz ? this->cur_len_ : sz));
+    if (this->buffer_ != fixed_buffer_)  // Never delete the embedded array.
+      delete[] this->buffer_;
+    this->buffer_ = new_buf;
+    this->buffer_len_ = sz;
+  }
+
+ protected:
+  T fixed_buffer_[fixed_capacity];  // Storage used until the first Resize().
+};
+
+// Explicitly instantiate commonly used instantiations.
+extern template class EXPORT_TEMPLATE_DECLARE(COMPONENT_EXPORT(URL))
+    CanonOutputT<char>;
+extern template class EXPORT_TEMPLATE_DECLARE(COMPONENT_EXPORT(URL))
+    CanonOutputT<char16_t>;
+
+// Normally, all canonicalization output is in narrow characters. The wide
+// (char16_t) aliases exist for internal use when a wide buffer is required;
+// RawCanonOutput/RawCanonOutputW are the matching fixed-buffer variants.
+typedef CanonOutputT<char> CanonOutput;
+typedef CanonOutputT<char16_t> CanonOutputW;
+
+template <int fixed_capacity>
+class RawCanonOutput : public RawCanonOutputT<char, fixed_capacity> {};
+template <int fixed_capacity>
+class RawCanonOutputW : public RawCanonOutputT<char16_t, fixed_capacity> {};
+
+// Character set converter ----------------------------------------------------
+//
+// Converts query strings into a custom encoding. The embedder can supply an
+// implementation of this class to interface with their own character set
+// conversion libraries.
+//
+// Embedders will want to see the unit test for the ICU version.
+
+class COMPONENT_EXPORT(URL) CharsetConverter {
+ public:
+  CharsetConverter() {}
+  virtual ~CharsetConverter() {}
+
+  // Converts the given input string from UTF-16 to whatever output format the
+  // converter supports. This is used only for the query encoding conversion,
+  // which does not fail. Instead, the converter should insert "invalid
+  // character" characters in the output for invalid sequences, and do the
+  // best it can.
+  //
+  // If the input contains a character not representable in the output
+  // character set, the converter should append the HTML entity sequence in
+  // decimal (such as "&#20320;"), with escaping of the ampersand, number
+  // sign, and semicolon (in the previous example it would be
+  // "%26%2320320%3B"). This rule is based on what IE does in this situation.
+  virtual void ConvertFromUTF16(const char16_t* input,
+                                int input_len,  // presumably char16_t units
+                                CanonOutput* output) = 0;
+};
+
+// Schemes --------------------------------------------------------------------
+
+// Types of a scheme representing the requirements on the data represented by
+// the authority component of a URL with the scheme.
+enum SchemeType {
+  // The authority component of a URL with the scheme has the form
+  // "username:password@host:port". The username and password entries are
+  // optional; the host may not be empty. The default value of the port can be
+  // omitted in serialization. This type occurs with network schemes like http,
+  // https, and ftp.
+  SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION,
+  // The authority component of a URL with the scheme has the form "host:port",
+  // and does not include username or password. The default value of the port
+  // can be omitted in serialization. Used by inner URLs of filesystem URLs of
+  // origins with network hosts, from which the username and password are
+  // stripped.
+  SCHEME_WITH_HOST_AND_PORT,
+  // The authority component of a URL with the scheme has the form "host", and
+  // does not include port, username, or password. Used when the hosts are not
+  // network addresses; for example, schemes used internally by the browser.
+  SCHEME_WITH_HOST,
+  // A URL with the scheme doesn't have the authority component.
+  SCHEME_WITHOUT_AUTHORITY,
+};
+
+// Whitespace -----------------------------------------------------------------
+
+// Searches for whitespace that should be removed from the middle of URLs, and
+// removes it. Removed whitespace are tabs and newlines, but NOT spaces. Spaces
+// are preserved, which is what most browsers do. A pointer to the output will
+// be returned, and the length of that output will be in |output_len|.
+//
+// This should be called before parsing if whitespace removal is desired (which
+// it normally is when you are canonicalizing).
+//
+// If no whitespace is removed, this function will not use the buffer and will
+// return a pointer to the input, to avoid the extra copy. If modification is
+// required, the given |buffer| will be used and the returned pointer will
+// point to the beginning of the buffer.
+//
+// Therefore, callers should not use the buffer, since it may actually be
+// empty; use the computed pointer and |*output_len| instead.
+//
+// If |input| contained both removable whitespace and a raw `<` character,
+// |potentially_dangling_markup| will be set to `true`. Otherwise, it will be
+// left untouched.
+COMPONENT_EXPORT(URL)
+const char* RemoveURLWhitespace(const char* input,
+                                int input_len,
+                                CanonOutputT<char>* buffer,
+                                int* output_len,
+                                bool* potentially_dangling_markup);
+COMPONENT_EXPORT(URL)
+const char16_t* RemoveURLWhitespace(const char16_t* input,
+                                    int input_len,
+                                    CanonOutputT<char16_t>* buffer,
+                                    int* output_len,
+                                    bool* potentially_dangling_markup);
+
+// IDN ------------------------------------------------------------------------
+
+// Converts the Unicode input representing a hostname to ASCII using IDN rules.
+// The output must fall in the ASCII range, but will be encoded in UTF-16.
+//
+// On success, the output will be filled with the ASCII host name and it will
+// return true. Unlike most other canonicalization functions, this assumes that
+// the output is empty. The beginning of the host will be at offset 0, and
+// the length of the output will be set to the length of the new host name.
+//
+// On error, returns false. The output in this case is undefined.
+COMPONENT_EXPORT(URL)
+bool IDNToASCII(const char16_t* src, int src_len, CanonOutputW* output);
+
+// Piece-by-piece canonicalizers ----------------------------------------------
+//
+// These individual canonicalizers append the canonicalized versions of the
+// corresponding URL component to the given CanonOutput. The spec and the
+// previously-identified range of that component are the input. The range of
+// the canonicalized component will be written to the output component.
+//
+// These functions all append to the output so they can be chained. Make sure
+// the output is empty when you start.
+//
+// These functions return boolean values indicating success. On failure, they
+// will attempt to write something reasonable to the output so that, if
+// displayed to the user, they will recognise it as something that's messed up.
+// Nothing more should ever be done with these invalid URLs, however.
+
+// Scheme: Appends the scheme and colon to the URL. The output component will
+// indicate the range of characters up to but not including the colon.
+//
+// Canonical URLs always have a scheme. If the scheme is not present in the
+// input, this will just write the colon to indicate an empty scheme. Does not
+// append slashes which will be needed before any authority components for most
+// URLs.
+//
+// The 8-bit version requires UTF-8 encoding.
+COMPONENT_EXPORT(URL)
+bool CanonicalizeScheme(const char* spec,
+                        const Component& scheme,
+                        CanonOutput* output,
+                        Component* out_scheme);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeScheme(const char16_t* spec,
+                        const Component& scheme,
+                        CanonOutput* output,
+                        Component* out_scheme);
+
+// User info: username/password. If present, this will add the delimiters so
+// the output will be "<username>:<password>@" or "<username>@". Empty
+// username/password pairs, or empty passwords, will get converted to
+// nonexistent in the canonical version.
+//
+// The components for the username and password refer to ranges in the
+// respective source strings. Usually, these will be the same string, which
+// is legal as long as the two components don't overlap.
+//
+// The 8-bit version requires UTF-8 encoding.
+COMPONENT_EXPORT(URL)
+bool CanonicalizeUserInfo(const char* username_source,
+                          const Component& username,
+                          const char* password_source,
+                          const Component& password,
+                          CanonOutput* output,
+                          Component* out_username,
+                          Component* out_password);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeUserInfo(const char16_t* username_source,
+                          const Component& username,
+                          const char16_t* password_source,
+                          const Component& password,
+                          CanonOutput* output,
+                          Component* out_username,
+                          Component* out_password);
+
+// This structure holds detailed state exported from the IP/Host canonicalizers.
+// Additional fields may be added as callers require them.
+struct CanonHostInfo {
+  CanonHostInfo() : family(NEUTRAL), num_ipv4_components(0), out_host() {}
+
+  // Convenience function to test if family is an IP address.
+  bool IsIPAddress() const { return family == IPV4 || family == IPV6; }
+
+  // This field summarizes how the input was classified by the canonicalizer.
+  enum Family {
+    NEUTRAL,  // - Doesn't resemble an IP address. As far as the IP
+              //   canonicalizer is concerned, it should be treated as a
+              //   hostname.
+    BROKEN,   // - Almost an IP, but was not canonicalized. This could be an
+              //   IPv4 address where truncation occurred, or something
+              //   containing the special characters :[] which did not parse
+              //   as an IPv6 address. Never attempt to connect to this
+              //   address, because it might actually succeed!
+    IPV4,     // - Successfully canonicalized as an IPv4 address.
+    IPV6,     // - Successfully canonicalized as an IPv6 address.
+  };
+  Family family;
+
+  // If |family| is IPV4, then this is the number of nonempty dot-separated
+  // components in the input text, from 1 to 4. If |family| is not IPV4,
+  // this value is undefined.
+  int num_ipv4_components;
+
+  // Location of host within the canonicalized output.
+  // CanonicalizeIPAddress() only sets this field if |family| is IPV4 or IPV6.
+  // CanonicalizeHostVerbose() always sets it.
+  Component out_host;
+
+  // |address| contains the parsed IP address (if any) in its first
+  // AddressLength() bytes, in network order. If IsIPAddress() is false,
+  // AddressLength() will return zero and the content of |address| is undefined.
+  unsigned char address[16];
+
+  // Convenience function to calculate the length of an IP address corresponding
+  // to the current IP version in |family|, if any. For use with |address|.
+  int AddressLength() const {
+    return family == IPV4 ? 4 : (family == IPV6 ? 16 : 0);  // Bytes, not bits.
+  }
+};
+
+// Host.
+//
+// The 8-bit version requires UTF-8 encoding. Use this version when you only
+// need to know whether canonicalization succeeded.
+COMPONENT_EXPORT(URL)
+bool CanonicalizeHost(const char* spec,
+                      const Component& host,
+                      CanonOutput* output,
+                      Component* out_host);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeHost(const char16_t* spec,
+                      const Component& host,
+                      CanonOutput* output,
+                      Component* out_host);
+
+// Extended version of CanonicalizeHost, which returns additional information.
+// Use this when you need to know whether the hostname was an IP address.
+// A successful return is indicated by host_info->family != BROKEN. See the
+// definition of CanonHostInfo above for details.
+COMPONENT_EXPORT(URL)
+void CanonicalizeHostVerbose(const char* spec,
+                             const Component& host,
+                             CanonOutput* output,
+                             CanonHostInfo* host_info);
+COMPONENT_EXPORT(URL)
+void CanonicalizeHostVerbose(const char16_t* spec,
+                             const Component& host,
+                             CanonOutput* output,
+                             CanonHostInfo* host_info);
+
+// Canonicalizes a string according to the host canonicalization rules. Unlike
+// CanonicalizeHost, this will not check for IP addresses which can change the
+// meaning (and canonicalization) of the components. This means it is possible
+// to call this for sub-components of a host name without corruption.
+//
+// As an example, "01.02.03.04.com" is a canonical hostname. If you called
+// CanonicalizeHost on the substring "01.02.03.04" it will get "fixed" to
+// "1.2.3.4" which will produce an invalid host name when reassembled. This
+// can happen more than one might think because all numbers by themselves are
+// considered IP addresses; so "5" canonicalizes to "0.0.0.5".
+//
+// Be careful: Because Punycode works on each dot-separated substring as a
+// unit, you should only pass this function substrings that represent complete
+// dot-separated subcomponents of the original host. Even if you have ASCII
+// input, percent-escaped characters will have different meanings if split in
+// the middle.
+//
+// Returns true if the host was valid. This function will treat a 0-length
+// host as valid (because it's designed to be used for substrings) while the
+// full version above will mark empty hosts as broken.
+COMPONENT_EXPORT(URL)
+bool CanonicalizeHostSubstring(const char* spec,
+                               const Component& host,
+                               CanonOutput* output);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeHostSubstring(const char16_t* spec,
+                               const Component& host,
+                               CanonOutput* output);
+
+// IP addresses.
+//
+// Tries to interpret the given host name as an IPv4 or IPv6 address. If it is
+// an IP address, it will canonicalize it as such, appending it to |output|.
+// Additional status information is returned via the |*host_info| parameter.
+// See the definition of CanonHostInfo above for details.
+//
+// This is called AUTOMATICALLY from the host canonicalizer, which ensures that
+// the input is unescaped and name-prepped, etc. It should not normally be
+// necessary or wise to call this directly.
+COMPONENT_EXPORT(URL)
+void CanonicalizeIPAddress(const char* spec,
+                           const Component& host,
+                           CanonOutput* output,
+                           CanonHostInfo* host_info);
+COMPONENT_EXPORT(URL)
+void CanonicalizeIPAddress(const char16_t* spec,
+                           const Component& host,
+                           CanonOutput* output,
+                           CanonHostInfo* host_info);
+
+// Port: this function will add the colon for the port if a port is present.
+// The caller can pass PORT_UNSPECIFIED as the
+// default_port_for_scheme argument if there is no default port.
+//
+// The 8-bit version requires UTF-8 encoding.
+COMPONENT_EXPORT(URL)
+bool CanonicalizePort(const char* spec,
+                      const Component& port,
+                      int default_port_for_scheme,
+                      CanonOutput* output,
+                      Component* out_port);
+COMPONENT_EXPORT(URL)
+bool CanonicalizePort(const char16_t* spec,
+                      const Component& port,
+                      int default_port_for_scheme,
+                      CanonOutput* output,
+                      Component* out_port);
+
+// Returns the default port for the given canonical scheme, or PORT_UNSPECIFIED
+// if the scheme is unknown. Based on https://url.spec.whatwg.org/#default-port
+COMPONENT_EXPORT(URL)
+int DefaultPortForScheme(const char* scheme, int scheme_len);
+
+// Path. If the input does not begin in a slash (including if the input is
+// empty), we'll prepend a slash to the path to make it canonical.
+//
+// The 8-bit version assumes UTF-8 encoding, but does not verify the validity
+// of the UTF-8 (i.e., you can have invalid UTF-8 sequences, invalid
+// characters, etc.). Normally, URLs will come in as UTF-16, so this isn't
+// an issue. Somebody giving us an 8-bit path is responsible for generating
+// the path that the server expects (we'll escape high-bit characters), so
+// if something is invalid, it's their problem.
+COMPONENT_EXPORT(URL)
+bool CanonicalizePath(const char* spec,
+                      const Component& path,
+                      CanonOutput* output,
+                      Component* out_path);
+COMPONENT_EXPORT(URL)
+bool CanonicalizePath(const char16_t* spec,
+                      const Component& path,
+                      CanonOutput* output,
+                      Component* out_path);
+
+// Like CanonicalizePath(), but does not assume that it's operating on the
+// entire path.  It therefore does not prepend a slash, etc.
+COMPONENT_EXPORT(URL)
+bool CanonicalizePartialPath(const char* spec,
+                             const Component& path,
+                             CanonOutput* output,
+                             Component* out_path);
+COMPONENT_EXPORT(URL)
+bool CanonicalizePartialPath(const char16_t* spec,
+                             const Component& path,
+                             CanonOutput* output,
+                             Component* out_path);
+
+// Canonicalizes the input as a file path. This is like CanonicalizePath except
+// that it also handles Windows drive specs. For example, the path can begin
+// with "c|\" and it will get properly canonicalized to "C:/".
+// The string will be appended to |*output| and |*out_path| will be updated.
+//
+// The 8-bit version requires UTF-8 encoding.
+COMPONENT_EXPORT(URL)
+bool FileCanonicalizePath(const char* spec,
+                          const Component& path,
+                          CanonOutput* output,
+                          Component* out_path);
+COMPONENT_EXPORT(URL)
+bool FileCanonicalizePath(const char16_t* spec,
+                          const Component& path,
+                          CanonOutput* output,
+                          Component* out_path);
+
+// Query: Prepends the ? if needed.
+//
+// The 8-bit version requires the input to be UTF-8 encoded. Incorrectly
+// encoded characters (in UTF-8 or UTF-16) will be replaced with the Unicode
+// "invalid character." This function can not fail, we always just try to do
+// our best for crazy input here since web pages can set it themselves.
+//
+// This will convert the given input into the output encoding that the given
+// character set converter object provides. The converter will only be called
+// if necessary; for ASCII input, no conversions are necessary.
+//
+// The converter can be NULL. In this case, the output encoding will be UTF-8.
+COMPONENT_EXPORT(URL)
+void CanonicalizeQuery(const char* spec,
+                       const Component& query,
+                       CharsetConverter* converter,
+                       CanonOutput* output,
+                       Component* out_query);
+COMPONENT_EXPORT(URL)
+void CanonicalizeQuery(const char16_t* spec,
+                       const Component& query,
+                       CharsetConverter* converter,
+                       CanonOutput* output,
+                       Component* out_query);
+
+// Ref: Prepends the # if needed. The output will be UTF-8 (this is the only
+// canonicalizer that does not produce ASCII output). The output is
+// guaranteed to be valid UTF-8.
+//
+// This function will not fail. If the input is invalid UTF-8/UTF-16, we'll use
+// the "Unicode replacement character" for the confusing bits and copy the rest.
+COMPONENT_EXPORT(URL)
+void CanonicalizeRef(const char* spec,
+                     const Component& path,
+                     CanonOutput* output,
+                     Component* out_path);
+COMPONENT_EXPORT(URL)
+void CanonicalizeRef(const char16_t* spec,
+                     const Component& path,
+                     CanonOutput* output,
+                     Component* out_path);
+
+// Full canonicalizer ---------------------------------------------------------
+//
+// These functions replace any string contents, rather than append as above.
+// See the above piece-by-piece functions for information specific to
+// canonicalizing individual components.
+//
+// The output will be ASCII except the reference fragment, which may be UTF-8.
+//
+// The 8-bit versions require UTF-8 encoding.
+
+// Use for standard URLs with authorities and paths.
+COMPONENT_EXPORT(URL)
+bool CanonicalizeStandardURL(const char* spec,
+                             int spec_len,
+                             const Parsed& parsed,
+                             SchemeType scheme_type,
+                             CharsetConverter* query_converter,
+                             CanonOutput* output,
+                             Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeStandardURL(const char16_t* spec,
+                             int spec_len,
+                             const Parsed& parsed,
+                             SchemeType scheme_type,
+                             CharsetConverter* query_converter,
+                             CanonOutput* output,
+                             Parsed* new_parsed);
+
+// Use for file URLs.
+COMPONENT_EXPORT(URL)
+bool CanonicalizeFileURL(const char* spec,
+                         int spec_len,
+                         const Parsed& parsed,
+                         CharsetConverter* query_converter,
+                         CanonOutput* output,
+                         Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeFileURL(const char16_t* spec,
+                         int spec_len,
+                         const Parsed& parsed,
+                         CharsetConverter* query_converter,
+                         CanonOutput* output,
+                         Parsed* new_parsed);
+
+// Use for filesystem URLs.
+COMPONENT_EXPORT(URL)
+bool CanonicalizeFileSystemURL(const char* spec,
+                               int spec_len,
+                               const Parsed& parsed,
+                               CharsetConverter* query_converter,
+                               CanonOutput* output,
+                               Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeFileSystemURL(const char16_t* spec,
+                               int spec_len,
+                               const Parsed& parsed,
+                               CharsetConverter* query_converter,
+                               CanonOutput* output,
+                               Parsed* new_parsed);
+
+// Use for path URLs such as javascript. This does not modify the path in any
+// way, for example, by escaping it.
+COMPONENT_EXPORT(URL)
+bool CanonicalizePathURL(const char* spec,
+                         int spec_len,
+                         const Parsed& parsed,
+                         CanonOutput* output,
+                         Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizePathURL(const char16_t* spec,
+                         int spec_len,
+                         const Parsed& parsed,
+                         CanonOutput* output,
+                         Parsed* new_parsed);
+
+// Use to canonicalize just the path component of a "path" URL; e.g. the
+// path of a javascript URL.
+COMPONENT_EXPORT(URL)
+void CanonicalizePathURLPath(const char* source,
+                             const Component& component,
+                             CanonOutput* output,
+                             Component* new_component);
+COMPONENT_EXPORT(URL)
+void CanonicalizePathURLPath(const char16_t* source,
+                             const Component& component,
+                             CanonOutput* output,
+                             Component* new_component);
+
+// Use for mailto URLs. This "canonicalizes" the URL into a path and query
+// component. It does not attempt to merge "to" fields. It uses UTF-8 for
+// the query encoding if there is a query. This is because a mailto URL is
+// really intended for an external mail program, and the encoding of a page,
+// etc. which would influence a query encoding normally are irrelevant.
+COMPONENT_EXPORT(URL)
+bool CanonicalizeMailtoURL(const char* spec,
+                           int spec_len,
+                           const Parsed& parsed,
+                           CanonOutput* output,
+                           Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool CanonicalizeMailtoURL(const char16_t* spec,
+                           int spec_len,
+                           const Parsed& parsed,
+                           CanonOutput* output,
+                           Parsed* new_parsed);
+
+// Part replacer --------------------------------------------------------------
+
+// Internal structure used for storing separate strings for each component.
+// The basic canonicalization functions use this structure internally so that
+// component replacement (different strings for different components) can be
+// treated on the same code path as regular canonicalization (the same string
+// for each component).
+//
+// A Parsed structure usually goes along with this. Those components identify
+// offsets within these strings, so that they can all be in the same string,
+// or spread arbitrarily across different ones.
+//
+// This structure does not own any data. It is the caller's responsibility to
+// ensure that the data the pointers point to stays in scope and is not
+// modified.
+template <typename CHAR>
+struct URLComponentSource {
+  // Constructor normally used by callers wishing to replace components. This
+  // will make them all NULL, which is no replacement. The caller would then
+  // override the components they want to replace.
+  URLComponentSource()
+      : scheme(nullptr),
+        username(nullptr),
+        password(nullptr),
+        host(nullptr),
+        port(nullptr),
+        path(nullptr),
+        query(nullptr),
+        ref(nullptr) {}
+
+  // Constructor normally used internally to initialize all the components to
+  // point to the same spec.
+  explicit URLComponentSource(const CHAR* default_value)
+      : scheme(default_value),
+        username(default_value),
+        password(default_value),
+        host(default_value),
+        port(default_value),
+        path(default_value),
+        query(default_value),
+        ref(default_value) {}
+
+  // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+  // #addr-of
+  RAW_PTR_EXCLUSION const CHAR* scheme;
+  // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+  // #addr-of
+  RAW_PTR_EXCLUSION const CHAR* username;
+  // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+  // #addr-of
+  RAW_PTR_EXCLUSION const CHAR* password;
+  // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+  // #addr-of
+  RAW_PTR_EXCLUSION const CHAR* host;
+  // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+  // #addr-of
+  RAW_PTR_EXCLUSION const CHAR* port;
+  // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+  // #addr-of
+  RAW_PTR_EXCLUSION const CHAR* path;
+  // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+  // #addr-of
+  RAW_PTR_EXCLUSION const CHAR* query;
+  // This field is not a raw_ptr<> because it was filtered by the rewriter for:
+  // #addr-of
+  RAW_PTR_EXCLUSION const CHAR* ref;
+};
+
+// This structure encapsulates information on modifying a URL. Each component
+// may either be left unchanged, replaced, or deleted.
+//
+// By default, each component is unchanged. For those components that should be
+// modified, call either Set* or Clear* to modify it.
+//
+// The string passed to Set* functions DOES NOT GET COPIED AND MUST BE KEPT
+// IN SCOPE BY THE CALLER for as long as this object exists!
+//
+// Prefer the 8-bit replacement version if possible since it is more efficient.
+template <typename CHAR>
+class Replacements {
+ public:
+  Replacements() {}
+
+  // Scheme
+  void SetScheme(const CHAR* s, const Component& comp) {
+    sources_.scheme = s;
+    components_.scheme = comp;
+  }
+  // Note: we don't have a ClearScheme since this doesn't make any sense.
+  bool IsSchemeOverridden() const { return sources_.scheme != NULL; }
+
+  // Username
+  void SetUsername(const CHAR* s, const Component& comp) {
+    sources_.username = s;
+    components_.username = comp;
+  }
+  void ClearUsername() {
+    sources_.username = Placeholder();
+    components_.username = Component();
+  }
+  bool IsUsernameOverridden() const { return sources_.username != NULL; }
+
+  // Password
+  void SetPassword(const CHAR* s, const Component& comp) {
+    sources_.password = s;
+    components_.password = comp;
+  }
+  void ClearPassword() {
+    sources_.password = Placeholder();
+    components_.password = Component();
+  }
+  bool IsPasswordOverridden() const { return sources_.password != NULL; }
+
+  // Host
+  void SetHost(const CHAR* s, const Component& comp) {
+    sources_.host = s;
+    components_.host = comp;
+  }
+  void ClearHost() {
+    sources_.host = Placeholder();
+    components_.host = Component();
+  }
+  bool IsHostOverridden() const { return sources_.host != NULL; }
+
+  // Port
+  void SetPort(const CHAR* s, const Component& comp) {
+    sources_.port = s;
+    components_.port = comp;
+  }
+  void ClearPort() {
+    sources_.port = Placeholder();
+    components_.port = Component();
+  }
+  bool IsPortOverridden() const { return sources_.port != NULL; }
+
+  // Path
+  void SetPath(const CHAR* s, const Component& comp) {
+    sources_.path = s;
+    components_.path = comp;
+  }
+  void ClearPath() {
+    sources_.path = Placeholder();
+    components_.path = Component();
+  }
+  bool IsPathOverridden() const { return sources_.path != NULL; }
+
+  // Query
+  void SetQuery(const CHAR* s, const Component& comp) {
+    sources_.query = s;
+    components_.query = comp;
+  }
+  void ClearQuery() {
+    sources_.query = Placeholder();
+    components_.query = Component();
+  }
+  bool IsQueryOverridden() const { return sources_.query != NULL; }
+
+  // Ref
+  void SetRef(const CHAR* s, const Component& comp) {
+    sources_.ref = s;
+    components_.ref = comp;
+  }
+  void ClearRef() {
+    sources_.ref = Placeholder();
+    components_.ref = Component();
+  }
+  bool IsRefOverridden() const { return sources_.ref != NULL; }
+
+  // Getters for the internal data. See the variables below for how the
+  // information is encoded.
+  const URLComponentSource<CHAR>& sources() const { return sources_; }
+  const Parsed& components() const { return components_; }
+
+ private:
+  // Returns a pointer to a static empty string that is used as a placeholder
+  // to indicate a component should be deleted (see below).
+  const CHAR* Placeholder() {
+    static const CHAR empty_cstr = 0;
+    return &empty_cstr;
+  }
+
+  // We support three states:
+  //
+  // Action                 | Source                Component
+  // -----------------------+--------------------------------------------------
+  // Don't change component | NULL                  (unused)
+  // Replace component      | (replacement string)  (replacement component)
+  // Delete component       | (non-NULL)            (invalid component: (0,-1))
+  //
+  // We use a pointer to the empty string for the source when the component
+  // should be deleted.
+  URLComponentSource<CHAR> sources_;
+  Parsed components_;
+};
+
+// The base must be an 8-bit canonical URL.
+COMPONENT_EXPORT(URL)
+bool ReplaceStandardURL(const char* base,
+                        const Parsed& base_parsed,
+                        const Replacements<char>& replacements,
+                        SchemeType scheme_type,
+                        CharsetConverter* query_converter,
+                        CanonOutput* output,
+                        Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceStandardURL(const char* base,
+                        const Parsed& base_parsed,
+                        const Replacements<char16_t>& replacements,
+                        SchemeType scheme_type,
+                        CharsetConverter* query_converter,
+                        CanonOutput* output,
+                        Parsed* new_parsed);
+
+// Filesystem URLs can only have the path, query, or ref replaced.
+// All other components will be ignored.
+COMPONENT_EXPORT(URL)
+bool ReplaceFileSystemURL(const char* base,
+                          const Parsed& base_parsed,
+                          const Replacements<char>& replacements,
+                          CharsetConverter* query_converter,
+                          CanonOutput* output,
+                          Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceFileSystemURL(const char* base,
+                          const Parsed& base_parsed,
+                          const Replacements<char16_t>& replacements,
+                          CharsetConverter* query_converter,
+                          CanonOutput* output,
+                          Parsed* new_parsed);
+
+// Replacing some parts of a file URL is not permitted. Everything except
+// the host, path, query, and ref will be ignored.
+COMPONENT_EXPORT(URL)
+bool ReplaceFileURL(const char* base,
+                    const Parsed& base_parsed,
+                    const Replacements<char>& replacements,
+                    CharsetConverter* query_converter,
+                    CanonOutput* output,
+                    Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceFileURL(const char* base,
+                    const Parsed& base_parsed,
+                    const Replacements<char16_t>& replacements,
+                    CharsetConverter* query_converter,
+                    CanonOutput* output,
+                    Parsed* new_parsed);
+
+// Path URLs can only have the scheme and path replaced. All other components
+// will be ignored.
+COMPONENT_EXPORT(URL)
+bool ReplacePathURL(const char* base,
+                    const Parsed& base_parsed,
+                    const Replacements<char>& replacements,
+                    CanonOutput* output,
+                    Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplacePathURL(const char* base,
+                    const Parsed& base_parsed,
+                    const Replacements<char16_t>& replacements,
+                    CanonOutput* output,
+                    Parsed* new_parsed);
+
+// Mailto URLs can only have the scheme, path, and query replaced.
+// All other components will be ignored.
+COMPONENT_EXPORT(URL)
+bool ReplaceMailtoURL(const char* base,
+                      const Parsed& base_parsed,
+                      const Replacements<char>& replacements,
+                      CanonOutput* output,
+                      Parsed* new_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceMailtoURL(const char* base,
+                      const Parsed& base_parsed,
+                      const Replacements<char16_t>& replacements,
+                      CanonOutput* output,
+                      Parsed* new_parsed);
+
+// Relative URL ---------------------------------------------------------------
+
+// Given an input URL or URL fragment |fragment|, determines if it is a
+// relative or absolute URL and places the result into |*is_relative|. If it is
+// relative, the relevant portion of the URL will be placed into
+// |*relative_component| (there may have been trimmed whitespace, for example).
+// This value is passed to ResolveRelativeURL. If the input is not relative,
+// this value is UNDEFINED (it may be changed by the function).
+//
+// Returns true on success (we successfully determined the URL is relative or
+// not). Failure means that the combination of URLs doesn't make any sense.
+//
+// The base URL should always be canonical, therefore is ASCII.
+COMPONENT_EXPORT(URL)
+bool IsRelativeURL(const char* base,
+                   const Parsed& base_parsed,
+                   const char* fragment,
+                   int fragment_len,
+                   bool is_base_hierarchical,
+                   bool* is_relative,
+                   Component* relative_component);
+COMPONENT_EXPORT(URL)
+bool IsRelativeURL(const char* base,
+                   const Parsed& base_parsed,
+                   const char16_t* fragment,
+                   int fragment_len,
+                   bool is_base_hierarchical,
+                   bool* is_relative,
+                   Component* relative_component);
+
+// Given a canonical parsed source URL, a URL fragment known to be relative,
+// and the identified relevant portion of the relative URL (computed by
+// IsRelativeURL), this produces a new parsed canonical URL in |output| and
+// |out_parsed|.
+//
+// It also requires a flag indicating whether the base URL is a file: URL
+// which triggers additional logic.
+//
+// The base URL should be canonical and have a host (may be empty for file
+// URLs) and a path. If it doesn't have these, we can't resolve relative
+// URLs off of it and will return the base as the output with an error flag.
+// Because it is canonical, it should also be ASCII.
+//
+// The query charset converter follows the same rules as CanonicalizeQuery.
+//
+// Returns true on success. On failure, the output will be "something
+// reasonable" that will be consistent and valid, just probably not what
+// was intended by the web page author or caller.
+COMPONENT_EXPORT(URL)
+bool ResolveRelativeURL(const char* base_url,
+                        const Parsed& base_parsed,
+                        bool base_is_file,
+                        const char* relative_url,
+                        const Component& relative_component,
+                        CharsetConverter* query_converter,
+                        CanonOutput* output,
+                        Parsed* out_parsed);
+COMPONENT_EXPORT(URL)
+bool ResolveRelativeURL(const char* base_url,
+                        const Parsed& base_parsed,
+                        bool base_is_file,
+                        const char16_t* relative_url,
+                        const Component& relative_component,
+                        CharsetConverter* query_converter,
+                        CanonOutput* output,
+                        Parsed* out_parsed);
+
+}  // namespace url
+
+#endif  // URL_URL_CANON_H_
diff --git a/url_canon_etc.cc b/url_canon_etc.cc
new file mode 100644
index 00000000000..3d1cb938edb
--- /dev/null
+++ b/url_canon_etc.cc
@@ -0,0 +1,428 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Canonicalizers for random bits that aren't big enough for their own files.
+
+#include <string.h>
+
+#include "url/url_canon.h"
+#include "url/url_canon_internal.h"
+
+namespace url {
+
+namespace {
+
+// Returns true if the given character should be removed from the middle of a
+// URL.
+inline bool IsRemovableURLWhitespace(int ch) {
+  return ch == '\r' || ch == '\n' || ch == '\t';
+}
+
+// Backend for RemoveURLWhitespace (see declaration in url_canon.h).
+// It sucks that we have to do this, since this takes about 13% of the total URL
+// canonicalization time.
+template <typename CHAR>
+const CHAR* DoRemoveURLWhitespace(const CHAR* input,
+                                  int input_len,
+                                  CanonOutputT<CHAR>* buffer,
+                                  int* output_len,
+                                  bool* potentially_dangling_markup) {
+  // Fast verification that there's nothing that needs removal. This is the 99%
+  // case, so we want it to be fast and don't care about impacting the speed
+  // when we do find whitespace.
+  bool found_whitespace = false;
+  if (sizeof(*input) == 1 && input_len >= kMinimumLengthForSIMD) {
+    // For large strings, memchr is much faster than any scalar code we can
+    // write, even if we need to run it three times. (If this turns out to still
+    // be a bottleneck, we could write our own vector code, but given that
+    // memchr is so fast, it's unlikely to be relevant.)
+    found_whitespace = memchr(input, '\n', input_len) != nullptr ||
+                       memchr(input, '\r', input_len) != nullptr ||
+                       memchr(input, '\t', input_len) != nullptr;
+  } else {
+    for (int i = 0; i < input_len; i++) {
+      if (!IsRemovableURLWhitespace(input[i]))
+        continue;
+      found_whitespace = true;
+      break;
+    }
+  }
+
+  if (!found_whitespace) {
+    // Didn't find any whitespace, we don't need to do anything. We can just
+    // return the input as the output.
+    *output_len = input_len;
+    return input;
+  }
+
+  // Skip whitespace removal for `data:` URLs.
+  //
+  // TODO(mkwst): Ideally, this would use something like `base::StartsWith`, but
+  // that turns out to be difficult to do correctly given this function's
+  // character type templating.
+  if (input_len > 5 && input[0] == 'd' && input[1] == 'a' && input[2] == 't' &&
+      input[3] == 'a' && input[4] == ':') {
+    *output_len = input_len;
+    return input;
+  }
+
+  // Remove the whitespace into the new buffer and return it.
+  for (int i = 0; i < input_len; i++) {
+    if (!IsRemovableURLWhitespace(input[i])) {
+      if (potentially_dangling_markup && input[i] == 0x3C)
+        *potentially_dangling_markup = true;
+      buffer->push_back(input[i]);
+    }
+  }
+  *output_len = buffer->length();
+  return buffer->data();
+}
+
+// Contains the canonical version of each possible input letter in the scheme
+// (basically, lower-cased). The corresponding entry will be 0 if the letter
+// is not allowed in a scheme.
+// clang-format off
+const char kSchemeCanonical[0x80] = {
+// 00-1f: all are invalid
+     0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+     0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+//  ' '   !    "    #    $    %    &    '    (    )    *    +    ,    -    .    /
+     0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  '+',  0,  '-', '.',  0,
+//   0    1    2    3    4    5    6    7    8    9    :    ;    <    =    >    ?
+    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',  0 ,  0 ,  0 ,  0 ,  0 ,  0 ,
+//   @    A    B    C    D    E    F    G    H    I    J    K    L    M    N    O
+     0 , 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
+//   P    Q    R    S    T    U    V    W    X    Y    Z    [    \    ]    ^    _
+    'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',  0,   0 ,  0,   0 ,  0,
+//   `    a    b    c    d    e    f    g    h    i    j    k    l    m    n    o
+     0 , 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
+//   p    q    r    s    t    u    v    w    x    y    z    {    |    }    ~    DEL
+    'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',  0 ,  0 ,  0 ,  0 ,  0 };
+// clang-format on
+
+// This could be a table lookup as well by setting the high bit for each
+// valid character, but it's only called once per URL, and it makes the lookup
+// table easier to read not having extra stuff in it.
+inline bool IsSchemeFirstChar(unsigned char c) {
+  return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+}
+
+template <typename CHAR, typename UCHAR>
+bool DoScheme(const CHAR* spec,
+              const Component& scheme,
+              CanonOutput* output,
+              Component* out_scheme) {
+  if (scheme.is_empty()) {
+    // Scheme is unspecified or empty, convert to empty by appending a colon.
+    *out_scheme = Component(output->length(), 0);
+    output->push_back(':');
+    return false;
+  }
+
+  // The output scheme starts from the current position.
+  out_scheme->begin = output->length();
+
+  // Danger: it's important that this code does not strip any characters;
+  // it only emits the canonical version (be it valid or escaped) for each
+  // of the input characters. Stripping would put it out of sync with
+  // FindAndCompareScheme, which could cause some security checks on
+  // schemes to be incorrect.
+  bool success = true;
+  size_t begin = static_cast<size_t>(scheme.begin);
+  size_t end = static_cast<size_t>(scheme.end());
+  for (size_t i = begin; i < end; i++) {
+    UCHAR ch = static_cast<UCHAR>(spec[i]);
+    char replacement = 0;
+    if (ch < 0x80) {
+      if (i == begin) {
+        // Need to do a special check for the first letter of the scheme.
+        if (IsSchemeFirstChar(static_cast<unsigned char>(ch)))
+          replacement = kSchemeCanonical[ch];
+      } else {
+        replacement = kSchemeCanonical[ch];
+      }
+    }
+
+    if (replacement) {
+      output->push_back(replacement);
+    } else if (ch == '%') {
+      // Canonicalizing the scheme multiple times should lead to the same
+      // result. Since invalid characters will be escaped, we need to preserve
+      // the percent to avoid multiple escaping. The scheme will be invalid.
+      success = false;
+      output->push_back('%');
+    } else {
+      // Invalid character, store it but mark this scheme as invalid.
+      success = false;
+
+      // This will escape the output and also handle encoding issues.
+      // Ignore the return value since we already failed.
+      AppendUTF8EscapedChar(spec, &i, end, output);
+    }
+  }
+
+  // The output scheme ends with the current position, before appending
+  // the colon.
+  out_scheme->len = output->length() - out_scheme->begin;
+  output->push_back(':');
+  return success;
+}
+
+// The username and password components reference ranges in the corresponding
+// *_spec strings. Typically, these specs will be the same (we're
+// canonicalizing a single source string), but may be different when
+// replacing components.
+template <typename CHAR, typename UCHAR>
+bool DoUserInfo(const CHAR* username_spec,
+                const Component& username,
+                const CHAR* password_spec,
+                const Component& password,
+                CanonOutput* output,
+                Component* out_username,
+                Component* out_password) {
+  if (username.is_empty() && password.is_empty()) {
+    // Common case: no user info. We strip empty username/passwords.
+    *out_username = Component();
+    *out_password = Component();
+    return true;
+  }
+
+  // Write the username.
+  out_username->begin = output->length();
+  if (username.is_nonempty()) {
+    // This will escape characters not valid for the username.
+    AppendStringOfType(&username_spec[username.begin],
+                       static_cast<size_t>(username.len), CHAR_USERINFO,
+                       output);
+  }
+  out_username->len = output->length() - out_username->begin;
+
+  // When there is a password, we need the separator. Note that we strip
+  // empty but specified passwords.
+  if (password.is_nonempty()) {
+    output->push_back(':');
+    out_password->begin = output->length();
+    AppendStringOfType(&password_spec[password.begin],
+                       static_cast<size_t>(password.len), CHAR_USERINFO,
+                       output);
+    out_password->len = output->length() - out_password->begin;
+  } else {
+    *out_password = Component();
+  }
+
+  output->push_back('@');
+  return true;
+}
+
+// Helper functions for converting port integers to strings.
+inline void WritePortInt(char* output, int output_len, int port) {
+  _itoa_s(port, output, output_len, 10);
+}
+
+// This function will prepend the colon if there will be a port.
+template <typename CHAR, typename UCHAR>
+bool DoPort(const CHAR* spec,
+            const Component& port,
+            int default_port_for_scheme,
+            CanonOutput* output,
+            Component* out_port) {
+  int port_num = ParsePort(spec, port);
+  if (port_num == PORT_UNSPECIFIED || port_num == default_port_for_scheme) {
+    *out_port = Component();
+    return true;  // Leave port empty.
+  }
+
+  if (port_num == PORT_INVALID) {
+    // Invalid port: We'll copy the text from the input so the user can see
+    // what the error was, and mark the URL as invalid by returning false.
+    output->push_back(':');
+    out_port->begin = output->length();
+    AppendInvalidNarrowString(spec, static_cast<size_t>(port.begin),
+                              static_cast<size_t>(port.end()), output);
+    out_port->len = output->length() - out_port->begin;
+    return false;
+  }
+
+  // Convert port number back to a string. Max port value is 5 digits, and
+  // the Parsed::ExtractPort will have made sure the integer is in range.
+  const int buf_size = 6;
+  char buf[buf_size];
+  WritePortInt(buf, buf_size, port_num);
+
+  // Append the port number to the output, preceded by a colon.
+  output->push_back(':');
+  out_port->begin = output->length();
+  for (int i = 0; i < buf_size && buf[i]; i++)
+    output->push_back(buf[i]);
+
+  out_port->len = output->length() - out_port->begin;
+  return true;
+}
+
+// clang-format off
+//   Indexed by ASCII value: true = percent-escape (the character is in the
+//   fragment percent-encode set, https://url.spec.whatwg.org/#fragment-percent-encode-set)
+const bool kShouldEscapeCharInFragment[0x80] = {
+//  Control characters (0x00-0x1F)
+    true,  true,  true,  true,  true,  true,  true,  true,
+    true,  true,  true,  true,  true,  true,  true,  true,
+    true,  true,  true,  true,  true,  true,  true,  true,
+    true,  true,  true,  true,  true,  true,  true,  true,
+//  ' '    !      "      #      $      %      &      '
+    true,  false, true,  false, false, false, false, false,
+//  (      )      *      +      ,      -      .      /
+    false, false, false, false, false, false, false, false,
+//  0      1      2      3      4      5      6      7
+    false, false, false, false, false, false, false, false,
+//  8      9      :      ;      <      =      >      ?
+    false, false, false, false, true,  false, true,  false,
+//  @      A      B      C      D      E      F      G
+    false, false, false, false, false, false, false, false,
+//  H      I      J      K      L      M      N      O
+    false, false, false, false, false, false, false, false,
+//  P      Q      R      S      T      U      V      W
+    false, false, false, false, false, false, false, false,
+//  X      Y      Z      [      \      ]      ^      _
+    false, false, false, false, false, false, false, false,
+//  `      a      b      c      d      e      f      g
+    true,  false, false, false, false, false, false, false,
+//  h      i      j      k      l      m      n      o
+    false, false, false, false, false, false, false, false,
+//  p      q      r      s      t      u      v      w
+    false, false, false, false, false, false, false, false,
+//  x      y      z      {      |      }      ~      DELETE
+    false, false, false, false, false, false, false, true
+};
+// clang-format on
+
+// Writes the canonical form of the fragment |ref| of |spec| to |output| and
+// records where it landed in |out_ref|. An absent ref produces no output.
+template <typename CHAR, typename UCHAR>
+void DoCanonicalizeRef(const CHAR* spec,
+                       const Component& ref,
+                       CanonOutput* output,
+                       Component* out_ref) {
+  if (!ref.is_valid()) {
+    // Nothing to canonicalize, which is the usual situation.
+    *out_ref = Component();
+    return;
+  }
+  // A ref that is present but empty still gets its '#' separator.
+  output->push_back('#');
+  out_ref->begin = output->length();
+
+  // Walk the input: ASCII goes through the escape table, everything else is
+  // converted to UTF-8 and escaped by the helper (which may advance |pos|).
+  const size_t limit = static_cast<size_t>(ref.end());
+  for (size_t pos = static_cast<size_t>(ref.begin); pos < limit; ++pos) {
+    const UCHAR ch = static_cast<UCHAR>(spec[pos]);
+    if (ch >= 0x80) {
+      AppendUTF8EscapedChar(spec, &pos, limit, output);
+      continue;
+    }
+    if (kShouldEscapeCharInFragment[ch])
+      AppendEscapedChar(static_cast<unsigned char>(ch), output);
+    else
+      output->push_back(static_cast<char>(ch));
+  }
+  out_ref->len = output->length() - out_ref->begin;
+}
+
+}  // namespace
+
+// 8-bit overload; the real work happens in DoRemoveURLWhitespace.
+const char* RemoveURLWhitespace(const char* input, int input_len,
+                                CanonOutputT<char>* buffer, int* output_len,
+                                bool* potentially_dangling_markup) {
+  const char* const stripped = DoRemoveURLWhitespace(
+      input, input_len, buffer, output_len, potentially_dangling_markup);
+  return stripped;
+}
+
+// 16-bit overload; the real work happens in DoRemoveURLWhitespace.
+const char16_t* RemoveURLWhitespace(const char16_t* input, int input_len,
+                                    CanonOutputT<char16_t>* buffer,
+                                    int* output_len,
+                                    bool* potentially_dangling_markup) {
+  return DoRemoveURLWhitespace(input, input_len, buffer, output_len,
+                               potentially_dangling_markup);
+}
+
+char CanonicalSchemeChar(char16_t ch) {
+  // Schemes do not support non-ASCII, so anything past the 7-bit range yields
+  // 0; otherwise the result is the kSchemeCanonical entry for |ch|.
+  return ch < 0x80 ? kSchemeCanonical[ch] : '\0';
+}
+
+// 8-bit overload; forwards to DoScheme<char, unsigned char> and returns its
+// result unchanged.
+bool CanonicalizeScheme(const char* spec, const Component& scheme,
+                        CanonOutput* output, Component* out_scheme) {
+  return DoScheme<char, unsigned char>(spec, scheme, output, out_scheme);
+}
+
+// 16-bit overload; forwards to DoScheme<char16_t, char16_t> and returns its
+// result unchanged.
+bool CanonicalizeScheme(const char16_t* spec, const Component& scheme,
+                        CanonOutput* output, Component* out_scheme) {
+  return DoScheme<char16_t, char16_t>(spec, scheme, output, out_scheme);
+}
+
+// 8-bit overload; username and password each carry their own source buffer.
+bool CanonicalizeUserInfo(const char* username_source,
+                          const Component& username,
+                          const char* password_source,
+                          const Component& password,
+                          CanonOutput* output, Component* out_username,
+                          Component* out_password) {
+  return DoUserInfo<char, unsigned char>(
+      username_source, username, password_source, password, output,
+      out_username, out_password);
+}
+
+// 16-bit overload; username and password each carry their own source buffer.
+bool CanonicalizeUserInfo(const char16_t* username_source,
+                          const Component& username,
+                          const char16_t* password_source,
+                          const Component& password,
+                          CanonOutput* output, Component* out_username,
+                          Component* out_password) {
+  return DoUserInfo<char16_t, char16_t>(
+      username_source, username, password_source, password, output,
+      out_username, out_password);
+}
+
+// 8-bit overload; forwards to DoPort<char, unsigned char>.
+bool CanonicalizePort(const char* spec, const Component& port,
+                      int default_port_for_scheme, CanonOutput* output,
+                      Component* out_port) {
+  const bool ok = DoPort<char, unsigned char>(
+      spec, port, default_port_for_scheme, output, out_port);
+  return ok;
+}
+
+// 16-bit overload; forwards to DoPort<char16_t, char16_t>.
+bool CanonicalizePort(const char16_t* spec, const Component& port,
+                      int default_port_for_scheme, CanonOutput* output,
+                      Component* out_port) {
+  const bool ok = DoPort<char16_t, char16_t>(
+      spec, port, default_port_for_scheme, output, out_port);
+  return ok;
+}
+
+// 8-bit overload; forwards to DoCanonicalizeRef<char, unsigned char>.
+// Nothing is returned: results are delivered via |output| and |out_ref|.
+void CanonicalizeRef(const char* spec, const Component& ref,
+                     CanonOutput* output, Component* out_ref) {
+  DoCanonicalizeRef<char, unsigned char>(spec, ref, output, out_ref);
+}
+
+// 16-bit overload; forwards to DoCanonicalizeRef<char16_t, char16_t>.
+// Nothing is returned: results are delivered via |output| and |out_ref|.
+void CanonicalizeRef(const char16_t* spec, const Component& ref,
+                     CanonOutput* output, Component* out_ref) {
+  DoCanonicalizeRef<char16_t, char16_t>(spec, ref, output, out_ref);
+}
+
+}  // namespace url
diff --git a/url_canon_filesystemurl.cc b/url_canon_filesystemurl.cc
new file mode 100644
index 00000000000..0472484de7a
--- /dev/null
+++ b/url_canon_filesystemurl.cc
@@ -0,0 +1,135 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Functions for canonicalizing "filesystem:file:" URLs.
+
+#include "url/url_canon.h"
+#include "url/url_canon_internal.h"
+#include "url/url_file.h"
+#include "url/url_parse_internal.h"
+#include "url/url_util.h"
+#include "url/url_util_internal.h"
+
+namespace url {
+
+namespace {
+
+// Canonicalizes a filesystem: URL into |output|, describing the result in
+// |new_parsed|. The outer components are read through |source| because they
+// may have been replaced; the inner URL cannot be, so it is read from |spec|.
+template <typename CHAR, typename UCHAR>
+bool DoCanonicalizeFileSystemURL(const CHAR* spec,
+                                 const URLComponentSource<CHAR>& source,
+                                 const Parsed& parsed,
+                                 CharsetConverter* charset_converter,
+                                 CanonOutput* output,
+                                 Parsed* new_parsed) {
+  // Only {scheme, path, query, ref} are meaningful here; blank out the others.
+  new_parsed->username.reset();
+  new_parsed->password.reset();
+  new_parsed->host.reset();
+  new_parsed->port.reset();
+
+  // The outer scheme is fixed, so emit it directly instead of canonicalizing.
+  new_parsed->scheme.begin = output->length();
+  output->Append("filesystem:", 11);
+  new_parsed->scheme.len = 10;
+
+  const Parsed* const inner = parsed.inner_parsed();
+  if (inner == nullptr || !inner->scheme.is_valid())
+    return false;
+
+  Parsed new_inner;
+  bool inner_ok = false;
+  SchemeType inner_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
+  if (CompareSchemeComponent(spec, inner->scheme, url::kFileScheme)) {
+    // Inner file: URL -- hard-code the scheme and canonicalize only the path.
+    new_inner.scheme.begin = output->length();
+    output->Append("file://", 7);
+    new_inner.scheme.len = 4;
+    inner_ok = CanonicalizePath(spec, inner->path, output, &new_inner.path);
+  } else if (GetStandardSchemeType(spec, inner->scheme, &inner_type)) {
+    // Inner standard URL. Any user information is stripped from the inner URL
+    // by downgrading the scheme type that would otherwise allow it.
+    if (inner_type == SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION)
+      inner_type = SCHEME_WITH_HOST_AND_PORT;
+    inner_ok = CanonicalizeStandardURL(
+        spec, inner->Length(), *inner, inner_type, charset_converter, output,
+        &new_inner);
+  } else {
+    // TODO(ericu): The URL is wrong, but should we try to output more of what
+    // we were given?  Echoing back filesystem:mailto etc. doesn't seem all that
+    // useful.
+    return false;
+  }
+
+  // The filesystem type must be more than just a leading slash for validity.
+  const bool has_filesystem_type = new_inner.path.len > 1;
+  // The outer path is canonicalized even if the inner URL already failed.
+  const bool outer_path_ok =
+      CanonicalizePath(source.path, parsed.path, output, &new_parsed->path);
+
+  // Query/ref failures are ignored since the URL can probably still be loaded.
+  CanonicalizeQuery(source.query, parsed.query, charset_converter, output,
+                    &new_parsed->query);
+  CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);
+
+  const bool success = inner_ok && has_filesystem_type && outer_path_ok;
+  if (success)
+    new_parsed->set_inner_parsed(new_inner);
+  return success;
+}
+
+}  // namespace
+
+// 8-bit overload. |spec_len| is not consulted; the component source handed
+// to the implementation is built from |spec|.
+bool CanonicalizeFileSystemURL(const char* spec, int spec_len,
+                               const Parsed& parsed,
+                               CharsetConverter* charset_converter,
+                               CanonOutput* output, Parsed* new_parsed) {
+  const URLComponentSource<char> source(spec);
+  return DoCanonicalizeFileSystemURL<char, unsigned char>(
+      spec, source, parsed, charset_converter, output, new_parsed);
+}
+
+// 16-bit overload. |spec_len| is not consulted; the component source handed
+// to the implementation is built from |spec|.
+bool CanonicalizeFileSystemURL(const char16_t* spec, int spec_len,
+                               const Parsed& parsed,
+                               CharsetConverter* charset_converter,
+                               CanonOutput* output, Parsed* new_parsed) {
+  const URLComponentSource<char16_t> source(spec);
+  return DoCanonicalizeFileSystemURL<char16_t, char16_t>(
+      spec, source, parsed, charset_converter, output, new_parsed);
+}
+
+// Re-canonicalizes |base| with the 8-bit |replacements| applied.
+bool ReplaceFileSystemURL(const char* base, const Parsed& base_parsed,
+                          const Replacements<char>& replacements,
+                          CharsetConverter* charset_converter,
+                          CanonOutput* output, Parsed* new_parsed) {
+  // Working copies of the base components, handed to the override setup.
+  Parsed parts(base_parsed);
+  URLComponentSource<char> src(base);
+  SetupOverrideComponents(base, replacements, &src, &parts);
+  return DoCanonicalizeFileSystemURL<char, unsigned char>(
+      base, src, parts, charset_converter, output, new_parsed);
+}
+
+// Re-canonicalizes |base| with the 16-bit |replacements| applied.
+bool ReplaceFileSystemURL(const char* base, const Parsed& base_parsed,
+                          const Replacements<char16_t>& replacements,
+                          CharsetConverter* charset_converter,
+                          CanonOutput* output, Parsed* new_parsed) {
+  // Scratch space for the setup call below; |src| presumably points into it.
+  RawCanonOutput<1024> scratch;
+  Parsed parts(base_parsed);
+  URLComponentSource<char> src(base);
+  SetupUTF16OverrideComponents(base, replacements, &scratch, &src, &parts);
+  return DoCanonicalizeFileSystemURL<char, unsigned char>(
+      base, src, parts, charset_converter, output, new_parsed);
+}
+
+}  // namespace url
diff --git a/url_canon_fileurl.cc b/url_canon_fileurl.cc
new file mode 100644
index 00000000000..b45114d77fd
--- /dev/null
+++ b/url_canon_fileurl.cc
@@ -0,0 +1,247 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Functions for canonicalizing "file:" URLs.
+
+#include "base/strings/string_piece.h"
+#include "base/strings/string_util.h"
+#include "url/url_canon.h"
+#include "url/url_canon_internal.h"
+#include "url/url_file.h"
+#include "url/url_parse_internal.h"
+
+namespace url {
+
+namespace {
+
+bool IsLocalhost(const char* spec, int begin, int end) {
+  // An inverted range never matches; otherwise compare the range exactly.
+  return begin <= end &&
+         base::StringPiece(&spec[begin], end - begin) == "localhost";
+}
+
+bool IsLocalhost(const char16_t* spec, int begin, int end) {
+  // UTF-16 flavor of the check above: an inverted range never matches.
+  return begin <= end &&
+         base::StringPiece16(&spec[begin], end - begin) == u"localhost";
+}
+
+// Returns the position of a Windows drive letter found in |spec| between
+// |begin| and |end|, provided that everything in front of it canonicalizes to
+// a lone "/". Returns -1 otherwise.
+template <typename CHAR>
+int DoFindWindowsDriveLetter(const CHAR* spec, int begin, int end) {
+  if (begin > end)
+    return -1;
+
+  // Guess where a drive letter might be. A result in front of |begin| means
+  // nothing in the range looks like one.
+  const int candidate = DoesContainWindowsDriveSpecUntil(spec, begin, end, end);
+  if (candidate < begin)
+    return -1;
+
+  // Canonicalize whatever precedes the candidate into a scratch buffer; the
+  // candidate only counts if that prefix comes out as exactly "/".
+  const Component prefix = MakeRange(begin, candidate);
+  RawCanonOutput<1024> scratch;
+  Component canonical_prefix;
+  const bool prefix_ok =
+      CanonicalizePath(spec, prefix, &scratch, &canonical_prefix);
+  const bool prefix_is_root = prefix_ok && canonical_prefix.len == 1 &&
+                              scratch.at(canonical_prefix.begin) == '/';
+  return prefix_is_root ? candidate : -1;
+}
+
+#ifdef WIN32
+
+// Looks for a drive spec starting at |begin| and, when one is found, appends
+// its canonical form ("/", the upper-cased letter, then ":") to |output|.
+// Nothing is written when there is no drive spec. Returns the index of the
+// next input character to process: just past the drive separator on a match,
+// or |begin| unchanged otherwise.
+template <typename CHAR>
+int FileDoDriveSpec(const CHAR* spec, int begin, int end, CanonOutput* output) {
+  const int letter_pos = FindWindowsDriveLetter(spec, begin, end);
+  if (letter_pos < begin)
+    return begin;
+
+  // From here on |letter_pos| is known to hold a valid drive letter that is
+  // followed by a valid separator (a colon or a pipe).
+  output->push_back('/');
+
+  // Drive letters are canonically upper case.
+  const CHAR letter = spec[letter_pos];
+  const bool is_lower = base::IsAsciiLower(letter);
+  output->push_back(static_cast<char>(is_lower ? letter - 'a' + 'A' : letter));
+
+  // The separator is always emitted as a colon, whichever form the input used.
+  output->push_back(':');
+
+  // Skip both the letter and its separator.
+  return letter_pos + 2;
+}
+
+#endif  // WIN32
+
+// Canonicalizes the path of a file URL into |output|. On Windows a leading
+// drive spec is normalized first; the remainder goes through the generic path
+// canonicalizer. |out_path| spans everything written, drive included.
+template <typename CHAR, typename UCHAR>
+bool DoFileCanonicalizePath(const CHAR* spec,
+                            const Component& path,
+                            CanonOutput* output,
+                            Component* out_path) {
+  out_path->begin = output->length();
+
+  // Index of the first input character that is not part of a drive spec. Only
+  // Windows builds look for (and copy) the "c:" prefix.
+#ifdef WIN32
+  const int rest_begin = FileDoDriveSpec(spec, path.begin, path.end(), output);
+#else
+  const int rest_begin = path.begin;
+#endif
+
+  bool success = true;
+  if (rest_begin >= path.end()) {
+    // Nothing follows the drive. If there was no drive either, the input path
+    // was empty, which canonicalizes to a single slash.
+    if (rest_begin == path.begin)
+      output->push_back('/');
+  } else {
+    // Hand the remainder to the regular path canonicalizer. Its component
+    // output is discarded because |out_path| is computed here to cover both
+    // the drive and the remainder.
+    Component ignored;
+    success = CanonicalizePath(spec, MakeRange(rest_begin, path.end()), output,
+                               &ignored);
+  }
+  out_path->len = output->length() - out_path->begin;
+  return success;
+}
+
+// Canonicalizes a file: URL into |output|, recording the resulting component
+// positions in |new_parsed|. Only host and path failures affect the result.
+template <typename CHAR, typename UCHAR>
+bool DoCanonicalizeFileURL(const URLComponentSource<CHAR>& source,
+                           const Parsed& parsed,
+                           CharsetConverter* query_converter,
+                           CanonOutput* output,
+                           Parsed* new_parsed) {
+  // Components that are never set for file: URLs.
+  new_parsed->username = Component();
+  new_parsed->password = Component();
+  new_parsed->port = Component();
+
+  // The scheme is fixed, so emit it directly rather than running the general
+  // scheme canonicalizer.
+  new_parsed->scheme.begin = output->length();
+  output->Append("file://", 7);
+  new_parsed->scheme.len = 4;
+
+  // A "localhost" host in front of a Windows drive letter path is dropped:
+  //     file://localhost/C:/hello.txt -> file:///C:/hello.txt
+  // Per the URL Standard this applies on every platform, not only Windows.
+  // TODO(https://crbug.com/688961): According to the latest URL spec, this
+  // transformation should be done regardless of the path.
+  Component effective_host = parsed.host;
+  if (IsLocalhost(source.host, parsed.host.begin, parsed.host.end())) {
+    const int drive_pos = FindWindowsDriveLetter(
+        source.path, parsed.path.begin, parsed.path.end());
+    if (drive_pos >= parsed.path.begin)
+      effective_host.reset();
+  }
+
+  // Host first. It is usually empty for file URLs but present for UNC paths.
+  // TODO(brettw) This doesn't do any checking for host name validity. We
+  // should probably handle validity checking of UNC hosts differently than
+  // for regular IP hosts.
+  const bool host_ok = CanonicalizeHost(source.host, effective_host, output,
+                                        &new_parsed->host);
+  const bool path_ok = DoFileCanonicalizePath<CHAR, UCHAR>(
+      source.path, parsed.path, output, &new_parsed->path);
+
+  // Query and ref never influence the returned status.
+  CanonicalizeQuery(source.query, parsed.query, query_converter, output,
+                    &new_parsed->query);
+  CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);
+
+  return host_ok && path_ok;
+}
+
+} // namespace
+
+int FindWindowsDriveLetter(const char* spec, int begin, int end) {
+  return DoFindWindowsDriveLetter<char>(spec, begin, end);
+}
+
+int FindWindowsDriveLetter(const char16_t* spec, int begin, int end) {
+  return DoFindWindowsDriveLetter<char16_t>(spec, begin, end);
+}
+
+// 8-bit overload. |spec_len| is not consulted; the component source handed
+// to the implementation is built from |spec|.
+bool CanonicalizeFileURL(const char* spec, int spec_len,
+                         const Parsed& parsed,
+                         CharsetConverter* query_converter,
+                         CanonOutput* output, Parsed* new_parsed) {
+  const URLComponentSource<char> source(spec);
+  return DoCanonicalizeFileURL<char, unsigned char>(
+      source, parsed, query_converter, output, new_parsed);
+}
+
+// 16-bit overload. |spec_len| is not consulted; the component source handed
+// to the implementation is built from |spec|.
+bool CanonicalizeFileURL(const char16_t* spec, int spec_len,
+                         const Parsed& parsed,
+                         CharsetConverter* query_converter,
+                         CanonOutput* output, Parsed* new_parsed) {
+  const URLComponentSource<char16_t> source(spec);
+  return DoCanonicalizeFileURL<char16_t, char16_t>(
+      source, parsed, query_converter, output, new_parsed);
+}
+
+// 8-bit overload; forwards to DoFileCanonicalizePath<char, unsigned char>.
+bool FileCanonicalizePath(const char* spec, const Component& path,
+                          CanonOutput* output, Component* out_path) {
+  const bool ok =
+      DoFileCanonicalizePath<char, unsigned char>(spec, path, output, out_path);
+  return ok;
+}
+
+// 16-bit overload; forwards to DoFileCanonicalizePath<char16_t, char16_t>.
+bool FileCanonicalizePath(const char16_t* spec, const Component& path,
+                          CanonOutput* output, Component* out_path) {
+  const bool ok =
+      DoFileCanonicalizePath<char16_t, char16_t>(spec, path, output, out_path);
+  return ok;
+}
+
+// Re-canonicalizes the file URL |base| with the 8-bit |replacements| applied.
+bool ReplaceFileURL(const char* base, const Parsed& base_parsed,
+                    const Replacements<char>& replacements,
+                    CharsetConverter* query_converter,
+                    CanonOutput* output, Parsed* new_parsed) {
+  // Working copies of the base components, handed to the override setup.
+  Parsed parts(base_parsed);
+  URLComponentSource<char> src(base);
+  SetupOverrideComponents(base, replacements, &src, &parts);
+  return DoCanonicalizeFileURL<char, unsigned char>(
+      src, parts, query_converter, output, new_parsed);
+}
+
+// Re-canonicalizes the file URL |base| with the 16-bit |replacements| applied.
+bool ReplaceFileURL(const char* base, const Parsed& base_parsed,
+                    const Replacements<char16_t>& replacements,
+                    CharsetConverter* query_converter,
+                    CanonOutput* output, Parsed* new_parsed) {
+  // Scratch space for the setup call below; |src| presumably points into it.
+  RawCanonOutput<1024> scratch;
+  Parsed parts(base_parsed);
+  URLComponentSource<char> src(base);
+  SetupUTF16OverrideComponents(base, replacements, &scratch, &src, &parts);
+  return DoCanonicalizeFileURL<char, unsigned char>(
+      src, parts, query_converter, output, new_parsed);
+}
+
+}  // namespace url
diff --git a/url_canon_host.cc b/url_canon_host.cc
new file mode 100644
index 00000000000..d3b1222f17c
--- /dev/null
+++ b/url_canon_host.cc
@@ -0,0 +1,442 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/check.h"
+#include "base/cpu_reduction_experiment.h"
+#include "url/url_canon.h"
+#include "url/url_canon_internal.h"
+
+namespace url {
+
+namespace {
+
+// For reference, here's what IE supports:
+// Key: 0 (disallowed: failure if present in the input)
+//      + (allowed either escaped or unescaped, and unmodified)
+//      U (allowed escaped or unescaped but always unescaped if present in
+//         escaped form)
+//      E (allowed escaped or unescaped but always escaped if present in
+//         unescaped form)
+//      % (only allowed escaped in the input, will be unmodified).
+//      I left blank alpha numeric characters.
+//
+//    00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
+//    -----------------------------------------------
+// 0   0  E  E  E  E  E  E  E  E  E  E  E  E  E  E  E
+// 1   E  E  E  E  E  E  E  E  E  E  E  E  E  E  E  E
+// 2   E  +  E  E  +  E  +  +  +  +  +  +  +  U  U  0
+// 3                                 %  %  E  +  E  0  <-- Those are  : ; < = > ?
+// 4   %
+// 5                                    U  0  U  U  U  <-- Those are  [ \ ] ^ _
+// 6   E                                               <-- That's  `
+// 7                                    E  E  E  U  E  <-- Those are { | } ~ (UNPRINTABLE)
+//
+// NOTE: I didn't actually test all the control characters. Some may be
+// disallowed in the input, but they are all accepted escaped except for 0.
+// I also didn't test if characters affecting HTML parsing are allowed
+// unescaped, e.g. (") or (#), which would indicate the beginning of the path.
+// Surprisingly, space is accepted in the input and always escaped.
+
+// This table lists the canonical version of all characters we allow in the
+// input, with 0 indicating it is disallowed. We use the magic kEsc value
+// (0xff) to indicate that this character should be escaped. We are a little
+// more restrictive than IE, but less restrictive than Firefox.
+//
+// Note that we disallow the % character. We will allow it when part of an
+// escape sequence, of course, but this disallows "%25". Even though IE allows
+// it, allowing it would put us in a funny state. If there was an invalid
+// escape sequence like "%zz", we'll add "%25zz" to the output and fail.
+// Allowing percents means we'll succeed a second time, so validity would change
+// based on how many times you run the canonicalizer. We prefer to always report
+// the same validity, so reject this.
+const unsigned char kEsc = 0xff;
+const unsigned char kHostCharLookup[0x80] = {
+// 00-1f: all are invalid
+     0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+     0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+//  ' '   !    "    #    $    %    &    '    (    )    *    +    ,    -    .    /
+   kEsc,kEsc,kEsc,kEsc,kEsc,  0, kEsc,kEsc,kEsc,kEsc,kEsc, '+',kEsc, '-', '.',  0,
+//   0    1    2    3    4    5    6    7    8    9    :    ;    <    =    >    ?
+    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':',  0 ,kEsc,kEsc,kEsc,  0 ,
+//   @    A    B    C    D    E    F    G    H    I    J    K    L    M    N    O
+   kEsc, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
+//   P    Q    R    S    T    U    V    W    X    Y    Z    [    \    ]    ^    _
+    'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '[',  0 , ']',  0 , '_',
+//   `    a    b    c    d    e    f    g    h    i    j    k    l    m    n    o
+   kEsc, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
+//   p    q    r    s    t    u    v    w    x    y    z    {    |    }    ~
+    'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',kEsc,kEsc,kEsc,  0 ,  0 };
+
+// RFC1034 maximum FQDN length.
+constexpr size_t kMaxHostLength = 253;
+
+// Generous padding: UTS#46 normalization can cause a long string to shrink and
+// fit within the 253 character RFC1034 FQDN length limit. This can still be
+// too short for pathological cases, since UTS#46 processing can remove an
+// arbitrary number of characters (e.g. U+00AD SOFT HYPHEN) from the input, but
+// it should be sufficient for all normally-encountered, non-abusive hostnames.
+constexpr size_t kMaxHostBufferLength = kMaxHostLength * 5;
+
+// Size parameter used for the temporary host buffers in this file.
+constexpr size_t kTempHostBufferLen = 1024;
+using StackBuffer = RawCanonOutputT<char, kTempHostBufferLen>;
+using StackBufferW = RawCanonOutputT<char16_t, kTempHostBufferLen>;
+
+// Scans |host| and reports via the out-params whether it contains any
+// non-7-bit character (|has_non_ascii|) or a percent sign (|has_escaped|).
+template <typename CHAR, typename UCHAR>
+void ScanHostname(const CHAR* spec,
+                  const Component& host,
+                  bool* has_non_ascii,
+                  bool* has_escaped) {
+  const int limit = host.end();
+  *has_non_ascii = false;
+  *has_escaped = false;
+  for (int pos = host.begin; pos < limit; ++pos) {
+    const CHAR ch = spec[pos];
+    if (ch == '%')
+      *has_escaped = true;
+    else if (static_cast<UCHAR>(ch) >= 0x80)
+      *has_non_ascii = true;
+  }
+}
+
+// Canonicalizes a host name that is entirely 8-bit characters (even though
+// the type holding them may be 16 bits). Escaped characters will be unescaped.
+// Non-7-bit characters (for example, UTF-8) will be passed unchanged.
+//
+// The |*has_non_ascii| flag will be true if there are non-7-bit characters in
+// the output.
+//
+// This function is used in two situations:
+//
+//  * When the caller knows there is no non-ASCII or percent escaped
+//    characters. This is what DoHost does. The result will be a completely
+//    canonicalized host since we know nothing weird can happen (escaped
+//    characters could be unescaped to non-7-bit, so they have to be treated
+//    with suspicion at this point). It does not use the |has_non_ascii| flag.
+//
+//  * When the caller has an 8-bit string that may need unescaping.
+//    DoComplexHost calls us in this situation to do unescaping and validation.
+//    After this, it may do other IDN operations depending on the value of the
+//    |*has_non_ascii| flag.
+//
+// The return value indicates if the output is a potentially valid host name.
+template <typename INCHAR, typename OUTCHAR>
+bool DoSimpleHost(const INCHAR* host,
+                  size_t host_len,
+                  CanonOutputT<OUTCHAR>* output,
+                  bool* has_non_ascii) {
+  *has_non_ascii = false;
+
+  bool success = true;
+  for (size_t i = 0; i < host_len; ++i) {
+    unsigned int source = host[i];
+    if (source == '%') {
+      // Unescape into a real byte. Aliasing |source| through a cast pointer
+      // would only update its low-order byte on little-endian targets.
+      unsigned char unescaped = 0;
+      if (!DecodeEscaped(host, &i, host_len, &unescaped)) {
+        // Invalid escape: nothing can make this host valid. Append an escaped
+        // percent so the URL looks reasonable and mark as failed.
+        AppendEscapedChar('%', output);
+        success = false;
+        continue;
+      }
+      source = unescaped;
+    }
+
+    if (source < 0x80) {
+      // We have ASCII input, we can use our lookup table.
+      unsigned char replacement = kHostCharLookup[source];
+      if (!replacement) {
+        // Invalid character, add it as percent-escaped and mark as failed.
+        AppendEscapedChar(source, output);
+        success = false;
+      } else if (replacement == kEsc) {
+        // This character is valid but should be escaped.
+        AppendEscapedChar(source, output);
+      } else {
+        // Common case, the given character is valid in a hostname, the lookup
+        // table tells us the canonical representation of that character (lower
+        // cased).
+        output->push_back(replacement);
+      }
+    } else {
+      // It's a non-ascii char. Just push it to the output.
+      // In case where we have char16 input, and char output it's safe to
+      // cast char16->char only if input string was converted to ASCII.
+      output->push_back(static_cast<OUTCHAR>(source));
+      *has_non_ascii = true;
+    }
+  }
+  return success;
+}
+
+// Canonicalizes a host that requires IDN conversion, appending the result to
+// |output|. Returns true on success.
+bool DoIDNHost(const char16_t* src, size_t src_len, CanonOutput* output) {
+  // Remembered so a partially written result can be discarded further down.
+  const int rewind_length = output->length();
+
+  // Escaping has to happen before the IDN conversion, since punycode strings
+  // cannot be escaped after they are created.
+  RawCanonOutputW<kTempHostBufferLen> pre_escaped;
+  bool pre_escaped_non_ascii;
+  DoSimpleHost(src, src_len, &pre_escaped, &pre_escaped_non_ascii);
+  if (pre_escaped.length() > kMaxHostBufferLength) {
+    AppendInvalidNarrowString(src, 0, src_len, output);
+    return false;
+  }
+
+  StackBufferW ascii_form;
+  const bool converted =
+      IDNToASCII(pre_escaped.data(), pre_escaped.length(), &ascii_form);
+  if (!converted) {
+    // Conversion failed; give up after writing a reasonable looking
+    // representation of the input to the output.
+    AppendInvalidNarrowString(src, 0, src_len, output);
+    return false;
+  }
+
+  // Now we check the ASCII output like a normal host. It will also handle
+  // unescaping. Although we unescaped everything before this function call, if
+  // somebody does %00 as fullwidth, ICU will convert this to ASCII.
+  bool still_non_ascii;
+  const bool success = DoSimpleHost(ascii_form.data(), ascii_form.length(),
+                                    output, &still_non_ascii);
+  if (!still_non_ascii)
+    return success;
+
+  // ICU generated something that DoSimpleHost didn't think looked like ASCII.
+  // This is quite rare, but ICU might convert some characters to percent
+  // signs, which might generate new escape sequences, which might in turn be
+  // invalid. An example is U+FE6A "small percent", which ICU will nameprep
+  // into an ASCII percent; the following characters can then be interpreted
+  // as an escape sequence. In that case, discard what was written, escape
+  // ICU's output instead, and give up. DoSimpleHost will have handled a
+  // further level of escaping from ICU for simple ASCII cases (i.e. if ICU
+  // generates a new escaped ASCII sequence like "%41" we'll unescape it) but
+  // it won't do more (like handle escaped non-ASCII sequences). Handling the
+  // escaped ASCII isn't strictly necessary, but DoSimpleHost handles this
+  // case anyway so we handle it.
+  output->set_length(rewind_length);
+  AppendInvalidNarrowString(ascii_form.data(), 0, ascii_form.length(), output);
+  return false;
+}
+
+// 8-bit version: unescapes the UTF-8 input when |has_escaped| is set, then
+// converts it to UTF-16 and runs IDN if non-ASCII remains. Returns false if
+// an escape was invalid, the UTF-16 conversion fails, or the IDN step fails.
+bool DoComplexHost(const char* host,
+                   size_t host_len,
+                   bool has_non_ascii,
+                   bool has_escaped,
+                   CanonOutput* output) {
+  // Save the current position in the output. We may write stuff and rewind it
+  // below, so we need to know where to rewind to.
+  size_t begin_length = output->length();
+
+  // Points to the UTF-8 data we want to convert. This will either be the
+  // input or the unescaped version written to |*output| if necessary.
+  const char* utf8_source;
+  size_t utf8_source_len;
+  bool are_all_escaped_valid = true;
+  if (has_escaped) {
+    // Unescape before converting to UTF-16 for IDN. We write this into the
+    // output because it most likely does not require IDNization, and we can
+    // save another huge stack buffer. It will be replaced below if it requires
+    // IDN. This will also update our non-ASCII flag so we know whether the
+    // unescaped input requires IDN.
+    if (!DoSimpleHost(host, host_len, output, &has_non_ascii)) {
+      // Error with some escape sequence. We'll call the current output
+      // complete. DoSimpleHost will have written some "reasonable" output
+      // for the invalid escapes, but the output could be non-ASCII and
+      // needs to go through re-encoding below.
+      are_all_escaped_valid = false;
+    }
+
+    // Unescaping may have left us with ASCII input, in which case the
+    // unescaped version we wrote to output is complete.
+    if (!has_non_ascii) {
+      return are_all_escaped_valid;
+    }
+
+    // Save a pointer to the data that was just unescaped (it may follow
+    // other data that was already in the output buffer).
+    utf8_source = &output->data()[begin_length];
+    utf8_source_len = output->length() - begin_length;
+  } else {
+    // We don't need to unescape, use input for IDNization later. (We know the
+    // input has non-ASCII, or the simple version would have been called
+    // instead of us.)
+    utf8_source = host;
+    utf8_source_len = host_len;
+  }
+
+  // Non-ASCII input requires IDN, convert to UTF-16 and do the IDN conversion.
+  // Above, we may have used the output to write the unescaped values to, so
+  // we have to rewind it to where we started after we convert it to UTF-16.
+  StackBufferW utf16;
+  if (!ConvertUTF8ToUTF16(utf8_source, utf8_source_len, &utf16)) {
+    // In this error case, the input may or may not be the output, so copy it
+    // aside before the output is rewound and written to again.
+    StackBuffer utf8;
+    for (size_t i = 0; i < utf8_source_len; i++)
+      utf8.push_back(utf8_source[i]);
+    output->set_length(begin_length);
+    AppendInvalidNarrowString(utf8.data(), 0, utf8.length(), output);
+    return false;
+  }
+  output->set_length(begin_length);
+
+  // This will call DoSimpleHost which will do normal ASCII canonicalization
+  // and also check for IP addresses in the output.
+  return DoIDNHost(utf16.data(), utf16.length(), output) &&
+         are_all_escaped_valid;
+}
+
+// Canonicalizes a UTF-16 host containing escapes and/or non-ASCII characters.
+// Input without escapes is already in the form the IDN backend wants; escaped
+// input takes a detour through UTF-8 so the escapes can be decoded first.
+bool DoComplexHost(const char16_t* host,
+                   size_t host_len,
+                   bool has_non_ascii,
+                   bool has_escaped,
+                   CanonOutput* output) {
+  if (!has_escaped) {
+    // Nothing to unescape, so the input can go straight to ICU. Callers only
+    // reach this function for escaped or non-ASCII input; even if the input
+    // turns out to be plain ASCII, the IDN path still does the right thing
+    // (just slower than we could).
+    return DoIDNHost(host, host_len, output);
+  }
+
+  // Escaped characters in wide input are to be interpreted as UTF-8, so the
+  // plan is: convert to UTF-8, unescape, then convert back to UTF-16 for IDN.
+  //
+  // The ASCII case *could* be a plain copy, but it is not special-cased:
+  // escaped host names should be very rare and the conversion is relatively
+  // fast anyway.
+  StackBuffer narrow_host;
+  const bool converted = ConvertUTF16ToUTF8(host, host_len, &narrow_host);
+  if (!converted) {
+    // The conversion failed: append the input through the invalid-string
+    // path and report failure.
+    AppendInvalidNarrowString(host, 0, host_len, output);
+    return false;
+  }
+
+  // From here on the 8-bit overload above does the rest of the work.
+  return DoComplexHost(narrow_host.data(), narrow_host.length(), has_non_ascii,
+                       has_escaped, output);
+}
+
+// Canonicalizes the |host| component of |spec| into |output|. Hosts with
+// escapes or non-ASCII take the complex path; the rest take the simple one.
+template <typename CHAR, typename UCHAR>
+bool DoHostSubstring(const CHAR* spec,
+                     const Component& host,
+                     CanonOutput* output) {
+  DCHECK(host.is_valid());
+  const CHAR* const host_chars = &spec[host.begin];
+  const size_t host_size = static_cast<size_t>(host.len);
+
+  bool non_ascii, escaped;
+  ScanHostname<CHAR, UCHAR>(spec, host, &non_ascii, &escaped);
+  if (!non_ascii && !escaped) {
+    const bool ok = DoSimpleHost(host_chars, host_size, output, &non_ascii);
+    DCHECK(!non_ascii);
+    return ok;
+  }
+  return DoComplexHost(host_chars, host_size, non_ascii, escaped, output);
+}
+
+// Canonicalizes |host| into |output| and describes the result in |host_info|:
+// its family (NEUTRAL if empty, BROKEN on failure) and its range in |output|.
+template <typename CHAR, typename UCHAR>
+void DoHost(const CHAR* spec,
+            const Component& host,
+            CanonOutput* output,
+            CanonHostInfo* host_info) {
+  if (host.is_empty()) {
+    // An empty host produces no output at all.
+    host_info->family = CanonHostInfo::NEUTRAL;
+    host_info->out_host = Component();
+    return;
+  }
+
+  // Remember where the host starts in |output| so it can be rewound below.
+  const int host_start = output->length();
+
+  const bool canonicalized = DoHostSubstring<CHAR, UCHAR>(spec, host, output);
+  if (!canonicalized) {
+    // Canonicalization failed. Set BROKEN to notify the caller.
+    host_info->family = CanonHostInfo::BROKEN;
+  } else {
+    // The canonical text may turn out to be an IP address. IP addresses are
+    // small, so this temporary buffer should not cause an allocation.
+    RawCanonOutput<64> ip_text;
+    const Component written = MakeRange(host_start, output->length());
+    CanonicalizeIPAddress(output->data(), written, &ip_text, host_info);
+
+    // For an IPv4/IPv6 address, replace what was written with the canonical
+    // IP form. Otherwise it's a hostname or broken IP and stays in place.
+    if (host_info->IsIPAddress()) {
+      output->set_length(host_start);
+      output->Append(ip_text.data(), ip_text.length());
+    }
+  }
+
+  host_info->out_host = MakeRange(host_start, output->length());
+}
+
+}  // namespace
+
+// 8-bit host canonicalization; returns false when the host came out BROKEN.
+bool CanonicalizeHost(const char* spec, const Component& host,
+                      CanonOutput* output, Component* out_host) {
+  CanonHostInfo info;
+  DoHost<char, unsigned char>(spec, host, output, &info);
+  *out_host = info.out_host;
+  const bool broken = (info.family == CanonHostInfo::BROKEN);
+  return !broken;
+}
+
+// UTF-16 host canonicalization; returns false when the host came out BROKEN.
+bool CanonicalizeHost(const char16_t* spec, const Component& host,
+                      CanonOutput* output, Component* out_host) {
+  CanonHostInfo info;
+  DoHost<char16_t, char16_t>(spec, host, output, &info);
+  *out_host = info.out_host;
+  const bool broken = (info.family == CanonHostInfo::BROKEN);
+  return !broken;
+}
+
+// 8-bit variant that exposes the full CanonHostInfo instead of a bool.
+void CanonicalizeHostVerbose(const char* spec, const Component& host,
+                             CanonOutput* output, CanonHostInfo* host_info) {
+  // All of the work, including filling |host_info|, happens in DoHost.
+  DoHost<char, unsigned char>(spec, host, output, host_info);
+}
+
+// UTF-16 variant that exposes the full CanonHostInfo instead of a bool.
+void CanonicalizeHostVerbose(const char16_t* spec, const Component& host,
+                             CanonOutput* output, CanonHostInfo* host_info) {
+  // All of the work, including filling |host_info|, happens in DoHost.
+  DoHost<char16_t, char16_t>(spec, host, output, host_info);
+}
+
+// 8-bit variant: forwards to DoHostSubstring; no CanonHostInfo is produced.
+bool CanonicalizeHostSubstring(const char* spec, const Component& host,
+                               CanonOutput* output) {
+  return DoHostSubstring<char, unsigned char>(spec, host, output);
+}
+
+// UTF-16 variant: forwards to DoHostSubstring; no CanonHostInfo is produced.
+bool CanonicalizeHostSubstring(const char16_t* spec, const Component& host,
+                               CanonOutput* output) {
+  return DoHostSubstring<char16_t, char16_t>(spec, host, output);
+}
+
+}  // namespace url
diff --git a/url_canon_icu.cc b/url_canon_icu.cc
new file mode 100644
index 00000000000..5adc187748d
--- /dev/null
+++ b/url_canon_icu.cc
@@ -0,0 +1,114 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// ICU-based character set converter.
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "base/check.h"
+#include "base/memory/raw_ptr.h"
+#include "base/memory/raw_ptr_exclusion.h"
+#include "third_party/icu/source/common/unicode/ucnv.h"
+#include "third_party/icu/source/common/unicode/ucnv_cb.h"
+#include "third_party/icu/source/common/unicode/utypes.h"
+#include "url/url_canon_icu.h"
+#include "url/url_canon_internal.h"  // for _itoa_s
+
+namespace url {
+
+namespace {
+
+// ICU "from Unicode" callback for code points the destination charset cannot
+// represent. Writes the numeric character reference for the code point, which
+// has the form "&#1234;", with the non-digits percent-escaped, giving
+// "%26%231234%3B". Why? This is what Netscape did back in the olden days.
+void appendURLEscapedChar(const void* context,
+                          UConverterFromUnicodeArgs* from_args,
+                          const UChar* code_units,
+                          int32_t length,
+                          UChar32 code_point,
+                          UConverterCallbackReason reason,
+                          UErrorCode* err) {
+  if (reason != UCNV_UNASSIGNED)
+    return;
+  *err = U_ZERO_ERROR;
+
+  static const char kPrefix[] = "%26%23";  // "&#" percent-escaped
+  static const int kPrefixLen = sizeof(kPrefix) - 1;
+  ucnv_cbFromUWriteBytes(from_args, kPrefix, kPrefixLen, 0, err);
+
+  DCHECK(code_point < 0x110000);
+  char decimal[8];  // Max Unicode code point is 7 digits.
+  _itoa_s(code_point, decimal, 10);
+  const int decimal_len = static_cast<int>(strlen(decimal));
+  ucnv_cbFromUWriteBytes(from_args, decimal, decimal_len, 0, err);
+
+  static const char kPostfix[] = "%3B";  // ";" percent-escaped
+  static const int kPostfixLen = sizeof(kPostfix) - 1;
+  ucnv_cbFromUWriteBytes(from_args, kPostfix, kPostfixLen, 0, err);
+}
+
+// Installs appendURLEscapedChar as the converter's from-Unicode callback for
+// the lifetime of this object; the previous callback is restored afterwards.
+class AppendHandlerInstaller {
+ public:
+  // |converter| must stay alive for the duration of this object's lifetime.
+  explicit AppendHandlerInstaller(UConverter* converter)
+      : converter_(converter) {
+    UErrorCode err = U_ZERO_ERROR;
+    ucnv_setFromUCallBack(converter_, appendURLEscapedChar, nullptr,
+                          &old_callback_, &old_context_, &err);
+  }
+
+  ~AppendHandlerInstaller() {
+    UErrorCode err = U_ZERO_ERROR;
+    ucnv_setFromUCallBack(converter_, old_callback_, old_context_, nullptr,
+                          nullptr, &err);
+  }
+
+ private:
+  raw_ptr<UConverter> converter_;
+  UConverterFromUCallback old_callback_;
+  // Not a raw_ptr<> because it was filtered by the rewriter for: #addr-of
+  RAW_PTR_EXCLUSION const void* old_context_;
+};
+
+}  // namespace
+
+// Stores |converter| without taking ownership of it.
+ICUCharsetConverter::ICUCharsetConverter(UConverter* converter)
+    : converter_(converter) {}
+
+ICUCharsetConverter::~ICUCharsetConverter() = default;
+
+// Appends |input| to |output|, encoded by the wrapped ICU converter. Code
+// points that charset cannot represent are escaped by the installed handler.
+void ICUCharsetConverter::ConvertFromUTF16(const char16_t* input,
+                                           int input_len,
+                                           CanonOutput* output) {
+  AppendHandlerInstaller handler(converter_);
+
+  const int begin_offset = output->length();
+  int dest_capacity = output->capacity() - begin_offset;
+
+  while (true) {
+    UErrorCode err = U_ZERO_ERROR;
+    // data() is re-read on every pass because the output is resized below.
+    char* dest = &output->data()[begin_offset];
+    const int required_capacity = ucnv_fromUChars(
+        converter_, dest, dest_capacity, input, input_len, &err);
+    if (err != U_BUFFER_OVERFLOW_ERROR) {
+      output->set_length(begin_offset + required_capacity);
+      return;
+    }
+
+    // Output didn't fit; grow it to the size ICU reported and try again.
+    dest_capacity = required_capacity;
+    output->Resize(begin_offset + dest_capacity);
+  }
+}
+
+}  // namespace url
diff --git a/url_canon_icu.h b/url_canon_icu.h
new file mode 100644
index 00000000000..cb5da7d37b0
--- /dev/null
+++ b/url_canon_icu.h
@@ -0,0 +1,41 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_URL_CANON_ICU_H_
+#define URL_URL_CANON_ICU_H_
+
+// ICU integration functions.
+
+#include "base/compiler_specific.h"
+#include "base/component_export.h"
+#include "base/memory/raw_ptr.h"
+#include "url/url_canon.h"
+
+typedef struct UConverter UConverter;
+
+namespace url {
+
+// A CharsetConverter implementation that embedders can use to interface the
+// canonicalizer with ICU's conversion routines.
+class COMPONENT_EXPORT(URL) ICUCharsetConverter : public CharsetConverter {
+ public:
+  // Wraps an already-existing ICU converter. It is NOT owned by this object;
+  // the creator must keep it alive for as long as this object is.
+  ICUCharsetConverter(UConverter* converter);
+
+  ~ICUCharsetConverter() override;
+
+  // Appends |input| to |output|, converted to the ICU converter's charset.
+  void ConvertFromUTF16(const char16_t* input,
+                        int input_len,
+                        CanonOutput* output) override;
+
+ private:
+  // The ICU converter, not owned by this class.
+  raw_ptr<UConverter> converter_;
+};
+
+}  // namespace url
+
+#endif  // URL_URL_CANON_ICU_H_
diff --git a/url_canon_icu_unittest.cc b/url_canon_icu_unittest.cc
new file mode 100644
index 00000000000..336da3f20b2
--- /dev/null
+++ b/url_canon_icu_unittest.cc
@@ -0,0 +1,168 @@
+// Copyright 2014 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/url_canon_icu.h"
+
+#include <stddef.h>
+
+#include "base/logging.h"
+#include "base/memory/raw_ptr.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "third_party/icu/source/common/unicode/ucnv.h"
+#include "url/url_canon.h"
+#include "url/url_canon_stdstring.h"
+#include "url/url_test_utils.h"
+
+namespace url {
+
+namespace {
+
+// Wrapper around a UConverter object that manages creation and destruction.
+class UConvScoper {
+ public:
+  explicit UConvScoper(const char* charset_name) {
+    UErrorCode err = U_ZERO_ERROR;
+    converter_ = ucnv_open(charset_name, &err);
+    if (!converter_) {
+      LOG(ERROR) << "Failed to open charset " << charset_name << ": "
+                 << u_errorName(err);
+    }
+  }
+
+  ~UConvScoper() {
+    if (converter_)
+      ucnv_close(converter_.ExtractAsDangling());
+  }
+
+  // Returns the converter object; null if opening the charset failed.
+  UConverter* converter() const { return converter_; }
+
+ private:
+  raw_ptr<UConverter> converter_;
+};
+
+TEST(URLCanonIcuTest, ICUCharsetConverter) {
+  struct ICUCase {
+    const wchar_t* input;
+    const char* encoding;
+    const char* expected;
+  };
+  const ICUCase kCases[] = {
+      // UTF-8.
+      {L"Hello, world", "utf-8", "Hello, world"},
+      {L"\x4f60\x597d", "utf-8", "\xe4\xbd\xa0\xe5\xa5\xbd"},
+      // Non-BMP UTF-8.
+      {L"!\xd800\xdf00!", "utf-8", "!\xf0\x90\x8c\x80!"},
+      // Big5
+      {L"\x4f60\x597d", "big5", "\xa7\x41\xa6\x6e"},
+      // Unrepresentable character in the destination set.
+      {L"hello\x4f60\x06de\x597dworld", "big5",
+       "hello\xa7\x41%26%231758%3B\xa6\x6eworld"},
+  };
+
+  // Each case gets its own ICU converter and its own output string.
+  for (const ICUCase& test_case : kCases) {
+    UConvScoper scoped_conv(test_case.encoding);
+    ASSERT_TRUE(scoped_conv.converter() != nullptr);
+    ICUCharsetConverter charset_converter(scoped_conv.converter());
+
+    std::string actual;
+    StdStringCanonOutput output(&actual);
+
+    const std::u16string utf16_input(
+        test_utils::TruncateWStringToUTF16(test_case.input));
+    charset_converter.ConvertFromUTF16(
+        utf16_input.c_str(), static_cast<int>(utf16_input.length()), &output);
+    output.Complete();
+
+    EXPECT_STREQ(test_case.expected, actual.c_str());
+  }
+
+  // Exercise input lengths around the resize boundary of the output to make
+  // sure the converter resizes as needed.
+  constexpr int kStaticSize = 16;
+  UConvScoper utf8_conv("utf-8");
+  ASSERT_TRUE(utf8_conv.converter());
+  ICUCharsetConverter utf8_converter(utf8_conv.converter());
+  for (int len = kStaticSize - 2; len <= kStaticSize + 2; len++) {
+    // A run of |len| copies of the letter 'a'.
+    const std::u16string input(static_cast<size_t>(len), u'a');
+
+    RawCanonOutput<kStaticSize> output;
+    utf8_converter.ConvertFromUTF16(input.c_str(),
+                                    static_cast<int>(input.length()), &output);
+    EXPECT_EQ(input.length(), output.length());
+  }
+}
+
+TEST(URLCanonIcuTest, QueryWithConverter) {
+  struct QueryCase {
+    const char* input8;
+    const wchar_t* input16;
+    const char* encoding;
+    const char* expected;
+  };
+  const QueryCase kCases[] = {
+      // Regular ASCII case in some different encodings.
+      {"foo=bar", L"foo=bar", "utf-8", "?foo=bar"},
+      {"foo=bar", L"foo=bar", "shift_jis", "?foo=bar"},
+      {"foo=bar", L"foo=bar", "gb2312", "?foo=bar"},
+      // Chinese input/output
+      {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "gb2312",
+       "?q=%C4%E3%BA%C3"},
+      {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "big5", "?q=%A7A%A6n"},
+      // A character that cannot be encoded in the destination character set
+      // should be escaped. The escape sequence unescapes to the numeric
+      // character reference: "?q=Chinese&#65319;"
+      {"q=Chinese\xef\xbc\xa7", L"q=Chinese\xff27", "iso-8859-1",
+       "?q=Chinese%26%2365319%3B"},
+  };
+
+  for (const QueryCase& test_case : kCases) {
+    Component out_comp;
+
+    UConvScoper scoped_conv(test_case.encoding);
+    ASSERT_TRUE(!test_case.encoding || scoped_conv.converter());
+    ICUCharsetConverter charset_converter(scoped_conv.converter());
+
+    if (test_case.input8) {
+      const Component in_comp(0, static_cast<int>(strlen(test_case.input8)));
+      std::string actual;
+
+      StdStringCanonOutput output(&actual);
+      CanonicalizeQuery(test_case.input8, in_comp, &charset_converter, &output,
+                        &out_comp);
+      output.Complete();
+
+      EXPECT_EQ(test_case.expected, actual);
+    }
+
+    if (test_case.input16) {
+      const std::u16string wide_query(
+          test_utils::TruncateWStringToUTF16(test_case.input16));
+      const Component in_comp(0, static_cast<int>(wide_query.length()));
+      std::string actual;
+
+      StdStringCanonOutput output(&actual);
+      CanonicalizeQuery(wide_query.c_str(), in_comp, &charset_converter,
+                        &output, &out_comp);
+      output.Complete();
+
+      EXPECT_EQ(test_case.expected, actual);
+    }
+  }
+
+  // Extra test for input with an embedded NUL.
+  std::string null_out;
+  StdStringCanonOutput output(&null_out);
+  Component out_comp;
+  CanonicalizeQuery("a \x00z\x01", Component(0, 5), nullptr, &output,
+                    &out_comp);
+  output.Complete();
+  EXPECT_EQ("?a%20%00z%01", null_out);
+}
+
+}  // namespace
+
+}  // namespace url
diff --git a/url_canon_internal.cc b/url_canon_internal.cc
new file mode 100644
index 00000000000..f6219209e72
--- /dev/null
+++ b/url_canon_internal.cc
@@ -0,0 +1,502 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/url_canon_internal.h"
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdlib.h>
+#ifdef __SSE2__
+#include <immintrin.h>
+#elif defined(__aarch64__)
+#include <arm_neon.h>
+#endif
+
+#include <cstdio>
+#include <string>
+
+#include "base/bits.h"
+#include "base/numerics/safe_conversions.h"
+#include "base/strings/utf_string_conversion_utils.h"
+
+namespace url {
+
+namespace {
+
+// Returns the length of an initial segment of |source| that consists solely
+// of characters valid for CHAR_QUERY. The result is a multiple of 16 (or 0
+// without SIMD support) and may be shorter than the true valid prefix: only
+// whole chunks are scanned and the exclamation mark 0x21 is treated as
+// invalid. Such false negatives are fine. This is a fast path to speed up
+// checking of very long, already valid query strings (seen on some pages).
+//
+// This has some startup cost to load the constants and such, so it's
+// usually not worth it for short strings.
+size_t FindInitialQuerySafeString(const char* source, size_t length) {
+#if defined(__SSE2__) || defined(__aarch64__)
+  constexpr size_t kChunkSize = 16;
+  size_t i;
+  for (i = 0; i < base::bits::AlignDown(length, kChunkSize); i += kChunkSize) {
+    char b __attribute__((vector_size(16)));
+    memcpy(&b, source + i, sizeof(b));
+
+    // Compare each element with the ranges for CHAR_QUERY
+    // (see kSharedCharTypeTable), vectorized so that it creates
+    // a mask of which elements match. For completeness, we could
+    // have had (...) | b == 0x21 here, but exclamation marks are
+    // rare and the extra test costs us some time.
+    auto mask = b >= 0x24 && b <= 0x7e && b != 0x27 && b != 0x3c && b != 0x3e;
+
+#ifdef __SSE2__
+    if (_mm_movemask_epi8(reinterpret_cast<__m128i>(mask)) != 0xffff) {
+      return i;  // This chunk holds at least one byte that did not match.
+    }
+#else
+    if (vminvq_u8(reinterpret_cast<uint8x16_t>(mask)) == 0) {
+      return i;  // This chunk holds at least one byte that did not match.
+    }
+#endif
+  }
+  return i;  // Every whole chunk matched; any tail is left to the caller.
+#else
+  // Need SIMD support (with fast reductions) for this to be efficient.
+  return 0;
+#endif
+}
+
+// Appends |source| to |output|, escaping whatever is not a 7-bit |type| char.
+template <typename CHAR, typename UCHAR>
+void DoAppendStringOfType(const CHAR* source,
+                          size_t length,
+                          SharedCharTypes type,
+                          CanonOutput* output) {
+  size_t pos = 0;
+  // The fast path is only instantiated for char, to avoid a Clang crash
+  // (and because Append() does not support converting).
+  if constexpr (sizeof(CHAR) == 1) {
+    if (type == CHAR_QUERY && length >= kMinimumLengthForSIMD) {
+      pos = FindInitialQuerySafeString(source, length);
+      output->Append(source, pos);
+    }
+  }
+  for (; pos < length; pos++) {
+    if (static_cast<UCHAR>(source[pos]) < 0x80) {
+      // A 7-bit character: copy it through, or escape it if not of |type|.
+      const unsigned char ascii = static_cast<unsigned char>(source[pos]);
+      if (IsCharOfType(ascii, type))
+        output->push_back(ascii);
+      else
+        AppendEscapedChar(ascii, output);
+      continue;
+    }
+    // Invalid sequences come back as kUnicodeReplacementCharacter.
+    base_icu::UChar32 code_point;
+    ReadUTFChar(source, &pos, length, &code_point);
+    AppendUTF8EscapedValue(code_point, output);
+  }
+}
+
+// This function assumes the input values are all contained in 8-bit,
+// although it allows any type. Returns true if input is valid, false if not.
+template <typename CHAR, typename UCHAR>
+void DoAppendInvalidNarrowString(const CHAR* spec,
+                                 size_t begin,
+                                 size_t end,
+                                 CanonOutput* output) {
+  for (size_t i = begin; i < end; i++) {
+    UCHAR uch = static_cast<UCHAR>(spec[i]);
+    if (uch >= 0x80) {
+      // Handle UTF-8/16 encodings. This call will correctly handle the error
+      // case by appending the invalid character.
+      AppendUTF8EscapedChar(spec, &i, end, output);
+    } else if (uch <= ' ' || uch == 0x7f) {
+      // This function is for error handling, so we escape all control
+      // characters and spaces, but not anything else since we lack
+      // context to do something more specific.
+      AppendEscapedChar(static_cast<unsigned char>(uch), output);
+    } else {
+      output->push_back(static_cast<char>(uch));
+    }
+  }
+}
+
+// Overrides one component unless |override_source| is null. See the
+// Replacements structure for what the source/component combinations mean.
+void DoOverrideComponent(const char* override_source,
+                         const Component& override_component,
+                         const char** dest,
+                         Component* dest_component) {
+  if (!override_source)
+    return;
+  *dest = override_source;
+  *dest_component = override_component;
+}
+
+// UTF-16 counterpart of DoOverrideComponent. It differs in that it never sets
+// an output character pointer.
+//
+// The override text is converted to UTF-8 and appended to |utf8_buffer|, which
+// serves as temporary storage. |*dest_component| receives the range of the
+// buffer that the converted text occupies.
+//
+// No |dest| pointer is set here because every such pointer would point into
+// |utf8_buffer|, and the buffer may get resized while a subsequent component
+// is being overridden. Once all overrides have been prepared, the caller
+// should use the beginning of |utf8_buffer| as the string pointer for all
+// components. Returns false only if the UTF-8 conversion fails.
+bool PrepareUTF16OverrideComponent(const char16_t* override_source,
+                                   const Component& override_component,
+                                   CanonOutput* utf8_buffer,
+                                   Component* dest_component) {
+  if (!override_source)
+    return true;
+  if (!override_component.is_valid()) {
+    // Non-"valid" component (means delete), so we need to preserve that.
+    *dest_component = Component();
+    return true;
+  }
+
+  // Convert to UTF-8, appending to whatever |utf8_buffer| already holds.
+  dest_component->begin = utf8_buffer->length();
+  const bool converted = ConvertUTF16ToUTF8(
+      &override_source[override_component.begin],
+      static_cast<size_t>(override_component.len), utf8_buffer);
+  dest_component->len = utf8_buffer->length() - dest_component->begin;
+  return converted;
+}
+
+}  // namespace
+
+// See the header file for this array's declaration.
+// clang-format off
+const unsigned char kSharedCharTypeTable[0x100] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x00 - 0x0f
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x10 - 0x1f
+    0,                           // 0x20  ' ' (escape spaces in queries)
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x21  !
+    0,                           // 0x22  "
+    0,                           // 0x23  #  (invalid in query since it marks the ref)
+    CHAR_QUERY | CHAR_USERINFO,  // 0x24  $
+    CHAR_QUERY | CHAR_USERINFO,  // 0x25  %
+    CHAR_QUERY | CHAR_USERINFO,  // 0x26  &
+    0,                           // 0x27  '  (Try to prevent XSS.)
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x28  (
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x29  )
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x2a  *
+    CHAR_QUERY | CHAR_USERINFO,  // 0x2b  +
+    CHAR_QUERY | CHAR_USERINFO,  // 0x2c  ,
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x2d  -
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_COMPONENT,  // 0x2e  .
+    CHAR_QUERY,                  // 0x2f  /
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x30  0
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x31  1
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x32  2
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x33  3
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x34  4
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x35  5
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x36  6
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x37  7
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_COMPONENT,             // 0x38  8
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_COMPONENT,             // 0x39  9
+    CHAR_QUERY,  // 0x3a  :
+    CHAR_QUERY,  // 0x3b  ;
+    0,           // 0x3c  <  (Try to prevent certain types of XSS.)
+    CHAR_QUERY,  // 0x3d  =
+    0,           // 0x3e  >  (Try to prevent certain types of XSS.)
+    CHAR_QUERY,  // 0x3f  ?
+    CHAR_QUERY,  // 0x40  @
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x41  A
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x42  B
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x43  C
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x44  D
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x45  E
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x46  F
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x47  G
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x48  H
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x49  I
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4a  J
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4b  K
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4c  L
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4d  M
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4e  N
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4f  O
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x50  P
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x51  Q
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x52  R
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x53  S
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x54  T
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x55  U
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x56  V
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x57  W
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_COMPONENT, // 0x58  X
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x59  Y
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x5a  Z
+    CHAR_QUERY,  // 0x5b  [
+    CHAR_QUERY,  // 0x5c  '\'
+    CHAR_QUERY,  // 0x5d  ]
+    CHAR_QUERY,  // 0x5e  ^
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x5f  _
+    CHAR_QUERY,  // 0x60  `
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x61  a
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x62  b
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x63  c
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x64  d
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x65  e
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x66  f
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x67  g
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x68  h
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x69  i
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6a  j
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6b  k
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6c  l
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6d  m
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6e  n
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6f  o
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x70  p
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x71  q
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x72  r
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x73  s
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x74  t
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x75  u
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x76  v
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x77  w
+    CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_COMPONENT,  // 0x78  x
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x79  y
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x7a  z
+    CHAR_QUERY,  // 0x7b  {
+    CHAR_QUERY,  // 0x7c  |
+    CHAR_QUERY,  // 0x7d  }
+    CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x7e  ~
+    0,           // 0x7f
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x80 - 0x8f
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x90 - 0x9f
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xa0 - 0xaf
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xb0 - 0xbf
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xc0 - 0xcf
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xd0 - 0xdf
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xe0 - 0xef
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xf0 - 0xff
+};
+// clang-format on
+
+const char kHexCharLookup[0x10] = {
+    '0', '1', '2', '3', '4', '5', '6', '7',
+    '8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
+};
+
+const char kCharToHexLookup[8] = {
+    0,         // 0x00 - 0x1f
+    '0',       // 0x20 - 0x3f: digits 0 - 9 are 0x30 - 0x39
+    'A' - 10,  // 0x40 - 0x5f: letters A - F are 0x41 - 0x46
+    'a' - 10,  // 0x60 - 0x7f: letters a - f are 0x61 - 0x66
+    0,         // 0x80 - 0x9F
+    0,         // 0xA0 - 0xBF
+    0,         // 0xC0 - 0xDF
+    0,         // 0xE0 - 0xFF
+};
+
+const base_icu::UChar32 kUnicodeReplacementCharacter = 0xfffd;
+
+void AppendStringOfType(const char* source,
+                        size_t length,
+                        SharedCharTypes type,
+                        CanonOutput* output) {
+  DoAppendStringOfType<char, unsigned char>(source, length, type, output);
+}
+
+void AppendStringOfType(const char16_t* source,
+                        size_t length,
+                        SharedCharTypes type,
+                        CanonOutput* output) {
+  DoAppendStringOfType<char16_t, char16_t>(source, length, type, output);
+}
+
+bool ReadUTFChar(const char* str,
+                 size_t* begin,
+                 size_t length,
+                 base_icu::UChar32* code_point_out) {
+  if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out) ||
+      !base::IsValidCharacter(*code_point_out)) {
+    *code_point_out = kUnicodeReplacementCharacter;
+    return false;
+  }
+  return true;
+}
+
+bool ReadUTFChar(const char16_t* str,
+                 size_t* begin,
+                 size_t length,
+                 base_icu::UChar32* code_point_out) {
+  if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out) ||
+      !base::IsValidCharacter(*code_point_out)) {
+    *code_point_out = kUnicodeReplacementCharacter;
+    return false;
+  }
+  return true;
+}
+
+void AppendInvalidNarrowString(const char* spec,
+                               size_t begin,
+                               size_t end,
+                               CanonOutput* output) {
+  DoAppendInvalidNarrowString<char, unsigned char>(spec, begin, end, output);
+}
+
+void AppendInvalidNarrowString(const char16_t* spec,
+                               size_t begin,
+                               size_t end,
+                               CanonOutput* output) {
+  DoAppendInvalidNarrowString<char16_t, char16_t>(spec, begin, end, output);
+}
+
+bool ConvertUTF16ToUTF8(const char16_t* input,
+                        size_t input_len,
+                        CanonOutput* output) {
+  bool success = true;
+  for (size_t i = 0; i < input_len; i++) {
+    base_icu::UChar32 code_point;
+    success &= ReadUTFChar(input, &i, input_len, &code_point);
+    AppendUTF8Value(code_point, output);
+  }
+  return success;
+}
+
+bool ConvertUTF8ToUTF16(const char* input,
+                        size_t input_len,
+                        CanonOutputT<char16_t>* output) {
+  bool success = true;
+  for (size_t i = 0; i < input_len; i++) {
+    base_icu::UChar32 code_point;
+    success &= ReadUTFChar(input, &i, input_len, &code_point);
+    AppendUTF16Value(code_point, output);
+  }
+  return success;
+}
+
+void SetupOverrideComponents(const char* base,
+                             const Replacements<char>& repl,
+                             URLComponentSource<char>* source,
+                             Parsed* parsed) {
+  // Get the source and parsed structures of the things we are replacing.
+  const URLComponentSource<char>& repl_source = repl.sources();
+  const Parsed& repl_parsed = repl.components();
+
+  DoOverrideComponent(repl_source.scheme, repl_parsed.scheme, &source->scheme,
+                      &parsed->scheme);
+  DoOverrideComponent(repl_source.username, repl_parsed.username,
+                      &source->username, &parsed->username);
+  DoOverrideComponent(repl_source.password, repl_parsed.password,
+                      &source->password, &parsed->password);
+
+  // Our host should be empty if not present, so override the default setup.
+  DoOverrideComponent(repl_source.host, repl_parsed.host, &source->host,
+                      &parsed->host);
+  if (parsed->host.len == -1)
+    parsed->host.len = 0;
+
+  DoOverrideComponent(repl_source.port, repl_parsed.port, &source->port,
+                      &parsed->port);
+  DoOverrideComponent(repl_source.path, repl_parsed.path, &source->path,
+                      &parsed->path);
+  DoOverrideComponent(repl_source.query, repl_parsed.query, &source->query,
+                      &parsed->query);
+  DoOverrideComponent(repl_source.ref, repl_parsed.ref, &source->ref,
+                      &parsed->ref);
+}
+
+bool SetupUTF16OverrideComponents(const char* base,
+                                  const Replacements<char16_t>& repl,
+                                  CanonOutput* utf8_buffer,
+                                  URLComponentSource<char>* source,
+                                  Parsed* parsed) {
+  bool success = true;
+
+  // Get the source and parsed structures of the things we are replacing.
+  const URLComponentSource<char16_t>& repl_source = repl.sources();
+  const Parsed& repl_parsed = repl.components();
+
+  success &= PrepareUTF16OverrideComponent(
+      repl_source.scheme, repl_parsed.scheme, utf8_buffer, &parsed->scheme);
+  success &=
+      PrepareUTF16OverrideComponent(repl_source.username, repl_parsed.username,
+                                    utf8_buffer, &parsed->username);
+  success &=
+      PrepareUTF16OverrideComponent(repl_source.password, repl_parsed.password,
+                                    utf8_buffer, &parsed->password);
+  success &= PrepareUTF16OverrideComponent(repl_source.host, repl_parsed.host,
+                                           utf8_buffer, &parsed->host);
+  success &= PrepareUTF16OverrideComponent(repl_source.port, repl_parsed.port,
+                                           utf8_buffer, &parsed->port);
+  success &= PrepareUTF16OverrideComponent(repl_source.path, repl_parsed.path,
+                                           utf8_buffer, &parsed->path);
+  success &= PrepareUTF16OverrideComponent(repl_source.query, repl_parsed.query,
+                                           utf8_buffer, &parsed->query);
+  success &= PrepareUTF16OverrideComponent(repl_source.ref, repl_parsed.ref,
+                                           utf8_buffer, &parsed->ref);
+
+  // PrepareUTF16OverrideComponent will not have set the data pointer since the
+  // buffer could be resized, invalidating the pointers. We set the data
+  // pointers for affected components now that the buffer is finalized.
+  if (repl_source.scheme)
+    source->scheme = utf8_buffer->data();
+  if (repl_source.username)
+    source->username = utf8_buffer->data();
+  if (repl_source.password)
+    source->password = utf8_buffer->data();
+  if (repl_source.host)
+    source->host = utf8_buffer->data();
+  if (repl_source.port)
+    source->port = utf8_buffer->data();
+  if (repl_source.path)
+    source->path = utf8_buffer->data();
+  if (repl_source.query)
+    source->query = utf8_buffer->data();
+  if (repl_source.ref)
+    source->ref = utf8_buffer->data();
+
+  return success;
+}
+
+#ifndef WIN32
+
+int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix) {
+  const char* format_str;
+  if (radix == 10)
+    format_str = "%d";
+  else if (radix == 16)
+    format_str = "%x";
+  else
+    return EINVAL;
+
+  int written = snprintf(buffer, size_in_chars, format_str, value);
+  if (static_cast<size_t>(written) >= size_in_chars) {
+    // Output was truncated, or written was negative.
+    return EINVAL;
+  }
+  return 0;
+}
+
+int _itow_s(int value, char16_t* buffer, size_t size_in_chars, int radix) {
+  if (radix != 10)  // Only decimal output is implemented.
+    return EINVAL;
+
+  // No more than 12 characters will be required for a 32-bit integer.
+  // Add an extra byte for the terminating null.
+  char temp[13];
+  int written = snprintf(temp, sizeof(temp), "%d", value);
+  if (static_cast<size_t>(written) >= size_in_chars) {
+    // |buffer| cannot hold the digits plus the null, or written was negative.
+    return EINVAL;
+  }
+
+  for (int i = 0; i < written; ++i) {
+    buffer[i] = static_cast<char16_t>(temp[i]);
+  }
+  buffer[written] = '\0';
+  return 0;
+}
+
+#endif  // !WIN32
+
+}  // namespace url
diff --git a/url_canon_internal.h b/url_canon_internal.h
new file mode 100644
index 00000000000..13481f5fdb6
--- /dev/null
+++ b/url_canon_internal.h
@@ -0,0 +1,471 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_URL_CANON_INTERNAL_H_
+#define URL_URL_CANON_INTERNAL_H_
+
+// This file is intended to be included in another C++ file where the character
+// types are defined. This allows us to write mostly generic code, but not have
+// template bloat because everything is inlined when anybody calls any of our
+// functions.
+
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "base/component_export.h"
+#include "base/notreached.h"
+#include "base/third_party/icu/icu_utf.h"
+#include "url/url_canon.h"
+
+namespace url {
+
+// Character type handling -----------------------------------------------------
+
+// Bits that identify different character types. These types identify different
+// bits that are set for each 8-bit character in the kSharedCharTypeTable.
+enum SharedCharTypes {
+  // Characters that do not require escaping in queries. Characters that do
+  // not have this flag will be escaped; see url_canon_query.cc.
+  CHAR_QUERY = 1,
+
+  // Valid in the username/password field.
+  CHAR_USERINFO = 2,
+
+  // Valid in an IPv4 address (digits plus dot and 'x' for hex).
+  CHAR_IPV4 = 4,
+
+  // Valid in an ASCII-representation of a hex digit (as in %-escaped).
+  CHAR_HEX = 8,
+
+  // Valid in an ASCII-representation of a decimal digit.
+  CHAR_DEC = 16,
+
+  // Valid in an ASCII-representation of an octal digit.
+  CHAR_OCT = 32,
+
+  // Characters that do not require escaping in encodeURIComponent. Characters
+  // that do not have this flag will be escaped; see url_util.cc.
+  CHAR_COMPONENT = 64,
+};
+
+// This table contains the flags in SharedCharTypes for each 8-bit character.
+// Some canonicalization functions have their own specialized lookup table.
+// For those with simple requirements, we have collected the flags in one
+// place so there are fewer lookup tables to load into the CPU cache.
+//
+// Using an unsigned char type has a small but measurable performance benefit
+// over using a 32-bit number.
+extern const unsigned char kSharedCharTypeTable[0x100];
+
+// More readable wrappers around the character type lookup table.
+inline bool IsCharOfType(unsigned char c, SharedCharTypes type) {
+  return !!(kSharedCharTypeTable[c] & type);
+}
+inline bool IsQueryChar(unsigned char c) {
+  return IsCharOfType(c, CHAR_QUERY);
+}
+inline bool IsIPv4Char(unsigned char c) {
+  return IsCharOfType(c, CHAR_IPV4);
+}
+inline bool IsHexChar(unsigned char c) {
+  return IsCharOfType(c, CHAR_HEX);
+}
+inline bool IsComponentChar(unsigned char c) {
+  return IsCharOfType(c, CHAR_COMPONENT);
+}
+
+// Appends the given string to the output, escaping characters that do not
+// match the given |type| in SharedCharTypes.
+void AppendStringOfType(const char* source,
+                        size_t length,
+                        SharedCharTypes type,
+                        CanonOutput* output);
+void AppendStringOfType(const char16_t* source,
+                        size_t length,
+                        SharedCharTypes type,
+                        CanonOutput* output);
+
+// Maps the hex numerical values 0x0 to 0xf to the corresponding ASCII digit
+// that will be used to represent it.
+COMPONENT_EXPORT(URL) extern const char kHexCharLookup[0x10];
+
+// This lookup table allows fast conversion between ASCII hex letters and their
+// corresponding numerical value. The 8-bit range is divided up into 8
+// regions of 0x20 characters each. Each of the three character types (numbers,
+// uppercase, lowercase) falls into different regions of this range. The table
+// contains the amount to subtract from characters in that range to get at
+// the corresponding numerical value.
+//
+// See HexCharToValue for the lookup.
+extern const char kCharToHexLookup[8];
+
+// Assumes the input is a valid hex digit! Call IsHexChar before using this.
+inline int HexCharToValue(unsigned char c) {
+  return c - kCharToHexLookup[c / 0x20];
+}
+
+// Indicates if the given character is a dot or dot equivalent, returning the
+// number of characters taken by it. This will be one for a literal dot, 3 for
+// an escaped dot. If the character is not a dot, this will return 0.
+template <typename CHAR>
+inline size_t IsDot(const CHAR* spec, size_t offset, size_t end) {
+  if (spec[offset] == '.') {
+    return 1;
+  } else if (spec[offset] == '%' && offset + 3 <= end &&
+             spec[offset + 1] == '2' &&
+             (spec[offset + 2] == 'e' || spec[offset + 2] == 'E')) {
+    // Found "%2e" or "%2E".
+    return 3;
+  }
+  return 0;
+}
+
+// Returns the canonicalized version of the input character according to scheme
+// rules. This is implemented alongside the scheme canonicalizer, and is
+// required for relative URL resolving to test for scheme equality.
+//
+// Returns 0 if the input character is not a valid scheme character.
+char CanonicalSchemeChar(char16_t ch);
+
+// Write a single character, escaped, to the output. This always escapes: it
+// does no checking that the character requires escaping.
+// Escaping only makes sense for 8-bit chars, so the code works in all cases
+// of input parameters (8/16 bit).
+template <typename UINCHAR, typename OUTCHAR>
+inline void AppendEscapedChar(UINCHAR ch, CanonOutputT<OUTCHAR>* output) {
+  output->push_back('%');
+  output->push_back(static_cast<OUTCHAR>(kHexCharLookup[(ch >> 4) & 0xf]));
+  output->push_back(static_cast<OUTCHAR>(kHexCharLookup[ch & 0xf]));
+}
+
+// The character we'll substitute for undecodable or invalid characters.
+extern const base_icu::UChar32 kUnicodeReplacementCharacter;
+
+// UTF-8 functions ------------------------------------------------------------
+
+// Reads one character in UTF-8 starting at |*begin| in |str| and places the
+// decoded value into |*code_point_out|. If the character is valid, we will
+// return true. If invalid, we'll return false and put the
+// kUnicodeReplacementCharacter into |*code_point_out|.
+//
+// |*begin| will be updated to point to the last character consumed so it
+// can be incremented in a loop and will be ready for the next character.
+// (for a single-byte ASCII character, it will not be changed).
+COMPONENT_EXPORT(URL)
+bool ReadUTFChar(const char* str,
+                 size_t* begin,
+                 size_t length,
+                 base_icu::UChar32* code_point_out);
+
+// Generic To-UTF-8 converter. This will call the given append method for each
+// character that should be appended, with the given output method. Wrappers
+// are provided below for escaped and non-escaped versions of this.
+//
+// The char_value must have already been checked that it's a valid Unicode
+// character.
+template <class Output, void Appender(unsigned char, Output*)>
+inline void DoAppendUTF8(base_icu::UChar32 char_value, Output* output) {
+  DCHECK(char_value >= 0);
+  DCHECK(char_value <= 0x10FFFF);
+  if (char_value <= 0x7f) {
+    Appender(static_cast<unsigned char>(char_value), output);
+  } else if (char_value <= 0x7ff) {
+    // 110xxxxx 10xxxxxx
+    Appender(static_cast<unsigned char>(0xC0 | (char_value >> 6)), output);
+    Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)), output);
+  } else if (char_value <= 0xffff) {
+    // 1110xxxx 10xxxxxx 10xxxxxx
+    Appender(static_cast<unsigned char>(0xe0 | (char_value >> 12)), output);
+    Appender(static_cast<unsigned char>(0x80 | ((char_value >> 6) & 0x3f)),
+             output);
+    Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)), output);
+  } else {
+    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+    Appender(static_cast<unsigned char>(0xf0 | (char_value >> 18)), output);
+    Appender(static_cast<unsigned char>(0x80 | ((char_value >> 12) & 0x3f)),
+             output);
+    Appender(static_cast<unsigned char>(0x80 | ((char_value >> 6) & 0x3f)),
+             output);
+    Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)), output);
+  }
+}
+
+// Helper used by AppendUTF8Value below. We use an unsigned parameter so there
+// are no funny sign problems with the input, but then have to convert it to
+// a regular char for appending.
+inline void AppendCharToOutput(unsigned char ch, CanonOutput* output) {
+  output->push_back(static_cast<char>(ch));
+}
+
+// Writes the given character to the output as UTF-8. This does NO checking
+// of the validity of the Unicode characters; the caller should ensure that
+// the value it is appending is valid to append.
+inline void AppendUTF8Value(base_icu::UChar32 char_value, CanonOutput* output) {
+  DoAppendUTF8<CanonOutput, AppendCharToOutput>(char_value, output);
+}
+
+// Writes the given character to the output as UTF-8, escaping ALL
+// characters (even when they are ASCII). This does NO checking of the
+// validity of the Unicode characters; the caller should ensure that the value
+// it is appending is valid to append.
+inline void AppendUTF8EscapedValue(base_icu::UChar32 char_value,
+                                   CanonOutput* output) {
+  DoAppendUTF8<CanonOutput, AppendEscapedChar>(char_value, output);
+}
+
+// UTF-16 functions -----------------------------------------------------------
+
+// Reads one character in UTF-16 starting at |*begin| in |str| and places the
+// decoded value into |*code_point_out|. If the character is valid, we will
+// return true. If invalid, we'll return false and put the
+// kUnicodeReplacementCharacter into |*code_point_out|.
+//
+// |*begin| will be updated to point to the last character consumed so it
+// can be incremented in a loop and will be ready for the next character.
+// (for a single-16-bit-word character, it will not be changed).
+COMPONENT_EXPORT(URL)
+bool ReadUTFChar(const char16_t* str,
+                 size_t* begin,
+                 size_t length,
+                 base_icu::UChar32* code_point_out);
+
+// Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method.
+inline void AppendUTF16Value(base_icu::UChar32 code_point,
+                             CanonOutputT<char16_t>* output) {
+  if (code_point > 0xffff) {
+    output->push_back(static_cast<char16_t>((code_point >> 10) + 0xd7c0));
+    output->push_back(static_cast<char16_t>((code_point & 0x3ff) | 0xdc00));
+  } else {
+    output->push_back(static_cast<char16_t>(code_point));
+  }
+}
+
+// Escaping functions ---------------------------------------------------------
+
+// Writes the given character to the output as UTF-8, escaped. Call this
+// function only when the input is wide. Returns true on success. Failure
+// means there was some problem with the encoding, we'll still try to
+// update the |*begin| pointer and add a placeholder character to the
+// output so processing can continue.
+//
+// We will append the character starting at str[*begin] with the buffer str
+// being |length|. |*begin| will be updated to point to the last character
+// consumed (we may consume more than one for UTF-16) so that if called in
+// a loop, incrementing the index will move to the next character.
+//
+// Every single output character will be escaped. This means that if you
+// give it an ASCII character as input, it will be escaped. Some code uses
+// this when it knows that a character is invalid according to its rules
+// for validity. If you don't want escaping for ASCII characters, you will
+// have to filter them out prior to calling this function.
+//
+// Assumes that str[*begin] is within range in the array, but does not assume
+// that any following characters are.
+inline bool AppendUTF8EscapedChar(const char16_t* str,
+                                  size_t* begin,
+                                  size_t length,
+                                  CanonOutput* output) {
+  // UTF-16 input. ReadUTFChar will handle invalid characters for us and give
+  // us the kUnicodeReplacementCharacter, so we don't have to do special
+  // checking after failure, just pass through the failure to the caller.
+  base_icu::UChar32 char_value;
+  bool success = ReadUTFChar(str, begin, length, &char_value);
+  AppendUTF8EscapedValue(char_value, output);
+  return success;
+}
+
+// Handles UTF-8 input. See the wide version above for usage.
+inline bool AppendUTF8EscapedChar(const char* str,
+                                  size_t* begin,
+                                  size_t length,
+                                  CanonOutput* output) {
+  // ReadUTFChar will handle invalid characters for us and give us the
+  // kUnicodeReplacementCharacter, so we don't have to do special checking
+  // after failure, just pass through the failure to the caller.
+  base_icu::UChar32 ch;
+  bool success = ReadUTFChar(str, begin, length, &ch);
+  AppendUTF8EscapedValue(ch, output);
+  return success;
+}
+
+// DecodeEscaped (below, after its Is8BitChar helpers): given a '%' character
+// at |*begin| in the string |spec|, this will decode the escaped value and
+// put it into |*unescaped_value| on success (returns true). On failure, this
+// will return false, and will not write into |*unescaped_value|.
+//
+// |*begin| will be updated to point to the last character of the escape
+// sequence so that when called with the index of a for loop, the next time
+// through it will point to the next character to be considered. On failure,
+// |*begin| will be unchanged.
+inline bool Is8BitChar(char c) {
+  return true;  // this case is specialized to avoid a warning
+}
+inline bool Is8BitChar(char16_t c) {
+  return c <= 255;
+}
+
+template <typename CHAR>
+inline bool DecodeEscaped(const CHAR* spec,
+                          size_t* begin,
+                          size_t end,
+                          unsigned char* unescaped_value) {
+  if (*begin + 3 > end || !Is8BitChar(spec[*begin + 1]) ||
+      !Is8BitChar(spec[*begin + 2])) {
+    // Invalid escape sequence because there's not enough room, or the
+    // two characters after the '%' do not fit in 8 bits.
+    return false;
+  }
+
+  unsigned char first = static_cast<unsigned char>(spec[*begin + 1]);
+  unsigned char second = static_cast<unsigned char>(spec[*begin + 2]);
+  if (!IsHexChar(first) || !IsHexChar(second)) {
+    // Invalid hex digits, fail.
+    return false;
+  }
+
+  // Valid escape sequence.
+  *unescaped_value = static_cast<unsigned char>((HexCharToValue(first) << 4) +
+                                                HexCharToValue(second));
+  *begin += 2;  // Leave |*begin| on the second hex digit.
+  return true;
+}
+
+// Appends the given substring to the output, escaping "some" characters that
+// it feels may not be safe. It assumes the input values are all contained in
+// 8-bit although it allows any type.
+//
+// This is used in error cases to append invalid output so that it looks
+// approximately correct. Non-error cases should not call this function since
+// the escaping rules are not guaranteed!
+void AppendInvalidNarrowString(const char* spec,
+                               size_t begin,
+                               size_t end,
+                               CanonOutput* output);
+void AppendInvalidNarrowString(const char16_t* spec,
+                               size_t begin,
+                               size_t end,
+                               CanonOutput* output);
+
+// Misc canonicalization helpers ----------------------------------------------
+
+// Converts between UTF-8 and UTF-16, returning true on successful conversion.
+// The output will be appended to the given canonicalizer output (so make sure
+// it's empty if you want to replace).
+//
+// On invalid input, this will still write as much output as possible,
+// replacing the invalid characters with the "invalid character". It will
+// return false in the failure case, and the caller should not continue as
+// normal.
+COMPONENT_EXPORT(URL)
+bool ConvertUTF16ToUTF8(const char16_t* input,
+                        size_t input_len,
+                        CanonOutput* output);
+COMPONENT_EXPORT(URL)
+bool ConvertUTF8ToUTF16(const char* input,
+                        size_t input_len,
+                        CanonOutputT<char16_t>* output);
+
+// Converts from UTF-16 to 8-bit using the character set converter. If the
+// converter is NULL, this will use UTF-8.
+void ConvertUTF16ToQueryEncoding(const char16_t* input,
+                                 const Component& query,
+                                 CharsetConverter* converter,
+                                 CanonOutput* output);
+
+// Applies the replacements to the given component source. The component source
+// should be pre-initialized to the "old" base. That is, all pointers will
+// point to the spec of the old URL, and all of the Parsed components will
+// be indices into that string.
+//
+// The pointers and components in the |source| for all non-NULL strings in the
+// |repl| (replacements) will be updated to reference those strings.
+// Canonicalizing with the new |source| and |parsed| can then combine URL
+// components from many different strings.
+void SetupOverrideComponents(const char* base,
+                             const Replacements<char>& repl,
+                             URLComponentSource<char>* source,
+                             Parsed* parsed);
+
+// Like the above 8-bit version, except that it additionally converts the
+// UTF-16 input to UTF-8 before doing the overrides.
+//
+// The given utf8_buffer is used to store the converted components. They will
+// be appended one after another, with the parsed structure identifying the
+// appropriate substrings. This buffer is a parameter because the source has
+// no storage, so the buffer must have the same lifetime as the source
+// parameter owned by the caller.
+//
+// THE CALLER MUST NOT ADD TO THE |utf8_buffer| AFTER THIS CALL. Members of
+// |source| will point into this buffer, which could be invalidated if
+// additional data is added and the CanonOutput resizes its buffer.
+//
+// Returns true on success. False means that the input was not valid UTF-16,
+// although we will have still done the override with "invalid characters" in
+// place of errors.
+bool SetupUTF16OverrideComponents(const char* base,
+                                  const Replacements<char16_t>& repl,
+                                  CanonOutput* utf8_buffer,
+                                  URLComponentSource<char>* source,
+                                  Parsed* parsed);
+
+// Implemented in url_canon_path.cc, these are required by the relative URL
+// resolver as well, so we declare them here.
+bool CanonicalizePartialPathInternal(const char* spec,
+                                     const Component& path,
+                                     size_t path_begin_in_output,
+                                     CanonOutput* output);
+bool CanonicalizePartialPathInternal(const char16_t* spec,
+                                     const Component& path,
+                                     size_t path_begin_in_output,
+                                     CanonOutput* output);
+
+// Find the position of a bona fide Windows drive letter in the given path. If
+// no leading drive letter is found, -1 is returned. This function correctly
+// treats /c:/foo and /./c:/foo as having drive letters, and /def/c:/foo as not
+// having a drive letter.
+//
+// Exported for tests.
+COMPONENT_EXPORT(URL)
+int FindWindowsDriveLetter(const char* spec, int begin, int end);
+COMPONENT_EXPORT(URL)
+int FindWindowsDriveLetter(const char16_t* spec, int begin, int end);
+
+#ifndef WIN32
+
+// Implementations of Windows' int-to-string conversions.
+COMPONENT_EXPORT(URL)
+int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix);
+COMPONENT_EXPORT(URL)
+int _itow_s(int value, char16_t* buffer, size_t size_in_chars, int radix);
+
+// Secure template overloads that take the size N from the array argument.
+template <size_t N>
+inline int _itoa_s(int value, char (&buffer)[N], int radix) {
+  return _itoa_s(value, buffer, N, radix);
+}
+
+template <size_t N>
+inline int _itow_s(int value, char16_t (&buffer)[N], int radix) {
+  return _itow_s(value, buffer, N, radix);
+}
+
+// _strtoui64 and strtoull behave the same.
+inline unsigned long long _strtoui64(const char* nptr,
+                                     char** endptr,
+                                     int base) {
+  return strtoull(nptr, endptr, base);
+}
+
+#endif  // !WIN32
+
+// The threshold we set to consider SIMD processing, in bytes; there is
+// no deep theory here, it's just set empirically to a value that seems
+// to be good. (We don't really know why there's a slowdown for zero;
+// but a guess would be that there's no need in going into a complex loop
+// with a lot of setup for a five-byte string.)
+static constexpr int kMinimumLengthForSIMD = 50;
+
+}  // namespace url
+
+#endif  // URL_URL_CANON_INTERNAL_H_
diff --git a/url_canon_internal_file.h b/url_canon_internal_file.h
new file mode 100644
index 00000000000..32cb84096b2
--- /dev/null
+++ b/url_canon_internal_file.h
@@ -0,0 +1,135 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_URL_CANON_INTERNAL_FILE_H_
+#define URL_URL_CANON_INTERNAL_FILE_H_
+
+// As with url_canon_internal.h, this file is intended to be included in
+// another C++ file where the template types are defined. This allows
+// programmers to use these functions with their own string types,
+// without bloating the code by having inline templates used in every
+// call site.
+//
+// *** This file must be included after url_canon_internal.h, as we depend on
+// some functions in it. ***
+
+#include "base/strings/string_util.h"
+#include "url/url_file.h"
+#include "url/url_parse_internal.h"
+
+namespace url {
+
+// Given a pointer into the spec, this copies and canonicalizes the drive
+// letter to the output as "X:/" (uppercased letter, colon, slash), if a drive
+// spec is found after any leading slashes. If there is not a drive spec, it
+// won't do anything. The index of the next character in the input spec is
+// returned (after the colon/pipe when found, the begin offset if not).
+template<typename CHAR>
+static int FileDoDriveSpec(const CHAR* spec, int begin, int end,
+                           CanonOutput* output) {
+  // The path could be one of several things: /foo/bar, c:/foo/bar, /c:/foo,
+  // (with backslashes instead of slashes as well).
+  int num_slashes = CountConsecutiveSlashes(spec, begin, end);
+  int after_slashes = begin + num_slashes;
+
+  if (!DoesBeginWindowsDriveSpec(spec, after_slashes, end))
+    return begin;  // Haven't consumed any characters
+
+  // DoesBeginWindowsDriveSpec will ensure that the drive letter is valid
+  // and that it is followed by a colon/pipe.
+
+  // Normalize the drive letter to uppercase; each branch narrows to char.
+  if (base::IsAsciiLower(spec[after_slashes]))
+    output->push_back(static_cast<char>(spec[after_slashes] - 'a' + 'A'));
+  else
+    output->push_back(static_cast<char>(spec[after_slashes]));
+
+  // Normalize the character following it to a colon rather than pipe, then
+  output->push_back(':');
+  output->push_back('/');  // terminate the drive spec with a slash.
+  return after_slashes + 2;
+}
+
+// Canonicalizes the part of a file path that follows the drive spec. A slash
+// has already been written to the output before this runs, so any slashes
+// found at |begin| are skipped rather than copied.
+template<typename CHAR, typename UCHAR>
+static void FileDoPath(const CHAR* spec, int begin, int end,
+                       CanonOutput* output) {
+  // Collapse however many slashes follow the drive letter; the input may
+  // equally well have none at all.
+  int path_start = begin + CountConsecutiveSlashes(spec, begin, end);
+
+  // Everything after those slashes is handed to the regular path
+  // canonicalizer. It assumes a nonempty path already starts with a slash
+  // and so will not prepend one.
+  ParsedComponent remainder(path_start, end - path_start);
+  // Nothing left after the slashes: leave the output untouched.
+  if (remainder.len <= 0)
+    return;
+
+  // The canonicalizer wants an output component to fill in. The caller
+  // computes the full path component itself, so this one is discarded.
+  ParsedComponent discarded_path;
+  URLCanonInternal<CHAR, UCHAR>::DoPath(
+      spec, remainder, output, &discarded_path);
+}
+
+template<typename CHAR, typename UCHAR>
+static bool DoCanonicalizeFileURL(const URLComponentSource<CHAR>& source,
+                                  const ParsedURL& parsed,
+                                  CanonOutput* output,
+                                  ParsedURL* new_parsed) {
+  // file: URLs never get a username, password or port; mark all three
+  // with the same "not present" component.
+  const ParsedComponent not_present(0, -1);
+  new_parsed->username = not_present;
+  new_parsed->password = not_present;
+  new_parsed->port = not_present;
+
+  // The scheme is known, so write it out directly rather than running it
+  // through the more complicated scheme canonicalizer.
+  new_parsed->scheme.begin = output->length();
+  for (const char* letter = "file"; *letter != '\0'; ++letter)
+    output->push_back(*letter);
+  new_parsed->scheme.len = output->length() - new_parsed->scheme.begin;
+
+  // Scheme terminator, then the two slashes that introduce the host.
+  output->push_back(':');
+  output->push_back('/');
+  output->push_back('/');
+
+  // Host: empty for many file URLs, present for UNC paths.
+  // TODO(brettw) This doesn't do any checking for host name validity. We
+  // should probably handle validity checking of UNC hosts differently than
+  // for regular IP hosts.
+  bool success = URLCanonInternal<CHAR, UCHAR>::DoHost(
+      source.host, parsed.host, output, &new_parsed->host);
+
+  // The path always starts with a slash, written here; slashes already at
+  // the beginning of the input path are skipped by the helpers below.
+  new_parsed->path.begin = output->length();
+  output->push_back('/');
+
+  // Emit a normalized drive spec ("c:"), if the path starts with one, then
+  // canonicalize whatever follows it.
+  const int path_end = parsed.path.end();
+  int after_drive =
+      FileDoDriveSpec(source.path, parsed.path.begin, path_end, output);
+  FileDoPath<CHAR, UCHAR>(source.path, after_drive, path_end, output);
+  new_parsed->path.len = output->length() - new_parsed->path.begin;
+
+  // Query and ref use the standard canonicalizers; a failure in either is
+  // folded into the overall result.
+  success &= URLCanonInternal<CHAR, UCHAR>::DoQuery(
+      source.query, parsed.query, output, &new_parsed->query);
+  success &= URLCanonInternal<CHAR, UCHAR>::DoRef(
+      source.ref, parsed.ref, output, &new_parsed->ref);
+
+  return success;
+}
+
+}  // namespace url
+
+#endif  // URL_URL_CANON_INTERNAL_FILE_H_
diff --git a/url_canon_ip.cc b/url_canon_ip.cc
new file mode 100644
index 00000000000..783ddccf852
--- /dev/null
+++ b/url_canon_ip.cc
@@ -0,0 +1,690 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/url_canon_ip.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <limits>
+
+#include "base/check.h"
+#include "url/url_canon_internal.h"
+#include "url/url_features.h"
+
+namespace url {
+
+namespace {
+
+// Maps a character class that denotes a numeric base (hex, decimal or
+// octal) to that base's radix. Any other character class maps to 0, which
+// callers can treat as "no base".
+int BaseForType(SharedCharTypes type) {
+  if (type == CHAR_HEX)
+    return 16;
+  if (type == CHAR_DEC)
+    return 10;
+  if (type == CHAR_OCT)
+    return 8;
+
+  // Not one of the numeric-base character classes.
+  return 0;
+}
+
+// Converts one IPv4 component of |spec| (decimal, 0-prefixed octal, or
+// 0x-prefixed hex) to a 32-bit number, while checking for overflow.
+//
+// Possible return values:
+// - IPV4    - The number was valid, and did not overflow; |*number| is set.
+// - BROKEN  - The input was numeric, but too large for a 32-bit field, or
+//             it had a leading 0 but digits that aren't octal.
+// - NEUTRAL - Input was empty, non-ASCII, or otherwise not numeric.
+template<typename CHAR>
+CanonHostInfo::Family IPv4ComponentToNumber(const CHAR* spec,
+                                            const Component& component,
+                                            uint32_t* number) {
+  // Empty components are considered non-numeric.
+  if (component.is_empty())
+    return CanonHostInfo::NEUTRAL;
+
+  // Figure out the base
+  SharedCharTypes base;
+  int base_prefix_len = 0;  // Size of the prefix for this base.
+  if (spec[component.begin] == '0') {
+    // Either hex or octal, or a standalone zero.
+    if (component.len == 1) {
+      base = CHAR_DEC;
+    } else if (spec[component.begin + 1] == 'X' ||
+               spec[component.begin + 1] == 'x') {
+      base = CHAR_HEX;
+      base_prefix_len = 2;
+    } else {
+      base = CHAR_OCT;
+      base_prefix_len = 1;
+    }
+  } else {
+    base = CHAR_DEC;
+  }
+
+  // Extend the prefix to consume all leading zeros.
+  while (base_prefix_len < component.len &&
+         spec[component.begin + base_prefix_len] == '0')
+    base_prefix_len++;
+
+  // Put the component, minus any base prefix, into a NULL-terminated buffer so
+  // we can call the standard library. Because leading zeros have already been
+  // discarded, filling the entire buffer is guaranteed to trigger the 32-bit
+  // overflow check.
+  const int kMaxComponentLen = 16;
+  char buf[kMaxComponentLen + 1];  // digits + '\0'
+  int dest_i = 0;
+  bool may_be_broken_octal = false;
+  for (int i = component.begin + base_prefix_len; i < component.end(); i++) {
+    if (static_cast<uint32_t>(spec[i]) >= 0x80)
+      return CanonHostInfo::NEUTRAL;
+    // (The comparison above is done as unsigned because plain char may be
+    // signed.) The input is now known to be 7-bit, so convert to narrow (if
+    // this is the wide version of the template) by casting.
+    char input = static_cast<char>(spec[i]);
+
+    // Validate that this character is OK for the given base.
+    if (!IsCharOfType(input, base)) {
+      if (IsCharOfType(input, CHAR_DEC)) {
+        // Entirely numeric components with leading 0s that aren't octal are
+        // considered broken.
+        may_be_broken_octal = true;
+      } else {
+        return CanonHostInfo::NEUTRAL;
+      }
+    }
+
+    // Fill the buffer, if there's space remaining. This check allows us to
+    // verify that all characters are numeric, even those that don't fit.
+    if (dest_i < kMaxComponentLen)
+      buf[dest_i++] = input;
+  }
+
+  if (may_be_broken_octal)
+    return CanonHostInfo::BROKEN;
+
+  buf[dest_i] = '\0';
+
+  // Use the 64-bit strtoi so we get a big number (no hex, decimal, or octal
+  // number can overflow a 64-bit number in <= 16 characters).
+  uint64_t num = _strtoui64(buf, nullptr, BaseForType(base));
+
+  // Check for 32-bit overflow.
+  if (num > std::numeric_limits<uint32_t>::max())
+    return CanonHostInfo::BROKEN;
+
+  // No overflow. Success!
+  *number = static_cast<uint32_t>(num);
+  return CanonHostInfo::IPV4;
+}
+
+// See declaration of IPv4AddressToNumber for documentation. UCHAR is unused.
+template <typename CHAR, typename UCHAR>
+CanonHostInfo::Family DoIPv4AddressToNumber(const CHAR* spec,
+                                            Component host,
+                                            unsigned char address[4],
+                                            int* num_ipv4_components) {
+  // Ignore terminal dot, if present (|host| is a local copy).
+  if (host.is_nonempty() && spec[host.end() - 1] == '.')
+    --host.len;
+
+  // Do nothing if empty.
+  if (host.is_empty())
+    return CanonHostInfo::NEUTRAL;
+
+  // Read component values, scanning the host from right to left. Values are
+  // stored front to back in scan order, so component_values[0] is the last
+  // (rightmost) component. Starting there allows for early exit if the last
+  // component isn't a number.
+  uint32_t component_values[4];
+  int existing_components = 0;
+
+  int current_component_end = host.end();
+  int current_position = current_component_end;
+  while (true) {
+    // Keep stepping backwards until |current_position| is the first character
+    // of a component: the start of the host, or just after a '.'.
+    if (current_position != host.begin && spec[current_position - 1] != '.') {
+      --current_position;
+      continue;
+    }
+
+    CanonHostInfo::Family family = IPv4ComponentToNumber(
+        spec,
+        Component(current_position, current_component_end - current_position),
+        &component_values[existing_components]);
+
+    // If `family` is NEUTRAL and this is the last component, return NEUTRAL. If
+    // `family` is NEUTRAL but not the last component, this is considered a
+    // BROKEN IPv4 address, as opposed to a non-IPv4 hostname.
+    if (family == CanonHostInfo::NEUTRAL && existing_components == 0)
+      return CanonHostInfo::NEUTRAL;
+
+    if (family != CanonHostInfo::IPV4)
+      return CanonHostInfo::BROKEN;
+
+    ++existing_components;
+
+    // If this is the final component, nothing else to do.
+    if (current_position == host.begin)
+      break;
+
+    // Four components are already stored and input remains: too many, fail.
+    if (existing_components == 4)
+      return CanonHostInfo::BROKEN;
+
+    current_component_end = current_position - 1;
+    --current_position;
+  }
+
+  // Use `component_values` to fill out the 4-component IP address.
+
+  // First, process all components but the last, while making sure each fits
+  // within an 8-bit field.
+  for (int i = existing_components - 1; i > 0; i--) {
+    if (component_values[i] > std::numeric_limits<uint8_t>::max())
+      return CanonHostInfo::BROKEN;
+    address[existing_components - i - 1] =
+        static_cast<unsigned char>(component_values[i]);
+  }
+  // The last component fills all remaining bytes, least significant last.
+  uint32_t last_value = component_values[0];
+  for (int i = 3; i >= existing_components - 1; i--) {
+    address[i] = static_cast<unsigned char>(last_value);
+    last_value >>= 8;
+  }
+
+  // If the last component has residual bits, report overflow.
+  if (last_value != 0)
+    return CanonHostInfo::BROKEN;
+
+  // Tell the caller how many components we saw (written only on success).
+  *num_ipv4_components = existing_components;
+
+  // Success!
+  return CanonHostInfo::IPV4;
+}
+
+// Returns true if a final IPV4/BROKEN decision was made for |host|, false if
+// the result is NEUTRAL and the caller could use a second opinion.
+template<typename CHAR, typename UCHAR>
+bool DoCanonicalizeIPv4Address(const CHAR* spec,
+                               const Component& host,
+                               CanonOutput* output,
+                               CanonHostInfo* host_info) {
+  const CanonHostInfo::Family family = IPv4AddressToNumber(
+      spec, host, host_info->address, &host_info->num_ipv4_components);
+  host_info->family = family;
+
+  // Definitely broken: nothing is written, but the decision is final.
+  if (family == CanonHostInfo::BROKEN)
+    return true;
+
+  // Anything else that isn't IPV4 could still be IPv6 or a hostname.
+  if (family != CanonHostInfo::IPV4)
+    return false;
+
+  // Definitely an IPv4 address: append its canonical form.
+  host_info->out_host.begin = output->length();
+  AppendIPv4Address(host_info->address, output);
+  host_info->out_host.len = output->length() - host_info->out_host.begin;
+  return true;
+}
+
+// Helper struct that describes the main components of an IPv6 input string.
+// Examples of how an input is broken up (offsets count the leading '['):
+//
+// [Example 1]: input = "[::aa:bb]"
+//  ==> num_hex_components = 2
+//  ==> hex_components[0] = Component(3,2) "aa"
+//  ==> hex_components[1] = Component(6,2) "bb"
+//  ==> index_of_contraction = 0
+//  ==> ipv4_component = Component(0, -1)
+//
+// [Example 2]: input = "[1:2::3:4:5]"
+//  ==> num_hex_components = 5
+//  ==> hex_components[0] = Component(1,1) "1"
+//  ==> hex_components[1] = Component(3,1) "2"
+//  ==> hex_components[2] = Component(6,1) "3"
+//  ==> hex_components[3] = Component(8,1) "4"
+//  ==> hex_components[4] = Component(10,1) "5"
+//  ==> index_of_contraction = 2
+//  ==> ipv4_component = Component(0, -1)
+//
+// [Example 3]: input = "[::ffff:192.168.0.1]"
+//  ==> num_hex_components = 1
+//  ==> hex_components[0] = Component(3,4) "ffff"
+//  ==> index_of_contraction = 0
+//  ==> ipv4_component = Component(8, 11) "192.168.0.1"
+//
+// [Example 4]: input = "[1::]"
+//  ==> num_hex_components = 1
+//  ==> hex_components[0] = Component(1,1) "1"
+//  ==> index_of_contraction = 1
+//  ==> ipv4_component = Component(0, -1)
+//
+// [Example 5]: input = "[::192.168.0.1]"
+//  ==> num_hex_components = 0
+//  ==> index_of_contraction = 0
+//  ==> ipv4_component = Component(3, 11) "192.168.0.1"
+//
+struct IPv6Parsed {
+  // Resets the counts and the IPv4 range; |hex_components| is left as-is.
+  void reset() {
+    num_hex_components = 0;
+    index_of_contraction = -1;
+    ipv4_component.reset();
+  }
+
+  // There can be up to 8 hex components (colon separated) in the literal.
+  Component hex_components[8];
+
+  // The count of hex components present. Ranges from [0,8].
+  int num_hex_components;
+
+  // The index of the hex component that the "::" contraction precedes, or
+  // -1 if there is no contraction.
+  int index_of_contraction;
+
+  // The range of characters which are an IPv4 literal.
+  Component ipv4_component;
+};
+
+// Parse the IPv6 input string. If parsing succeeded returns true and fills
+// |parsed| with the information. If parsing failed (because the input is
+// invalid) returns false. |host| is the address range without brackets.
+template<typename CHAR, typename UCHAR>
+bool DoParseIPv6(const CHAR* spec, const Component& host, IPv6Parsed* parsed) {
+  // Zero-out the info.
+  parsed->reset();
+
+  if (host.is_empty())
+    return false;
+
+  // The index for start and end of address range (no brackets).
+  int begin = host.begin;
+  int end = host.end();
+
+  int cur_component_begin = begin;  // Start of the current component.
+
+  // Scan through the input, searching for hex components, "::" contractions,
+  // and IPv4 components. Note that spec[end] is read on the final iteration.
+  for (int i = begin; /* i <= end */; i++) {
+    bool is_colon = spec[i] == ':';
+    bool is_contraction = is_colon && i < end - 1 && spec[i + 1] == ':';
+
+    // We reached the end of the current component if we encounter a colon
+    // (separator between hex components, or start of a contraction), or end of
+    // input.
+    if (is_colon || i == end) {
+      int component_len = i - cur_component_begin;
+
+      // A component should not have more than 4 hex digits.
+      if (component_len > 4)
+        return false;
+
+      // Don't allow empty components.
+      if (component_len == 0) {
+        // The exception is when contractions appear at beginning of the
+        // input or at the end of the input.
+        if (!((is_contraction && i == begin) || (i == end &&
+            parsed->index_of_contraction == parsed->num_hex_components)))
+          return false;
+      }
+
+      // Add the hex component we just found to running list.
+      if (component_len > 0) {
+        // Can't have more than 8 components!
+        if (parsed->num_hex_components >= 8)
+          return false;
+
+        parsed->hex_components[parsed->num_hex_components++] =
+            Component(cur_component_begin, component_len);
+      }
+    }
+
+    if (i == end)
+      break;  // Reached the end of the input, DONE.
+
+    // We found a "::" contraction.
+    if (is_contraction) {
+      // There can be at most one contraction in the literal.
+      if (parsed->index_of_contraction != -1)
+        return false;
+      parsed->index_of_contraction = parsed->num_hex_components;
+      ++i;  // Consume the colon we peeked.
+    }
+
+    if (is_colon) {
+      // Colons are separators between components, keep track of where the
+      // current component started (after this colon).
+      cur_component_begin = i + 1;
+    } else {
+      if (static_cast<UCHAR>(spec[i]) >= 0x80)
+        return false;  // Not ASCII.
+
+      if (!IsHexChar(static_cast<unsigned char>(spec[i]))) {
+        // Regular components are hex numbers. It is also possible for
+        // a component to be an IPv4 address in dotted form.
+        if (IsIPv4Char(static_cast<unsigned char>(spec[i]))) {
+          // Since IPv4 address can only appear at the end, assume the rest
+          // of the string is an IPv4 address. (We will parse this separately
+          // later). The range starts at the current component's first char.
+          parsed->ipv4_component =
+              Component(cur_component_begin, end - cur_component_begin);
+          break;
+        } else {
+          // The character was neither a hex digit, nor an IPv4 character.
+          return false;
+        }
+      }
+    }
+  }
+
+  return true;
+}
+
+// Verifies that the parsed IPv6 pieces add up to exactly 128 bits. Each hex
+// component contributes 16 bits, an embedded IPv4 address contributes 32
+// bits, and a "::" contraction is a placeholder for 16 or more bits.
+// Returns true if the sizes match up, false otherwise. On success the
+// length of the contraction in bytes (0 if there is none) is written to
+// |out_num_bytes_of_contraction|.
+bool CheckIPv6ComponentsSize(const IPv6Parsed& parsed,
+                             int* out_num_bytes_of_contraction) {
+  const int kAddressBytes = 16;
+  const int kMinContractionBytes = 2;
+  // Bytes spelled out explicitly: two per hex group, plus four when an IPv4
+  // address is embedded at the end.
+  const int explicit_bytes = parsed.num_hex_components * 2 +
+                             (parsed.ipv4_component.is_valid() ? 4 : 0);
+
+  // A contraction absorbs whatever is left over, but always stands for at
+  // least one 16-bit group.
+  int contraction_bytes = 0;
+  if (parsed.index_of_contraction != -1) {
+    const int leftover = kAddressBytes - explicit_bytes;
+    contraction_bytes =
+        leftover < kMinContractionBytes ? kMinContractionBytes : leftover;
+  }
+
+  // The pieces must account for the full address, no more and no less.
+  if (explicit_bytes + contraction_bytes != kAddressBytes)
+    return false;
+
+  *out_num_bytes_of_contraction = contraction_bytes;
+  return true;
+}
+
+// Converts a hex component into a number. This cannot fail: the caller has
+// already verified that every character is a hex digit and that there are
+// no more than 4 of them.
+template <typename CHAR>
+uint16_t IPv6HexComponentToNumber(const CHAR* spec,
+                                  const Component& component) {
+  DCHECK(component.len <= 4);
+
+  // Narrow the digits into a NUL-terminated scratch buffer.
+  const CHAR* digits = spec + component.begin;
+  char buf[5];
+  for (int i = 0; i < component.len; ++i)
+    buf[i] = static_cast<char>(digits[i]);
+  buf[component.len] = '\0';
+
+  // Up to 4 hex digits can never exceed 16 bits, so the cast is lossless.
+  return static_cast<uint16_t>(_strtoui64(buf, nullptr, 16));
+}
+
+// Converts a bracketed IPv6 literal to a 128-bit number (network byte order),
+// returning true on success. On failure |address| may be partially written.
+template<typename CHAR, typename UCHAR>
+bool DoIPv6AddressToNumber(const CHAR* spec,
+                           const Component& host,
+                           unsigned char address[16]) {
+  // Make sure the component is bounded by '[' and ']'.
+  int end = host.end();
+  if (host.is_empty() || spec[host.begin] != '[' || spec[end - 1] != ']')
+    return false;
+
+  // Exclude the square brackets.
+  Component ipv6_comp(host.begin + 1, host.len - 2);
+
+  // Parse the IPv6 address -- identify where all the colon separated hex
+  // components are, the "::" contraction, and the embedded IPv4 address.
+  IPv6Parsed ipv6_parsed;
+  if (!DoParseIPv6<CHAR, UCHAR>(spec, ipv6_comp, &ipv6_parsed))
+    return false;
+
+  // Do some basic size checks to make sure that the address doesn't
+  // specify more than 128 bits or fewer than 128 bits. This also resolves
+  // how many zero bytes the "::" contraction represents.
+  int num_bytes_of_contraction;
+  if (!CheckIPv6ComponentsSize(ipv6_parsed, &num_bytes_of_contraction))
+    return false;
+
+  int cur_index_in_address = 0;
+
+  // Loop through each hex component, and the contraction, in order. The loop
+  // runs one extra step so that a trailing contraction is also emitted.
+  for (int i = 0; i <= ipv6_parsed.num_hex_components; ++i) {
+    // Append the contraction if it appears before this component.
+    if (i == ipv6_parsed.index_of_contraction) {
+      for (int j = 0; j < num_bytes_of_contraction; ++j)
+        address[cur_index_in_address++] = 0;
+    }
+    // Append the hex component's value.
+    if (i != ipv6_parsed.num_hex_components) {
+      // Get the 16-bit value for this hex component.
+      uint16_t number = IPv6HexComponentToNumber<CHAR>(
+          spec, ipv6_parsed.hex_components[i]);
+      // Append to |address|, in network byte order.
+      address[cur_index_in_address++] = (number & 0xFF00) >> 8;
+      address[cur_index_in_address++] = (number & 0x00FF);
+    }
+  }
+
+  // If there was an IPv4 section, convert it into a 32-bit number and append
+  // it to |address|.
+  if (ipv6_parsed.ipv4_component.is_valid()) {
+    // Append the 32-bit number to |address|.
+    int num_ipv4_components = 0;
+    // IPv4AddressToNumber will remove the trailing dot from the component.
+    bool trailing_dot = ipv6_parsed.ipv4_component.is_nonempty() &&
+                        spec[ipv6_parsed.ipv4_component.end() - 1] == '.';
+    // The URL standard requires the embedded IPv4 address to be concisely
+    // composed of 4 parts and disallows terminal dots.
+    // See https://url.spec.whatwg.org/#concept-ipv6-parser
+    if (CanonHostInfo::IPV4 !=
+            IPv4AddressToNumber(spec, ipv6_parsed.ipv4_component,
+                                &address[cur_index_in_address],
+                                &num_ipv4_components)) {
+      return false;
+    }
+    if ((num_ipv4_components != 4 || trailing_dot) &&
+        base::FeatureList::IsEnabled(
+            url::kStrictIPv4EmbeddedIPv6AddressParsing)) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Searches for the longest sequence of zeros in |address|, and writes the
+// range into |contraction_range|. Only runs longer than 16 bits (two or more
+// zero groups) qualify, and if there is a tie the first is chosen.
+void ChooseIPv6ContractionRange(const unsigned char address[16],
+                                Component* contraction_range) {
+  // The longest run of zeros in |address| seen so far.
+  Component max_range;
+
+  // The current run of zeros in |address| being iterated over.
+  Component cur_range;
+
+  for (int i = 0; i < 16; i += 2) {
+    // Test for 16 bits worth of zero.
+    bool is_zero = (address[i] == 0 && address[i + 1] == 0);
+
+    if (is_zero) {
+      // Add the zero to the current range (or start a new one).
+      if (!cur_range.is_valid())
+        cur_range = Component(i, 0);
+      cur_range.len += 2;
+    }
+
+    if (!is_zero || i == 14) {
+      // Just completed a run of zeros. If the run is greater than 16 bits,
+      // it is a candidate for the contraction. The strict comparison against
+      // |max_range| keeps the earliest run when lengths tie.
+      if (cur_range.len > 2 && cur_range.len > max_range.len) {
+        max_range = cur_range;
+      }
+      cur_range.reset();
+    }
+  }
+  *contraction_range = max_range;
+}
+
+// Returns true if a final IPV6/BROKEN decision was made for |host|, false if
+// the result is NEUTRAL and the caller could use a second opinion.
+template<typename CHAR, typename UCHAR>
+bool DoCanonicalizeIPv6Address(const CHAR* spec,
+                               const Component& host,
+                               CanonOutput* output,
+                               CanonHostInfo* host_info) {
+  // Try to turn the host into a 128 bit number.
+  if (IPv6AddressToNumber(spec, host, host_info->address)) {
+    // Valid literal: write it back out in canonical form, in brackets.
+    host_info->out_host.begin = output->length();
+    output->push_back('[');
+    AppendIPv6Address(host_info->address, output);
+    output->push_back(']');
+    host_info->out_host.len = output->length() - host_info->out_host.begin;
+
+    host_info->family = CanonHostInfo::IPV6;
+    return true;
+  }
+
+  // Not an IPv6 address. If the host nevertheless contains a character that
+  // should *only* exist in an IPv6 address, it is definitely broken.
+  const int host_end = host.end();
+  for (int pos = host.begin; pos < host_end; ++pos) {
+    const CHAR ch = spec[pos];
+    if (ch == '[' || ch == ']' || ch == ':') {
+      host_info->family = CanonHostInfo::BROKEN;
+      return true;
+    }
+  }
+
+  // No invalid characters. Could still be IPv4 or a hostname.
+  host_info->family = CanonHostInfo::NEUTRAL;
+  return false;
+}
+
+}  // namespace
+
+void AppendIPv4Address(const unsigned char address[4], CanonOutput* output) {
+  for (int octet = 0; octet < 4; ++octet) {
+    // Dots go between octets, so one precedes every octet but the first.
+    if (octet > 0)
+      output->push_back('.');
+    // Render the octet in decimal and copy its digits to the output.
+    char digits[16];
+    _itoa_s(address[octet], digits, 10);
+    for (const char* digit = digits; *digit != '\0'; ++digit)
+      output->push_back(*digit);
+  }
+}
+
+void AppendIPv6Address(const unsigned char address[16], CanonOutput* output) {
+  // The text form follows the rules in:
+  // http://tools.ietf.org/html/draft-kawamura-ipv6-text-representation-01#section-4
+
+  // Decide which run of zero groups (if any) is written as "::".
+  Component zero_run;
+  ChooseIPv6ContractionRange(address, &zero_run);
+
+  // Walk the address one 16-bit group at a time: byte offsets 0, 2, ..., 14.
+  int offset = 0;
+  while (offset <= 14) {
+    DCHECK(offset % 2 == 0);
+    if (zero_run.len > 0 && offset == zero_run.begin) {
+      // Replace the whole run with "::". A separating colon was already
+      // written after the previous group, unless the run starts the address.
+      if (offset == 0)
+        output->push_back(':');
+      output->push_back(':');
+      offset = zero_run.end();
+      continue;
+    }
+    // Read the next group (high byte first) and step past it.
+    const int group = (address[offset] << 8) | address[offset + 1];
+    offset += 2;
+    // A 16-bit value needs at most 4 hex digits plus the terminator.
+    char hex[5];
+    _itoa_s(group, hex, 16);
+    for (const char* digit = hex; *digit != '\0'; ++digit)
+      output->push_back(*digit);
+
+    // Every group except the last is followed by a colon.
+    if (offset < 16)
+      output->push_back(':');
+  }
+}
+
+void CanonicalizeIPAddress(const char* spec,
+                           const Component& host,
+                           CanonOutput* output,
+                           CanonHostInfo* host_info) {
+  // IPv4 goes first; IPv6 is tried only when IPv4 left the host undecided.
+  if (!DoCanonicalizeIPv4Address<char, unsigned char>(
+          spec, host, output, host_info)) {
+    DoCanonicalizeIPv6Address<char, unsigned char>(
+        spec, host, output, host_info);
+  }
+}
+
+void CanonicalizeIPAddress(const char16_t* spec,
+                           const Component& host,
+                           CanonOutput* output,
+                           CanonHostInfo* host_info) {
+  // IPv4 goes first; IPv6 is tried only when IPv4 left the host undecided.
+  if (!DoCanonicalizeIPv4Address<char16_t, char16_t>(
+          spec, host, output, host_info)) {
+    DoCanonicalizeIPv6Address<char16_t, char16_t>(
+        spec, host, output, host_info);
+  }
+}
+
+// 8-bit overload; the contract is documented at the declaration.
+CanonHostInfo::Family IPv4AddressToNumber(
+    const char* spec, const Component& host, unsigned char address[4],
+    int* num_ipv4_components) {
+  return DoIPv4AddressToNumber<char, unsigned char>(
+      spec, host, address, num_ipv4_components);
+}
+
+// 16-bit overload; the contract is documented at the declaration.
+CanonHostInfo::Family IPv4AddressToNumber(
+    const char16_t* spec, const Component& host, unsigned char address[4],
+    int* num_ipv4_components) {
+  return DoIPv4AddressToNumber<char16_t, char16_t>(
+      spec, host, address, num_ipv4_components);
+}
+
+// 8-bit overload; forwards to the shared template implementation.
+bool IPv6AddressToNumber(const char* spec, const Component& host,
+                         unsigned char address[16]) {
+  return DoIPv6AddressToNumber<char, unsigned char>(spec, host, address);
+}
+
+// 16-bit overload; forwards to the shared template implementation.
+bool IPv6AddressToNumber(const char16_t* spec, const Component& host,
+                         unsigned char address[16]) {
+  return DoIPv6AddressToNumber<char16_t, char16_t>(spec, host, address);
+}
+
+}  // namespace url
diff --git a/url_canon_ip.h b/url_canon_ip.h
new file mode 100644
index 00000000000..86be08a5f14
--- /dev/null
+++ b/url_canon_ip.h
@@ -0,0 +1,60 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_URL_CANON_IP_H_
+#define URL_URL_CANON_IP_H_
+
+#include "base/component_export.h"
+#include "url/third_party/mozilla/url_parse.h"
+#include "url/url_canon.h"
+
+namespace url {
+
+// Writes the given IPv4 address to |output|.
+COMPONENT_EXPORT(URL)
+void AppendIPv4Address(const unsigned char address[4], CanonOutput* output);
+
+// Writes the given IPv6 address to |output|.
+COMPONENT_EXPORT(URL)
+void AppendIPv6Address(const unsigned char address[16], CanonOutput* output);
+
+// Converts an IPv4 address to a 32-bit number (network byte order).
+//
+// Possible return values:
+//   IPV4    - IPv4 address was successfully parsed.
+//   BROKEN  - Input was formatted like an IPv4 address, but overflow occurred
+//             during parsing.
+//   NEUTRAL - Input couldn't possibly be interpreted as an IPv4 address.
+//             It might be an IPv6 address, or a hostname.
+//
+// On success, |num_ipv4_components| will be populated with the number of
+// components in the IPv4 address.
+COMPONENT_EXPORT(URL)
+CanonHostInfo::Family IPv4AddressToNumber(const char* spec,
+                                          const Component& host,
+                                          unsigned char address[4],
+                                          int* num_ipv4_components);
+COMPONENT_EXPORT(URL)
+CanonHostInfo::Family IPv4AddressToNumber(const char16_t* spec,
+                                          const Component& host,
+                                          unsigned char address[4],
+                                          int* num_ipv4_components);
+
+// Converts an IPv6 address to a 128-bit number (network byte order), returning
+// true on success. False means that the input was not a valid IPv6 address.
+//
+// NOTE that |host| is expected to be surrounded by square brackets.
+// i.e. "[::1]" rather than "::1".
+COMPONENT_EXPORT(URL)
+bool IPv6AddressToNumber(const char* spec,
+                         const Component& host,
+                         unsigned char address[16]);
+COMPONENT_EXPORT(URL)
+bool IPv6AddressToNumber(const char16_t* spec,
+                         const Component& host,
+                         unsigned char address[16]);
+
+}  // namespace url
+
+#endif  // URL_URL_CANON_IP_H_
diff --git a/url_canon_mailtourl.cc b/url_canon_mailtourl.cc
new file mode 100644
index 00000000000..e48b6422f8a
--- /dev/null
+++ b/url_canon_mailtourl.cc
@@ -0,0 +1,127 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Functions for canonicalizing "mailto:" URLs.
+
+#include "url/url_canon.h"
+#include "url/url_canon_internal.h"
+#include "url/url_file.h"
+#include "url/url_parse_internal.h"
+
+namespace url {
+
+namespace {
+
+// Certain characters should be percent-encoded when they appear in the path
+// component of a mailto URL, to improve compatibility and mitigate against
+// command-injection attacks on mailto handlers. See https://crbug.com/711020.
+template <typename UCHAR>
+bool ShouldEncodeMailboxCharacter(UCHAR uch) {
+  // Space, control and non-ASCII characters are always encoded.
+  const bool outside_printable = uch < 0x21 || uch > 0x7e;
+
+  // Printable ASCII that is still unsafe to hand to a mailto handler.
+  const bool is_quote = uch == 0x22;
+  const bool is_angle_bracket = uch == 0x3c || uch == 0x3e;
+  const bool is_backtick = uch == 0x60;
+  const bool is_brace_or_pipe = uch >= 0x7b && uch <= 0x7d;
+  return outside_printable || is_quote || is_angle_bracket || is_backtick ||
+         is_brace_or_pipe;
+}
+
+template <typename CHAR, typename UCHAR>
+bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source,
+                             const Parsed& parsed,
+                             CanonOutput* output,
+                             Parsed* new_parsed) {
+  // A mailto: URL consists of just {scheme, path, query}; every other
+  // component of the result is reset to a default-constructed Component.
+  const Component unused = Component();
+  new_parsed->username = unused;
+  new_parsed->password = unused;
+  new_parsed->host = unused;
+  new_parsed->port = unused;
+  new_parsed->ref = unused;
+
+  // The scheme is fixed, so emit it literally instead of running the
+  // general scheme canonicalizer. |len| excludes the trailing colon.
+  new_parsed->scheme.begin = output->length();
+  new_parsed->scheme.len = 6;
+  output->Append("mailto:", 7);
+
+  bool all_chars_valid = true;
+  if (!parsed.path.is_valid()) {
+    // No path at all.
+    new_parsed->path.reset();
+  } else {
+    new_parsed->path.begin = output->length();
+    // Walk the path: characters flagged by ShouldEncodeMailboxCharacter()
+    // are converted to UTF-8 and percent-escaped; everything else is
+    // copied through untouched.
+    const size_t path_end = static_cast<size_t>(parsed.path.end());
+    size_t pos = static_cast<size_t>(parsed.path.begin);
+    while (pos < path_end) {
+      const UCHAR current = static_cast<UCHAR>(source.path[pos]);
+      if (!ShouldEncodeMailboxCharacter<UCHAR>(current)) {
+        output->push_back(static_cast<char>(current));
+      } else if (!AppendUTF8EscapedChar(source.path, &pos, path_end, output)) {
+        all_chars_valid = false;
+      }
+      ++pos;
+    }
+    new_parsed->path.len = output->length() - new_parsed->path.begin;
+  }
+
+  // Query -- always use the default UTF8 charset converter.
+  CanonicalizeQuery(source.query, parsed.query, nullptr, output,
+                    &new_parsed->query);
+  return all_chars_valid;
+}
+
+} // namespace
+
+// 8-bit entry point. Note that |spec_len| is not used by this overload.
+bool CanonicalizeMailtoURL(const char* spec, int spec_len,
+                           const Parsed& parsed, CanonOutput* output,
+                           Parsed* new_parsed) {
+  const URLComponentSource<char> source(spec);
+  return DoCanonicalizeMailtoURL<char, unsigned char>(source, parsed, output,
+                                                      new_parsed);
+}
+
+// UTF-16 entry point. Note that |spec_len| is not used by this overload.
+bool CanonicalizeMailtoURL(const char16_t* spec, int spec_len,
+                           const Parsed& parsed, CanonOutput* output,
+                           Parsed* new_parsed) {
+  const URLComponentSource<char16_t> source(spec);
+  return DoCanonicalizeMailtoURL<char16_t, char16_t>(source, parsed, output,
+                                                     new_parsed);
+}
+
+// Re-canonicalizes a mailto URL after applying 8-bit |replacements|.
+bool ReplaceMailtoURL(const char* base, const Parsed& base_parsed,
+                      const Replacements<char>& replacements,
+                      CanonOutput* output, Parsed* new_parsed) {
+  // Work on copies of the base's components so they can be overridden.
+  Parsed merged_parsed(base_parsed);
+  URLComponentSource<char> merged_source(base);
+  SetupOverrideComponents(base, replacements, &merged_source, &merged_parsed);
+  return DoCanonicalizeMailtoURL<char, unsigned char>(
+      merged_source, merged_parsed, output, new_parsed);
+}
+
+// Re-canonicalizes a mailto URL after applying UTF-16 |replacements|.
+bool ReplaceMailtoURL(const char* base, const Parsed& base_parsed,
+                      const Replacements<char16_t>& replacements,
+                      CanonOutput* output, Parsed* new_parsed) {
+  // Scratch buffer handed to SetupUTF16OverrideComponents(); it is declared
+  // first so that it outlives |merged_source|.
+  RawCanonOutput<1024> utf8_scratch;
+  URLComponentSource<char> merged_source(base);
+  Parsed merged_parsed(base_parsed);
+  SetupUTF16OverrideComponents(base, replacements, &utf8_scratch,
+                               &merged_source, &merged_parsed);
+  return DoCanonicalizeMailtoURL<char, unsigned char>(
+      merged_source, merged_parsed, output, new_parsed);
+}
+
+}  // namespace url
diff --git a/url_canon_path.cc b/url_canon_path.cc
new file mode 100644
index 00000000000..676468d5dfb
--- /dev/null
+++ b/url_canon_path.cc
@@ -0,0 +1,474 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <limits.h>
+
+#include "base/check.h"
+#include "base/check_op.h"
+#include "third_party/abseil-cpp/absl/types/optional.h"
+#include "url/url_canon.h"
+#include "url/url_canon_internal.h"
+#include "url/url_parse_internal.h"
+
+namespace url {
+
+namespace {
+
+enum CharacterFlags {
+  // Pass through unchanged, whether escaped or unescaped. This doesn't
+  // actually set anything so you can't OR it to check, it's just to make the
+  // table below more clear when neither ESCAPE nor UNESCAPE is set.
+  PASS = 0,
+
+  // This character requires special handling in DoPartialPathInternal.
+  // Testing this bit first lets the common case of regular characters that
+  // can be copied directly be filtered out. It is also the low bit of the
+  // composite ESCAPE and INVALID values below.
+  SPECIAL = 1,
+
+  // This character must be escaped in the canonical output. Note that all
+  // escaped chars also have the "special" bit set so that the code that looks
+  // for this is triggered. Not valid with PASS or UNESCAPE.
+  ESCAPE_BIT = 2,
+  ESCAPE = ESCAPE_BIT | SPECIAL,
+
+  // This character must be unescaped in canonical output. Not valid with
+  // ESCAPE or PASS. We DON'T set the SPECIAL flag since if we encounter these
+  // characters unescaped, they should just be copied.
+  UNESCAPE = 4,
+
+  // This character is disallowed in URLs. Note that the "special" bit is also
+  // set to trigger handling.
+  INVALID_BIT = 8,
+  INVALID = INVALID_BIT | SPECIAL,
+};
+
+// This table contains one of the above flag values. Note some flags are more
+// than one bit because they also turn on the "special" flag. Special is the
+// only flag that may be combined with others.
+//
+// This table is designed to match exactly what IE does with the characters.
+//
+// Dot is even more special, and the escaped version is handled specially by
+// IsDot. Therefore, we don't need the "escape" flag, and even the "unescape"
+// bit is never handled (we just need the "special" bit).
+const unsigned char kPathCharLookup[0x100] = {
+//   NULL     control chars...
+     INVALID, ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,
+//   control chars...
+     ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,
+//   ' '      !        "        #        $        %        &        '        (        )        *        +        ,        -        .        /
+     ESCAPE,  PASS,    ESCAPE,  ESCAPE,  PASS,    ESCAPE,  PASS,    PASS,    PASS,    PASS,    PASS,    PASS,    PASS,    UNESCAPE,SPECIAL, PASS,
+//   0        1        2        3        4        5        6        7        8        9        :        ;        <        =        >        ?
+     UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,PASS,    PASS,    ESCAPE,  PASS,    ESCAPE,  ESCAPE,
+//   @        A        B        C        D        E        F        G        H        I        J        K        L        M        N        O
+     PASS,    UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,
+//   P        Q        R        S        T        U        V        W        X        Y        Z        [        \        ]        ^        _
+     UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,PASS,    ESCAPE,  PASS,    ESCAPE,  UNESCAPE,
+//   `        a        b        c        d        e        f        g        h        i        j        k        l        m        n        o
+     ESCAPE,  UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,
+//   p        q        r        s        t        u        v        w        x        y        z        {        |        }        ~        <DEL>
+     UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,UNESCAPE,ESCAPE,  ESCAPE,  ESCAPE,  UNESCAPE,ESCAPE,
+//   ...all the high-bit characters are escaped
+     ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,
+     ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,
+     ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,
+     ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,
+     ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,
+     ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,
+     ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,
+     ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE,  ESCAPE};
+
+enum DotDisposition {
+  // The given dot is an ordinary character (e.g. part of a file name).
+  NOT_A_DIRECTORY,
+
+  // The given dot stands alone and refers to the current directory (".").
+  DIRECTORY_CUR,
+
+  // The given dot starts a double dot ("..") that takes us up one level.
+  DIRECTORY_UP
+};
+
+// When the path resolver finds a dot, this function is called with the
+// character following that dot to see what it is. The return value
+// indicates what type this dot is (see above). This code handles the case
+// where the dot is at the end of the input.
+//
+// |*consumed_len| will contain the number of characters in the input that
+// express what we found.
+//
+// If the input is "../foo", |after_dot| = 1, |end| = 6, and
+// at the end, |*consumed_len| = 2 for the "./" this function consumed. The
+// original dot length should be handled by the caller.
+template <typename CHAR>
+DotDisposition ClassifyAfterDot(const CHAR* spec,
+                                size_t after_dot,
+                                size_t end,
+                                size_t* consumed_len) {
+  // Default outcome: the dot belongs to an ordinary name and nothing after
+  // it is consumed. The branches below override this for "." and "..".
+  DotDisposition disposition = NOT_A_DIRECTORY;
+  size_t consumed = 0;
+
+  if (after_dot == end) {
+    // A lone dot terminating the input.
+    disposition = DIRECTORY_CUR;
+  } else if (IsURLSlash(spec[after_dot])) {
+    // A lone dot followed by a slash; the slash is consumed as well.
+    disposition = DIRECTORY_CUR;
+    consumed = 1;
+  } else if (const size_t second_dot_len = IsDot(spec, after_dot, end)) {
+    // A second dot follows (IsDot() reports how many input characters it
+    // occupies). It only forms ".." when it is itself followed by the end
+    // of the input or by a slash; anything else keeps the default.
+    const size_t after_second_dot = after_dot + second_dot_len;
+    if (after_second_dot == end) {
+      disposition = DIRECTORY_UP;
+      consumed = second_dot_len;
+    } else if (IsURLSlash(spec[after_second_dot])) {
+      disposition = DIRECTORY_UP;
+      consumed = second_dot_len + 1;  // The second dot plus its slash.
+    }
+  }
+
+  // Publish the consumed length exactly once, whichever branch was taken.
+  *consumed_len = consumed;
+  return disposition;
+}
+
+// Rewinds the output to the previous slash. It is assumed that the output
+// ends with a slash and this doesn't count (we call this when we are
+// appending directory paths, so the previous path component has an ending
+// slash).
+//
+// This will stop at the first slash (assumed to be at position
+// |path_begin_in_output|) and not go any higher than that. Some web pages
+// do ".." too many times, so we need to handle that brokenness.
+//
+// It searches for a literal slash rather than including a backslash as well
+// because it is run only on the canonical output.
+//
+// The output is guaranteed to end in a slash when this function completes.
+void BackUpToPreviousSlash(size_t path_begin_in_output, CanonOutput* output) {
+  CHECK(output->length() > 0);
+  CHECK(path_begin_in_output < output->length());
+
+  const size_t trailing_slash = output->length() - 1;
+  DCHECK(output->at(trailing_slash) == '/');
+  // Only rewind when the trailing slash is not already the first slash of
+  // the path; otherwise there is nothing to remove.
+  if (trailing_slash != path_begin_in_output) {
+    // Step left past the trailing slash, then keep going until another
+    // slash is found or the start of the path is reached.
+    size_t pos = trailing_slash - 1;
+    while (pos > path_begin_in_output && output->at(pos) != '/')
+      --pos;
+    output->set_length(pos + 1);  // Keep everything through that slash.
+  }
+}
+
+// Looks for problematic nested escape sequences and escapes the output as
+// needed to ensure they can't be misinterpreted.
+//
+// Our concern is that an input escape sequence that's invalid because it
+// contains nested escape sequences might look valid once those are unescaped.
+// For example, "%%300" is not a valid escape sequence, but after unescaping the
+// inner "%30" this becomes "%00" which is valid.  Leaving this in the output
+// string can result in callers re-canonicalizing the string and unescaping this
+// sequence, thus resulting in something fundamentally different than the
+// original input here.  This can cause a variety of problems.
+//
+// This function is called after we've just unescaped a sequence that's within
+// two output characters of a previous '%' that we know didn't begin a valid
+// escape sequence in the input string.  We look for whether the output is going
+// to turn into a valid escape sequence, and if so, convert the initial '%' into
+// an escaped "%25" so the output can't be misinterpreted.
+//
+// |spec| is the input string we're canonicalizing.
+// |next_input_index| is the index of the next unprocessed character in |spec|.
+// |input_len| is the length of |spec|.
+// |last_invalid_percent_index| is the index in |output| of a previously-seen
+// '%' character.  The caller knows this '%' character isn't followed by a valid
+// escape sequence in the input string.
+// |output| is the canonicalized output thus far.  The caller guarantees this
+// ends with a '%' followed by one or two characters, and the '%' is the one
+// pointed to by |last_invalid_percent_index|.  The last character in the string
+// was just unescaped.
+template <typename CHAR>
+void CheckForNestedEscapes(const CHAR* spec,
+                           size_t next_input_index,
+                           size_t input_len,
+                           size_t last_invalid_percent_index,
+                           CanonOutput* output) {
+  const size_t length = output->length();  // Snapshot used to undo the append.
+  const char last_unescaped_char = output->at(length - 1);
+
+  // If |output| currently looks like "%c", we need to try appending the next
+  // input character to see if this will result in a problematic escape
+  // sequence.  Note that this won't trigger on the first nested escape of a
+  // two-escape sequence like "%%30%30" -- we'll allow the conversion to
+  // "%0%30" -- but the second nested escape will be caught by this function
+  // when it's called again in that case.
+  const bool append_next_char = last_invalid_percent_index == length - 2;
+  if (append_next_char) {
+    // If the input doesn't contain a 7-bit character next, this case won't be a
+    // problem.
+    if ((next_input_index == input_len) || (spec[next_input_index] >= 0x80))
+      return;
+    output->push_back(static_cast<char>(spec[next_input_index]));
+  }
+
+  // Now output ends like "%cc".  Try to unescape this.
+  size_t begin = last_invalid_percent_index;
+  unsigned char temp;  // Decoded value is never read; only validity matters.
+  if (DecodeEscaped(output->data(), &begin, output->length(), &temp)) {
+    // New escape sequence found.  Overwrite the characters following the '%'
+    // with "25", and push_back() the one or two characters that were following
+    // the '%' when we were called.
+    if (!append_next_char)
+      output->push_back(output->at(last_invalid_percent_index + 1));
+    output->set(last_invalid_percent_index + 1, '2');
+    output->set(last_invalid_percent_index + 2, '5');
+    output->push_back(last_unescaped_char);
+  } else if (append_next_char) {
+    // Not a valid escape sequence, but we still need to undo appending the next
+    // source character so the caller can process it normally.
+    output->set_length(length);
+  }
+}
+
+// Canonicalizes and appends the given path to the output. It assumes that if
+// the input path starts with a slash, it should be copied to the output.
+//
+// If there are already path components (this mode is used when appending
+// relative paths for resolving), it assumes that the output already has
+// a trailing slash and that if the input begins with a slash, it should be
+// copied to the output.
+//
+// We do not collapse multiple slashes in a row to a single slash. It seems
+// no web browsers do this, and we don't want incompatibilities, even though
+// it would be correct for most systems.
+template <typename CHAR, typename UCHAR>
+bool DoPartialPathInternal(const CHAR* spec,
+                           const Component& path,
+                           size_t path_begin_in_output,
+                           CanonOutput* output) {
+  if (path.is_empty())
+    return true;  // Nothing to append.
+
+  size_t end = static_cast<size_t>(path.end());
+
+  // We use this variable to minimize the amount of work done when unescaping --
+  // we'll only call CheckForNestedEscapes() when this points at one of the last
+  // couple of characters in |output|.
+  absl::optional<size_t> last_invalid_percent_index;
+
+  bool success = true;
+  for (size_t i = static_cast<size_t>(path.begin); i < end; i++) {
+    UCHAR uch = static_cast<UCHAR>(spec[i]);
+    if (sizeof(CHAR) > 1 && uch >= 0x80) {
+      // We only need to test wide input for having non-ASCII characters. For
+      // narrow input, we'll always just use the lookup table. We don't try to
+      // do anything tricky with decoding/validating UTF-8. This function will
+      // read one or two UTF-16 characters and append the output as UTF-8. This
+      // call will be removed in 8-bit mode.
+      success &= AppendUTF8EscapedChar(spec, &i, end, output);
+    } else {
+      // Normal ASCII character or 8-bit input, use the lookup table.
+      unsigned char out_ch = static_cast<unsigned char>(uch);
+      unsigned char flags = kPathCharLookup[out_ch];
+      if (flags & SPECIAL) {
+        // Needs special handling of some sort.
+        size_t dotlen;
+        if ((dotlen = IsDot(spec, i, end)) > 0) {
+          // See if this dot was preceded by a slash in the output.
+          //
+          // Note that we check this in the case of dots so we don't have to
+          // special case slashes. Since slashes are much more common than
+          // dots, this actually increases performance measurably (though
+          // slightly).
+          if (output->length() > path_begin_in_output &&
+              output->at(output->length() - 1) == '/') {
+            // Slash followed by a dot: see whether this means "." or "..".
+            size_t consumed_len;
+            switch (ClassifyAfterDot<CHAR>(spec, i + dotlen, end,
+                                           &consumed_len)) {
+              case NOT_A_DIRECTORY:
+                // Copy the dot to the output, it means nothing special.
+                output->push_back('.');
+                i += dotlen - 1;  // -1: the loop's i++ adds it back.
+                break;
+              case DIRECTORY_CUR:  // Current directory, just skip the input.
+                i += dotlen + consumed_len - 1;
+                break;
+              case DIRECTORY_UP:
+                BackUpToPreviousSlash(path_begin_in_output, output);
+                if (last_invalid_percent_index >= output->length()) {
+                  last_invalid_percent_index = absl::nullopt;
+                }
+                i += dotlen + consumed_len - 1;
+                break;
+            }
+          } else {
+            // This dot is not preceded by a slash, it is just part of some
+            // file name.
+            output->push_back('.');
+            i += dotlen - 1;
+          }
+
+        } else if (out_ch == '\\') {
+          // Convert backslashes to forward slashes
+          output->push_back('/');
+
+        } else if (out_ch == '%') {
+          // Handle escape sequences.
+          unsigned char unescaped_value;
+          if (DecodeEscaped(spec, &i, end, &unescaped_value)) {
+            // Valid escape sequence, see if we keep, reject, or unescape it.
+            // Note that at this point DecodeEscaped() will have advanced |i| to
+            // the last character of the escape sequence.
+            char unescaped_flags = kPathCharLookup[unescaped_value];
+
+            if (unescaped_flags & UNESCAPE) {
+              // This escaped value shouldn't be escaped.  Try to copy it.
+              output->push_back(unescaped_value);
+              // If we just unescaped a value within 2 output characters of the
+              // '%' from a previously-detected invalid escape sequence, we
+              // might have an input string with problematic nested escape
+              // sequences; detect and fix them.
+              if (last_invalid_percent_index.has_value() &&
+                  ((last_invalid_percent_index.value() + 3) >=
+                   output->length())) {
+                CheckForNestedEscapes(spec, i + 1, end,
+                                      last_invalid_percent_index.value(),
+                                      output);
+              }
+            } else {
+              // Either this is an invalid escaped character, or it's a valid
+              // escaped character we should keep escaped.  In the first case we
+              // should just copy it exactly and remember the error.  In the
+              // second we also copy exactly in case the server is sensitive to
+              // changing the case of any hex letters.
+              output->push_back('%');
+              output->push_back(static_cast<char>(spec[i - 1]));
+              output->push_back(static_cast<char>(spec[i]));
+              if (unescaped_flags & INVALID_BIT)
+                success = false;
+            }
+          } else {
+            // Invalid escape sequence. IE7+ rejects any URLs with such
+            // sequences, while other browsers pass them through unchanged. We
+            // use the permissive behavior.
+            // TODO(brettw): Consider testing IE's strict behavior, which would
+            // allow removing the code to handle nested escapes above.
+            last_invalid_percent_index = output->length();
+            output->push_back('%');
+          }
+
+        } else if (flags & INVALID_BIT) {
+          // For NULLs, etc. fail.
+          AppendEscapedChar(out_ch, output);
+          success = false;
+
+        } else if (flags & ESCAPE_BIT) {
+          // This character should be escaped.
+          AppendEscapedChar(out_ch, output);
+        }
+      } else {
+        // Nothing special about this character, just append it.
+        output->push_back(out_ch);
+      }
+    }
+  }
+  return success;
+}
+
+// Perform the same logic as in DoPartialPathInternal(), but updates the
+// publicly exposed CanonOutput structure similar to DoPath().  Returns
+// true if successful.
+template <typename CHAR, typename UCHAR>
+bool DoPartialPath(const CHAR* spec, const Component& path,
+                   CanonOutput* output, Component* out_path) {
+  // The component starts at the current end of |output|; its length is
+  // whatever the canonicalizer appends from here on.
+  out_path->begin = output->length();
+  const bool canonicalized_ok =
+      DoPartialPathInternal<CHAR, UCHAR>(spec, path, out_path->begin, output);
+  out_path->len = output->length() - out_path->begin;
+  return canonicalized_ok;
+}
+
+template <typename CHAR, typename UCHAR>
+bool DoPath(const CHAR* spec,
+            const Component& path,
+            CanonOutput* output,
+            Component* out_path) {
+  out_path->begin = output->length();
+
+  // A canonical path always begins with a slash. It is written here when the
+  // input is empty (the canonical path is then just "/") or when the input
+  // does not supply one itself, which can happen for replacements and for
+  // relative resolution of file URLs. A freshly parsed URL already has it.
+  const bool has_input = path.is_nonempty();
+  if (!has_input || !IsURLSlash(spec[path.begin]))
+    output->push_back('/');
+
+  // An empty input cannot fail; otherwise defer to the shared path worker.
+  bool success = true;
+  if (has_input) {
+    success =
+        DoPartialPathInternal<CHAR, UCHAR>(spec, path, out_path->begin, output);
+  }
+  out_path->len = output->length() - out_path->begin;
+  return success;
+}
+
+}  // namespace
+
+// Canonicalizes the path component of an 8-bit |spec| by forwarding to the
+// shared DoPath<> template.
+bool CanonicalizePath(const char* spec, const Component& path,
+                      CanonOutput* output, Component* out_path) {
+  return DoPath<char, unsigned char>(spec, path, output, out_path);
+}
+
+// Canonicalizes the path component of a UTF-16 |spec| by forwarding to the
+// shared DoPath<> template.
+bool CanonicalizePath(const char16_t* spec, const Component& path,
+                      CanonOutput* output, Component* out_path) {
+  return DoPath<char16_t, char16_t>(spec, path, output, out_path);
+}
+
+// 8-bit overload; forwards to the shared DoPartialPath<> template, which
+// also fills in |out_path|.
+bool CanonicalizePartialPath(const char* spec, const Component& path,
+                             CanonOutput* output, Component* out_path) {
+  return DoPartialPath<char, unsigned char>(spec, path, output, out_path);
+}
+
+// UTF-16 overload; forwards to the shared DoPartialPath<> template, which
+// also fills in |out_path|.
+bool CanonicalizePartialPath(const char16_t* spec, const Component& path,
+                             CanonOutput* output, Component* out_path) {
+  return DoPartialPath<char16_t, char16_t>(spec, path, output, out_path);
+}
+
+// 8-bit overload; forwards to the shared DoPartialPathInternal<> template.
+bool CanonicalizePartialPathInternal(
+    const char* spec, const Component& path, size_t path_begin_in_output,
+    CanonOutput* output) {
+  return DoPartialPathInternal<char, unsigned char>(
+      spec, path, path_begin_in_output, output);
+}
+
+// UTF-16 overload; forwards to the shared DoPartialPathInternal<> template.
+bool CanonicalizePartialPathInternal(
+    const char16_t* spec, const Component& path, size_t path_begin_in_output,
+    CanonOutput* output) {
+  return DoPartialPathInternal<char16_t, char16_t>(
+      spec, path, path_begin_in_output, output);
+}
+
+}  // namespace url
diff --git a/url_canon_pathurl.cc b/url_canon_pathurl.cc
new file mode 100644
index 00000000000..85983a8c3a7
--- /dev/null
+++ b/url_canon_pathurl.cc
@@ -0,0 +1,144 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Functions for canonicalizing "path" URLs. Not to be confused with the path
+// of a URL, these are URLs that have no authority section, only a path. For
+// example, "javascript:" and "data:".
+
+#include "url/url_canon.h"
+#include "url/url_canon_internal.h"
+
+namespace url {
+
+namespace {
+
+// Canonicalizes |component| of |source| into |output| and records the written
+// range in |new_component|. If |separator| is non-zero (e.g. '?' or '#'), it
+// is appended to |output| immediately before the canonicalized component.
+template <typename CHAR, typename UCHAR>
+void DoCanonicalizePathComponent(const CHAR* source,
+                                 const Component& component,
+                                 char separator,
+                                 CanonOutput* output,
+                                 Component* new_component) {
+  if (component.is_valid()) {
+    if (separator)
+      output->push_back(separator);
+    // Copy the path using path URLs' laxer escaping rules (think
+    // javascript:): code units outside printable ASCII (< 0x20 or > 0x7E) go
+    // through AppendUTF8EscapedChar (which takes &i, so it may advance |i|);
+    // everything else is copied verbatim, which keeps JavaScript readable.
+    // https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state
+    // https://url.spec.whatwg.org/#c0-control-percent-encode-set
+    new_component->begin = output->length();
+    size_t end = static_cast<size_t>(component.end());
+    for (size_t i = static_cast<size_t>(component.begin); i < end; i++) {
+      UCHAR uch = static_cast<UCHAR>(source[i]);
+      if (uch < 0x20 || uch > 0x7E)
+        AppendUTF8EscapedChar(source, &i, end, output);
+      else
+        output->push_back(static_cast<char>(uch));
+    }
+    new_component->len = output->length() - new_component->begin;
+  } else {
+    // Component absent in the input: nothing is written, not even |separator|.
+    new_component->reset();
+  }
+}
+
+template <typename CHAR, typename UCHAR>
+bool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source,
+                           const Parsed& parsed,
+                           CanonOutput* output,
+                           Parsed* new_parsed) {
+  // Scheme: this will append the colon.
+  bool success = CanonicalizeScheme(source.scheme, parsed.scheme,
+                                    output, &new_parsed->scheme);
+
+  // We assume there's no authority for path URLs. Note that hosts should never
+  // have -1 length.
+  new_parsed->username.reset();
+  new_parsed->password.reset();
+  new_parsed->host.reset();
+  new_parsed->port.reset();
+
+  // Canonicalize path via the weaker path URL rules.
+  //
+  // Note: parsing the path part should never cause a failure, see
+  // https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state
+  DoCanonicalizePathComponent<CHAR, UCHAR>(source.path, parsed.path, '\0',
+                                           output, &new_parsed->path);
+
+  // Similar to mailto:, pass a null converter so the query always gets the
+  // default UTF-8 handling.
+  CanonicalizeQuery(source.query, parsed.query, nullptr, output,
+                    &new_parsed->query);
+
+  CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);
+
+  return success;  // Reflects scheme canonicalization only.
+}
+
+}  // namespace
+
+bool CanonicalizePathURL(const char* spec,
+                         int spec_len,  // Not used by this overload.
+                         const Parsed& parsed,
+                         CanonOutput* output,
+                         Parsed* new_parsed) {
+  return DoCanonicalizePathURL<char, unsigned char>(
+      URLComponentSource<char>(spec), parsed, output, new_parsed);
+}
+
+bool CanonicalizePathURL(const char16_t* spec,
+                         int spec_len,  // Not used by this overload.
+                         const Parsed& parsed,
+                         CanonOutput* output,
+                         Parsed* new_parsed) {
+  return DoCanonicalizePathURL<char16_t, char16_t>(
+      URLComponentSource<char16_t>(spec), parsed, output, new_parsed);
+}
+
+void CanonicalizePathURLPath(const char* source,  // '\0' => no separator.
+                             const Component& component,
+                             CanonOutput* output,
+                             Component* new_component) {
+  DoCanonicalizePathComponent<char, unsigned char>(source, component, '\0',
+                                                   output, new_component);
+}
+
+void CanonicalizePathURLPath(const char16_t* source,  // '\0' => no separator.
+                             const Component& component,
+                             CanonOutput* output,
+                             Component* new_component) {
+  DoCanonicalizePathComponent<char16_t, char16_t>(source, component, '\0',
+                                                  output, new_component);
+}
+
+bool ReplacePathURL(const char* base,
+                    const Parsed& base_parsed,
+                    const Replacements<char>& replacements,
+                    CanonOutput* output,
+                    Parsed* new_parsed) {
+  URLComponentSource<char> source(base);
+  Parsed parsed(base_parsed);  // Mutable copy handed to the override setup.
+  SetupOverrideComponents(base, replacements, &source, &parsed);
+  return DoCanonicalizePathURL<char, unsigned char>(
+      source, parsed, output, new_parsed);
+}
+
+bool ReplacePathURL(const char* base,
+                    const Parsed& base_parsed,
+                    const Replacements<char16_t>& replacements,
+                    CanonOutput* output,
+                    Parsed* new_parsed) {
+  RawCanonOutput<1024> utf8;  // Scratch for SetupUTF16OverrideComponents.
+  URLComponentSource<char> source(base);
+  Parsed parsed(base_parsed);  // Mutable copy handed to the override setup.
+  SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
+  return DoCanonicalizePathURL<char, unsigned char>(
+      source, parsed, output, new_parsed);
+}
+
+}  // namespace url
diff --git a/url_canon_query.cc b/url_canon_query.cc
new file mode 100644
index 00000000000..47d20d1ffb6
--- /dev/null
+++ b/url_canon_query.cc
@@ -0,0 +1,149 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/url_canon.h"
+#include "url/url_canon_internal.h"
+
+// Query canonicalization in IE
+// ----------------------------
+// IE is very permissive for query parameters specified in links on the page
+// (in contrast to links that it constructs itself based on form data). It does
+// not unescape any character. It does not reject any escape sequence (be they
+// invalid like "%2y" or freaky like %00).
+//
+// IE only escapes spaces and nothing else. Embedded NULLs, tabs (0x09),
+// LF (0x0a), and CR (0x0d) are removed (this probably happens at an earlier
+// layer since they are removed from all portions of the URL). All other
+// characters are passed unmodified. Invalid UTF-16 sequences are preserved as
+// well, with each character in the input being converted to UTF-8. It is the
+// server's job to make sense of this invalid query.
+//
+// Invalid multibyte sequences (for example, invalid UTF-8 on a UTF-8 page)
+// are converted to the invalid character and sent as unescaped UTF-8 (0xef,
+// 0xbf, 0xbd). This may not be canonicalization; the parser may generate these
+// strings before the URL handler ever sees them.
+//
+// Our query canonicalization
+// --------------------------
+// We escape all non-ASCII characters and control characters, like Firefox.
+// This is more conformant to the URL spec, and there do not seem to be many
+// problems relating to Firefox's behavior.
+//
+// Like IE, we will never unescape (although the application may want to try
+// unescaping to present the user with a more understandable URL). We will
+// replace all invalid sequences (including invalid UTF-16 sequences, which IE
+// doesn't) with the "invalid character," and we will escape it.
+
+namespace url {
+
+namespace {
+
+// Appends |length| characters of |source| to the output, escaping every
+// character for which IsQueryChar() is false. Accepts 8- or 16-bit character
+// types, but each value is narrowed to unsigned char before use. The input is
+// not validated as UTF-8; it is handled one code unit at a time.
+template<typename CHAR>
+void AppendRaw8BitQueryString(const CHAR* source, int length,
+                              CanonOutput* output) {
+  for (int i = 0; i < length; i++) {
+    if (!IsQueryChar(static_cast<unsigned char>(source[i])))
+      AppendEscapedChar(static_cast<unsigned char>(source[i]), output);
+    else  // Doesn't need escaping.
+      output->push_back(static_cast<char>(source[i]));
+  }
+}
+
+// Runs |converter| on the UTF-8 |query| range of |spec|. The converter takes
+// UTF-16, so the range is converted first. |converter| must be non-null.
+void RunConverter(const char* spec,
+                  const Component& query,
+                  CharsetConverter* converter,
+                  CanonOutput* output) {
+  DCHECK(query.is_valid());
+  // This function will replace any misencoded values with the invalid
+  // character. This is what we want so we don't have to check for error.
+  RawCanonOutputW<1024> utf16;
+  ConvertUTF8ToUTF16(&spec[query.begin], static_cast<size_t>(query.len),
+                     &utf16);
+  converter->ConvertFromUTF16(utf16.data(), utf16.length(), output);
+}
+
+// Runs the converter with the given UTF-16 input. No pre-conversion is
+// needed, but this overload lets the same calling code handle both UTF-8 and
+// UTF-16 input.
+void RunConverter(const char16_t* spec,
+                  const Component& query,
+                  CharsetConverter* converter,
+                  CanonOutput* output) {
+  DCHECK(query.is_valid());
+  converter->ConvertFromUTF16(&spec[query.begin],
+                              static_cast<size_t>(query.len), output);
+}
+
+template <typename CHAR, typename UCHAR>  // UCHAR is unused in the body.
+void DoConvertToQueryEncoding(const CHAR* spec,
+                              const Component& query,
+                              CharsetConverter* converter,
+                              CanonOutput* output) {
+  if (converter) {
+    // Run the converter into a temporary 8-bit buffer, then append that
+    // buffer to |output|, escaping values that are not query characters.
+    RawCanonOutput<1024> eight_bit;
+    RunConverter(spec, query, converter, &eight_bit);
+    AppendRaw8BitQueryString(eight_bit.data(), eight_bit.length(), output);
+
+  } else {
+    // No converter: emit as UTF-8 ourselves, escaping per CHAR_QUERY.
+    AppendStringOfType(&spec[query.begin], static_cast<size_t>(query.len),
+                       CHAR_QUERY, output);
+  }
+}
+
+template<typename CHAR, typename UCHAR>
+void DoCanonicalizeQuery(const CHAR* spec,
+                         const Component& query,
+                         CharsetConverter* converter,
+                         CanonOutput* output,
+                         Component* out_query) {
+  if (!query.is_valid()) {
+    *out_query = Component();  // No query: nothing is written to |output|.
+    return;
+  }
+
+  output->push_back('?');  // Written to |output| but outside |out_query|.
+  out_query->begin = output->length();
+
+  DoConvertToQueryEncoding<CHAR, UCHAR>(spec, query, converter, output);
+
+  out_query->len = output->length() - out_query->begin;
+}
+
+}  // namespace
+
+void CanonicalizeQuery(const char* spec,  // char overload.
+                       const Component& query,
+                       CharsetConverter* converter,
+                       CanonOutput* output,
+                       Component* out_query) {
+  DoCanonicalizeQuery<char, unsigned char>(spec, query, converter,
+                                           output, out_query);
+}
+
+void CanonicalizeQuery(const char16_t* spec,  // char16_t overload.
+                       const Component& query,
+                       CharsetConverter* converter,
+                       CanonOutput* output,
+                       Component* out_query) {
+  DoCanonicalizeQuery<char16_t, char16_t>(spec, query, converter, output,
+                                          out_query);
+}
+
+void ConvertUTF16ToQueryEncoding(const char16_t* input,  // No '?' is added.
+                                 const Component& query,
+                                 CharsetConverter* converter,
+                                 CanonOutput* output) {
+  DoConvertToQueryEncoding<char16_t, char16_t>(input, query, converter, output);
+}
+
+}  // namespace url
diff --git a/url_canon_relative.cc b/url_canon_relative.cc
new file mode 100644
index 00000000000..d8ea528a25a
--- /dev/null
+++ b/url_canon_relative.cc
@@ -0,0 +1,623 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Canonicalizer functions for working with and resolving relative URLs.
+
+#include <algorithm>
+#include <ostream>
+
+#include "base/check_op.h"
+#include "base/strings/string_util.h"
+#include "url/url_canon.h"
+#include "url/url_canon_internal.h"
+#include "url/url_constants.h"
+#include "url/url_features.h"
+#include "url/url_file.h"
+#include "url/url_parse_internal.h"
+#include "url/url_util.h"
+#include "url/url_util_internal.h"
+
+namespace url {
+
+namespace {
+
+// Firefox does a case-sensitive compare (which is probably wrong--Mozilla bug
+// 379034), whereas IE is case-insensitive.
+//
+// We choose to be more permissive like IE. We don't need to worry about
+// unescaping or anything here: neither IE nor Firefox allows this. We also
+// don't have to worry about invalid scheme characters since we are comparing
+// against the canonical scheme of the base.
+//
+// The base URL should always be canonical, therefore it should be ASCII.
+template<typename CHAR>
+bool AreSchemesEqual(const char* base,
+                     const Component& base_scheme,
+                     const CHAR* cmp,
+                     const Component& cmp_scheme) {
+  if (base_scheme.len != cmp_scheme.len)
+    return false;
+  for (int i = 0; i < base_scheme.len; i++) {
+    // We assume the base is already canonical, so only the |cmp| side is
+    // passed through CanonicalSchemeChar before comparing.
+    if (CanonicalSchemeChar(cmp[cmp_scheme.begin + i]) !=
+        base[base_scheme.begin + i])
+      return false;
+  }
+  return true;
+}
+
+#ifdef WIN32
+
+// Here, we also allow Windows paths to be represented as "/C:/" so we can be
+// consistent about URL paths beginning with slashes. This function is like
+// DoesBeginWindowsDriveSpec except that it also requires a slash at the
+// beginning.
+template<typename CHAR>
+bool DoesBeginSlashWindowsDriveSpec(const CHAR* spec, int start_offset,
+                                    int spec_len) {
+  if (start_offset >= spec_len)
+    return false;
+  return IsURLSlash(spec[start_offset]) &&
+         DoesBeginWindowsDriveSpec(spec, start_offset + 1, spec_len);
+}
+
+#endif  // WIN32
+
+template <typename CHAR>
+bool IsValidScheme(const CHAR* url, const Component& scheme) {
+  // Caller should ensure that the |scheme| is not empty.
+  DCHECK_NE(0, scheme.len);
+
+  // From https://url.spec.whatwg.org/#scheme-start-state:
+  //   scheme start state:
+  //     1. If c is an ASCII alpha, append c, lowercased, to buffer, and set
+  //        state to scheme state.
+  //     2. Otherwise, if state override is not given, set state to no scheme
+  //        state, and decrease pointer by one.
+  //     3. Otherwise, validation error, return failure.
+  // Note that both step 2 and step 3 mean that the scheme was not valid.
+  if (!base::IsAsciiAlpha(url[scheme.begin]))
+    return false;
+
+  // From https://url.spec.whatwg.org/#scheme-state:
+  //   scheme state:
+  //     1. If c is an ASCII alphanumeric, U+002B (+), U+002D (-), or U+002E
+  //        (.), append c, lowercased, to buffer.
+  //     2. Otherwise, if c is U+003A (:), then [...]
+  //
+  // We begin at |scheme.begin + 1|, because the character at |scheme.begin| has
+  // already been checked by base::IsAsciiAlpha above.
+  int scheme_end = scheme.end();
+  for (int i = scheme.begin + 1; i < scheme_end; i++) {
+    if (!CanonicalSchemeChar(url[i]))  // Zero result => not a scheme char.
+      return false;
+  }
+
+  return true;
+}
+
+// See IsRelativeURL in the header file for usage.
+template<typename CHAR>
+bool DoIsRelativeURL(const char* base,
+                     const Parsed& base_parsed,
+                     const CHAR* url,
+                     int url_len,
+                     bool is_base_hierarchical,
+                     bool* is_relative,
+                     Component* relative_component) {
+  *is_relative = false;  // So we can default later to not relative.
+
+  // Trim whitespace and construct a new range for the substring.
+  int begin = 0;
+  TrimURL(url, &begin, &url_len);
+  if (begin >= url_len) {
+    // Empty URLs are relative, but do nothing.
+    if (!is_base_hierarchical) {
+      // Don't allow relative URLs if the base scheme doesn't support it.
+      return false;
+    }
+    *relative_component = Component(begin, 0);
+    *is_relative = true;
+    return true;
+  }
+
+#ifdef WIN32
+  // We special case paths like "C:\foo" so they can link directly to the
+  // file on Windows (IE compatibility). The security domain stuff should
+  // prevent a link like this from actually being followed if it's on a
+  // web page.
+  //
+  // We treat "C:/foo" as an absolute URL. We can go ahead and treat "/c:/"
+  // as relative, as this will just replace the path when the base scheme
+  // is a file and the answer will still be correct.
+  //
+  // We require strict backslashes when detecting UNC since two forward
+  // slashes should be treated as a relative URL with a hostname.
+  if (DoesBeginWindowsDriveSpec(url, begin, url_len) ||
+      DoesBeginUNCPath(url, begin, url_len, true))
+    return true;  // Absolute; |*is_relative| is still false.
+#endif  // WIN32
+
+  // See if we've got a scheme, if not, we know this is a relative URL.
+  // BUT, just because we have a scheme, doesn't make it absolute.
+  // "http:foo.html" is a relative URL with path "foo.html". If the scheme is
+  // empty, we treat it as relative (":foo"), like IE does.
+  Component scheme;
+  const bool scheme_is_empty =
+      !ExtractScheme(url, url_len, &scheme) || scheme.len == 0;
+  if (scheme_is_empty) {
+    if (url[begin] == '#') {
+      // |url| is a bare fragment (e.g. "#foo"). This can be resolved against
+      // any base. Fall-through.
+    } else if (!is_base_hierarchical) {
+      // Don't allow relative URLs if the base scheme doesn't support it.
+      return false;
+    }
+
+    *relative_component = MakeRange(begin, url_len);
+    *is_relative = true;
+    return true;
+  }
+
+  // If the scheme isn't valid, then it's relative.
+  if (!IsValidScheme(url, scheme)) {
+    if (url[begin] == '#' &&
+        base::FeatureList::IsEnabled(
+            kResolveBareFragmentWithColonOnNonHierarchical)) {
+      // |url| is a bare fragment (e.g. "#foo:bar"). This can be resolved
+      // against any base. Fall-through.
+    } else if (!is_base_hierarchical) {
+      // Don't allow relative URLs if the base scheme doesn't support it.
+      return false;
+    }
+    *relative_component = MakeRange(begin, url_len);
+    *is_relative = true;
+    return true;
+  }
+
+  // If the scheme is not the same, then we can't count it as relative.
+  if (!AreSchemesEqual(base, base_parsed.scheme, url, scheme))
+    return true;  // Absolute; |*is_relative| is still false.
+
+  // When the scheme that they both share is not hierarchical, treat the
+  // incoming scheme as absolute (this way with the base of "data:foo",
+  // "data:bar" will be reported as absolute).
+  if (!is_base_hierarchical)
+    return true;
+
+  int colon_offset = scheme.end();
+
+  // If it's a filesystem URL, the only valid way to make it relative is not to
+  // supply a scheme. There's no equivalent to e.g. http:index.html.
+  if (CompareSchemeComponent(url, scheme, kFileSystemScheme))
+    return true;
+
+  // ExtractScheme guarantees that the colon immediately follows what it
+  // considers to be the scheme. CountConsecutiveSlashes will handle the
+  // case where the begin offset is the end of the input.
+  int num_slashes = CountConsecutiveSlashes(url, colon_offset + 1, url_len);
+
+  if (num_slashes == 0 || num_slashes == 1) {
+    // No slashes means it's a relative path like "http:foo.html". One slash
+    // is an absolute path. "http:/home/foo.html"
+    *is_relative = true;
+    *relative_component = MakeRange(colon_offset + 1, url_len);
+    return true;
+  }
+
+  // Two or more slashes after the scheme we treat as absolute.
+  return true;
+}
+
+// Copies all characters in the range [begin, end) of |spec| to the output,
+// up to and including the last slash ('/' or '\\'). If the range contains no
+// slash, nothing is copied.
+//
+// For standard URLs the input should be canonical, but when resolving relative
+// URLs on a non-standard base (like "data:") the input can be anything.
+void CopyToLastSlash(const char* spec,
+                     int begin,
+                     int end,
+                     CanonOutput* output) {
+  // Find the last slash.
+  int last_slash = -1;
+  for (int i = end - 1; i >= begin; i--) {
+    if (spec[i] == '/' || spec[i] == '\\') {
+      last_slash = i;
+      break;
+    }
+  }
+  if (last_slash < 0)
+    return;  // No slash.
+
+  // Copy.
+  for (int i = begin; i <= last_slash; i++)
+    output->push_back(spec[i]);
+}
+
+// Copies one component verbatim from |source| to |output| and records where
+// it landed in |output_component|. Used when resolving relative URLs and a
+// component is unchanged: the source should already be canonical (and thus
+// ASCII), so no escaping is applied. An invalid component yields Component().
+void CopyOneComponent(const char* source,
+                      const Component& source_component,
+                      CanonOutput* output,
+                      Component* output_component) {
+  if (!source_component.is_valid()) {
+    // This component is not present.
+    *output_component = Component();
+    return;
+  }
+
+  output_component->begin = output->length();
+  int source_end = source_component.end();
+  for (int i = source_component.begin; i < source_end; i++)
+    output->push_back(source[i]);
+  output_component->len = output->length() - output_component->begin;
+}
+
+#ifdef WIN32
+
+// Called on Windows when the base URL is a file URL, this will copy the "C:"
+// to the output, if there is a drive letter and if that drive letter is not
+// being overridden by the relative URL. Otherwise, do nothing.
+//
+// It will return the index of the beginning of the next character in the
+// base to be processed: if there is a "C:", the slash after it, or if
+// there is no drive letter, the slash at the beginning of the path, or
+// the end of the base. This can be used as the starting offset for further
+// path processing.
+template<typename CHAR>
+int CopyBaseDriveSpecIfNecessary(const char* base_url,
+                                 int base_path_begin,
+                                 int base_path_end,
+                                 const CHAR* relative_url,
+                                 int path_start,
+                                 int relative_url_len,
+                                 CanonOutput* output) {
+  if (base_path_begin >= base_path_end)
+    return base_path_begin;  // No path.
+
+  // If the relative begins with a drive spec, don't do anything. The existing
+  // drive spec in the base will be replaced.
+  if (DoesBeginWindowsDriveSpec(relative_url, path_start, relative_url_len)) {
+    return base_path_begin;  // Relative URL path is "C:/foo"
+  }
+
+  // The path should begin with a slash (as all canonical paths do). We check
+  // if it is followed by a drive letter and copy it.
+  if (DoesBeginSlashWindowsDriveSpec(base_url,
+                                     base_path_begin,
+                                     base_path_end)) {
+    // Copy a slash plus the two-character drive spec to the output. It will
+    // now look like "file:///C:" so the rest is treated like a standard path.
+    output->push_back('/');
+    output->push_back(base_url[base_path_begin + 1]);
+    output->push_back(base_url[base_path_begin + 2]);
+    return base_path_begin + 3;
+  }
+
+  return base_path_begin;
+}
+
+#endif  // WIN32
+
+// A subroutine of DoResolveRelativeURL, this resolves the URL knowing that
+// the input is a relative path or less (query or ref).
+template<typename CHAR>
+bool DoResolveRelativePath(const char* base_url,
+                           const Parsed& base_parsed,
+                           bool base_is_file,
+                           const CHAR* relative_url,
+                           const Component& relative_component,
+                           CharsetConverter* query_converter,
+                           CanonOutput* output,
+                           Parsed* out_parsed) {
+  bool success = true;
+
+  // We know the authority section didn't change, copy it to the output. We
+  // also know we have a path so can copy up to there.
+  Component path, query, ref;
+  ParsePathInternal(relative_url, relative_component, &path, &query, &ref);
+
+  // Canonical URLs always have a path, so we can use that offset. Reserve
+  // enough room for the base URL, the new path, and some extra bytes for
+  // possible escaped characters.
+  output->ReserveSizeIfNeeded(base_parsed.path.begin +
+                              std::max({path.end(), query.end(), ref.end()}));
+  output->Append(base_url, base_parsed.path.begin);
+
+  if (path.is_nonempty()) {
+    // The path is replaced or modified.
+    int true_path_begin = output->length();
+
+    // For file: URLs on Windows, we don't want to treat the drive letter and
+    // colon as part of the path for relative file resolution when the
+    // incoming URL does not provide a drive spec. We save the true path
+    // beginning so we can fix it up after we are done.
+    int base_path_begin = base_parsed.path.begin;
+#ifdef WIN32
+    if (base_is_file) {
+      base_path_begin = CopyBaseDriveSpecIfNecessary(
+          base_url, base_parsed.path.begin, base_parsed.path.end(),
+          relative_url, relative_component.begin, relative_component.end(),
+          output);
+      // Now the output looks like either "file://" or "file:///C:"
+      // and we can start appending the rest of the path. |base_path_begin|
+      // points to the character in the base that comes next.
+    }
+#endif  // WIN32
+
+    if (IsURLSlash(relative_url[path.begin])) {
+      // Easy case: the path is an absolute path on the server, so we can
+      // just replace everything from the path on with the new versions.
+      // Since the input should be canonical hierarchical URL, we should
+      // always have a path.
+      success &= CanonicalizePath(relative_url, path,
+                                  output, &out_parsed->path);
+    } else {
+      // Relative path, replace the query, and reference. We take the
+      // original path with the file part stripped, and append the new path.
+      // The canonicalizer will take care of resolving ".." and "."
+      size_t path_begin = output->length();
+      CopyToLastSlash(base_url, base_path_begin, base_parsed.path.end(),
+                      output);
+      success &= CanonicalizePartialPathInternal(relative_url, path, path_begin,
+                                                 output);
+      out_parsed->path = MakeRange(path_begin, output->length());
+
+      // The query and ref from the relative URL are appended further below.
+    }
+
+    // Finish with the query and reference part (these can't fail).
+    CanonicalizeQuery(relative_url, query, query_converter,
+                      output, &out_parsed->query);
+    CanonicalizeRef(relative_url, ref, output, &out_parsed->ref);
+
+    // Fix the path beginning to add back the "C:" we may have written above.
+    out_parsed->path = MakeRange(true_path_begin, out_parsed->path.end());
+    return success;
+  }
+
+  // If we get here, the path is unchanged: copy to output.
+  CopyOneComponent(base_url, base_parsed.path, output, &out_parsed->path);
+
+  if (query.is_valid()) {
+    // Just the query specified, replace the query and reference (ignore
+    // failures for refs)
+    CanonicalizeQuery(relative_url, query, query_converter,
+                      output, &out_parsed->query);
+    CanonicalizeRef(relative_url, ref, output, &out_parsed->ref);
+    return success;
+  }
+
+  // If we get here, the query is unchanged: copy to output. Note that the
+  // range of the query parameter doesn't include the question mark, so we
+  // have to add it manually if there is a component.
+  if (base_parsed.query.is_valid())
+    output->push_back('?');
+  CopyOneComponent(base_url, base_parsed.query, output, &out_parsed->query);
+
+  if (ref.is_valid()) {
+    // Just the reference specified: replace it (ignoring failures).
+    CanonicalizeRef(relative_url, ref, output, &out_parsed->ref);
+    return success;
+  }
+
+  // We should always have something to do in this function, the caller checks
+  // that some component is being replaced.
+  DCHECK(false) << "Not reached";
+  return success;
+}
+
+// Resolves a relative URL that contains a host. Typically, these will
+// be of the form "//www.google.com/foo/bar?baz#ref" and the only thing which
+// should be kept from the original URL is the scheme.
+template<typename CHAR>
+bool DoResolveRelativeHost(const char* base_url,
+                           const Parsed& base_parsed,
+                           const CHAR* relative_url,
+                           const Component& relative_component,
+                           CharsetConverter* query_converter,
+                           CanonOutput* output,
+                           Parsed* out_parsed) {
+  // Parse the relative URL, just like we would for anything following a
+  // scheme.
+  Parsed relative_parsed;  // Everything but the scheme is valid.
+  ParseAfterScheme(relative_url, relative_component.end(),
+                   relative_component.begin, &relative_parsed);
+
+  // Now we can just use the replacement function to replace all the necessary
+  // parts of the old URL with the new one.
+  Replacements<CHAR> replacements;
+  replacements.SetUsername(relative_url, relative_parsed.username);
+  replacements.SetPassword(relative_url, relative_parsed.password);
+  replacements.SetHost(relative_url, relative_parsed.host);
+  replacements.SetPort(relative_url, relative_parsed.port);
+  replacements.SetPath(relative_url, relative_parsed.path);
+  replacements.SetQuery(relative_url, relative_parsed.query);
+  replacements.SetRef(relative_url, relative_parsed.ref);
+
+  // Length() does not include the old scheme, so make sure to add it from the
+  // base URL.
+  output->ReserveSizeIfNeeded(
+      replacements.components().Length() +
+      base_parsed.CountCharactersBefore(Parsed::USERNAME, false));
+  SchemeType scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
+  if (!GetStandardSchemeType(base_url, base_parsed.scheme, &scheme_type)) {
+    // A path with an authority section gets canonicalized under standard URL
+    // rules even for a non-standard base, so restore the default type here.
+    scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
+  }
+  return ReplaceStandardURL(base_url, base_parsed, replacements, scheme_type,
+                            query_converter, output, out_parsed);
+}
+
+// Resolves a relative URL that happens to be an absolute file path. Examples
+// include: "//hostname/path", "/c:/foo", and "//hostname/c:/foo".
+template<typename CHAR>
+bool DoResolveAbsoluteFile(const CHAR* relative_url,
+                           const Component& relative_component,
+                           CharsetConverter* query_converter,
+                           CanonOutput* output,
+                           Parsed* out_parsed) {
+  // Parse the file URL. The file URL parsing function uses the same logic
+  // as we do for determining if the file is absolute, in which case it will
+  // not bother to look for a scheme.
+  Parsed relative_parsed;
+  ParseFileURL(&relative_url[relative_component.begin], relative_component.len,
+               &relative_parsed);
+
+  return CanonicalizeFileURL(&relative_url[relative_component.begin],
+                             relative_component.len, relative_parsed,
+                             query_converter, output, out_parsed);
+}
+
+// TODO(brettw) treat two slashes as root like Mozilla for FTP?
+template<typename CHAR>
+bool DoResolveRelativeURL(const char* base_url,
+                          const Parsed& base_parsed,
+                          bool base_is_file,
+                          const CHAR* relative_url,
+                          const Component& relative_component,
+                          CharsetConverter* query_converter,
+                          CanonOutput* output,
+                          Parsed* out_parsed) {
+  // |base_parsed| is the starting point for our output. Since we may have
+  // removed whitespace from |relative_url| before entering this method, we'll
+  // carry over the |potentially_dangling_markup| flag.
+  bool potentially_dangling_markup = out_parsed->potentially_dangling_markup;
+  *out_parsed = base_parsed;
+  if (potentially_dangling_markup)
+    out_parsed->potentially_dangling_markup = true;
+
+  // Sanity check: the base must have a path or we'll break badly below.
+  // We can only resolve relative URLs with base URLs that have hosts and
+  // paths (even the default path of "/" is OK).
+  //
+  // We allow hosts with no length so we can handle file URLs, for example.
+  if (base_parsed.path.is_empty()) {
+    // On error, return the input (resolving a relative URL on a non-relative
+    // base = the base).
+    int base_len = base_parsed.Length();
+    for (int i = 0; i < base_len; i++)
+      output->push_back(base_url[i]);
+    return false;
+  }
+
+  if (relative_component.is_empty()) {
+    // Empty relative URL, leave unchanged, only removing the ref component.
+    int base_len = base_parsed.Length();
+    base_len -= base_parsed.ref.len + 1;  // +1: the ref's leading separator.
+    out_parsed->ref.reset();
+    output->Append(base_url, base_len);
+    return true;
+  }
+
+  int num_slashes = CountConsecutiveSlashes(
+      relative_url, relative_component.begin, relative_component.end());
+
+#ifdef WIN32
+  // On Windows, two slashes for a file path (regardless of which direction
+  // they are) means that it's UNC. Two backslashes on any base scheme mean
+  // that it's an absolute UNC path (we use the base_is_file flag to control
+  // how strict the UNC finder is).
+  //
+  // We also allow Windows absolute drive specs on any scheme (for example
+  // "c:\foo") like IE does. There must be no preceding slashes in this
+  // case (we reject anything like "/c:/foo") because that should be treated
+  // as a path. For file URLs, we allow any number of slashes since that would
+  // be setting the path.
+  //
+  // This assumes the absolute path resolver handles absolute URLs like this
+  // properly. DoCanonicalize does this.
+  int after_slashes = relative_component.begin + num_slashes;
+  if (DoesBeginUNCPath(relative_url, relative_component.begin,
+                       relative_component.end(), !base_is_file) ||
+      ((num_slashes == 0 || base_is_file) &&
+       DoesBeginWindowsDriveSpec(
+           relative_url, after_slashes, relative_component.end()))) {
+    return DoResolveAbsoluteFile(relative_url, relative_component,
+                                 query_converter, output, out_parsed);
+  }
+#else
+  // Other platforms need explicit handling for file: URLs with multiple
+  // slashes because the generic scheme parsing always extracts a host, but a
+  // file: URL only has a host if it has exactly 2 slashes. Even if it does
+  // have a host, we want to use the special host detection logic for file
+  // URLs provided by DoResolveAbsoluteFile(), as opposed to the generic host
+  // detection logic, for consistency with parsing file URLs from scratch.
+  if (base_is_file && num_slashes >= 2) {
+    return DoResolveAbsoluteFile(relative_url, relative_component,
+                                 query_converter, output, out_parsed);
+  }
+#endif
+
+  // Any other double-slashes mean that this is relative to the scheme.
+  if (num_slashes >= 2) {
+    return DoResolveRelativeHost(base_url, base_parsed,
+                                 relative_url, relative_component,
+                                 query_converter, output, out_parsed);
+  }
+
+  // When we get here, we know that the relative URL is on the same host.
+  return DoResolveRelativePath(base_url, base_parsed, base_is_file,
+                               relative_url, relative_component,
+                               query_converter, output, out_parsed);
+}
+
+}  // namespace
+
+bool IsRelativeURL(const char* base,
+                   const Parsed& base_parsed,
+                   const char* fragment,  // 8-bit input.
+                   int fragment_len,
+                   bool is_base_hierarchical,
+                   bool* is_relative,
+                   Component* relative_component) {
+  return DoIsRelativeURL<char>(
+      base, base_parsed, fragment, fragment_len, is_base_hierarchical,
+      is_relative, relative_component);
+}
+
+bool IsRelativeURL(const char* base,
+                   const Parsed& base_parsed,
+                   const char16_t* fragment,  // 16-bit input.
+                   int fragment_len,
+                   bool is_base_hierarchical,
+                   bool* is_relative,
+                   Component* relative_component) {
+  return DoIsRelativeURL<char16_t>(base, base_parsed, fragment, fragment_len,
+                                   is_base_hierarchical, is_relative,
+                                   relative_component);
+}
+
+bool ResolveRelativeURL(const char* base_url,
+                        const Parsed& base_parsed,
+                        bool base_is_file,
+                        const char* relative_url,  // 8-bit input.
+                        const Component& relative_component,
+                        CharsetConverter* query_converter,
+                        CanonOutput* output,
+                        Parsed* out_parsed) {
+  return DoResolveRelativeURL<char>(
+      base_url, base_parsed, base_is_file, relative_url,
+      relative_component, query_converter, output, out_parsed);
+}
+
+bool ResolveRelativeURL(const char* base_url,
+                        const Parsed& base_parsed,
+                        bool base_is_file,
+                        const char16_t* relative_url,  // 16-bit input.
+                        const Component& relative_component,
+                        CharsetConverter* query_converter,
+                        CanonOutput* output,
+                        Parsed* out_parsed) {
+  return DoResolveRelativeURL<char16_t>(base_url, base_parsed, base_is_file,
+                                        relative_url, relative_component,
+                                        query_converter, output, out_parsed);
+}
+
+}  // namespace url
diff --git a/url_canon_stdstring.cc b/url_canon_stdstring.cc
new file mode 100644
index 00000000000..60e2a26747c
--- /dev/null
+++ b/url_canon_stdstring.cc
@@ -0,0 +1,30 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/url_canon_stdstring.h"
+
+namespace url {
+
+StdStringCanonOutput::StdStringCanonOutput(std::string* str) : str_(str) {
+  cur_len_ = str_->size();  // Append to existing data.
+  buffer_ = str_->empty() ? nullptr : &(*str_)[0];  // No buffer when empty.
+  buffer_len_ = str_->size();  // Writable length is the current string size.
+}
+
+StdStringCanonOutput::~StdStringCanonOutput() {
+  // Nothing to release: |str_| is not owned by this object.
+}
+
+void StdStringCanonOutput::Complete() {
+  str_->resize(cur_len_);  // Size the string to the bytes written so far.
+  buffer_len_ = cur_len_;
+}
+
+void StdStringCanonOutput::Resize(size_t sz) {
+  str_->resize(sz);
+  buffer_ = str_->empty() ? nullptr : &(*str_)[0];  // resize() may reallocate.
+  buffer_len_ = sz;
+}
+
+}  // namespace url
diff --git a/url_canon_stdstring.h b/url_canon_stdstring.h
new file mode 100644
index 00000000000..528f91f2f10
--- /dev/null
+++ b/url_canon_stdstring.h
@@ -0,0 +1,132 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_URL_CANON_STDSTRING_H_
+#define URL_URL_CANON_STDSTRING_H_
+
+// This header file defines a canonicalizer output method class for STL
+// strings. Because the canonicalizer tries not to be dependent on the STL,
+// we have segregated it here.
+
+#include <string>
+
+#include "base/compiler_specific.h"
+#include "base/component_export.h"
+#include "base/memory/raw_ptr_exclusion.h"
+#include "base/strings/string_piece.h"
+#include "url/url_canon.h"
+
+namespace url {
+
+// Write into a std::string given in the constructor. This object does not own
+// the string itself, and the user must ensure that the string stays alive
+// throughout the lifetime of this object.
+//
+// The given string will be appended to; any existing data in the string will
+// be preserved.
+//
+// Note that when canonicalization is complete, the string will likely have
+// unused space at the end because we make the string very big to start out
+// with (by |initial_size|). This ends up being important because resize
+// operations are slow, and because the base class needs to write directly
+// into the buffer.
+//
+// Therefore, the user should call Complete() before using the string that
+// this class wrote into.
+class COMPONENT_EXPORT(URL) StdStringCanonOutput : public CanonOutput {
+ public:
+  StdStringCanonOutput(std::string* str);  // |str| is borrowed, never owned.
+
+  StdStringCanonOutput(const StdStringCanonOutput&) = delete;
+  StdStringCanonOutput& operator=(const StdStringCanonOutput&) = delete;
+
+  ~StdStringCanonOutput() override;
+
+  // Must be called after writing has completed but before the string is used.
+  void Complete();
+
+  void Resize(size_t sz) override;  // Resizes |*str_|; re-points the buffer.
+
+ protected:
+  // `str_` is not a raw_ptr<...> for performance reasons (based on analysis of
+  // sampling profiler data and tab_search:top100:2020).
+  RAW_PTR_EXCLUSION std::string* str_;
+};
+
+// An extension of the Replacements class that allows the setters to use
+// StringPieces (implicitly allowing strings or char*s).
+//
+// The contents of the StringPieces are not copied and must remain valid until
+// the StringPieceReplacements object goes out of scope.
+//
+// In order to make it harder to misuse the API the setters do not accept rvalue
+// references to std::strings.
+// Note: Extra const char* overloads are necessary to break ambiguities that
+// would otherwise exist for string literals.
+template <typename CharT>
+class StringPieceReplacements : public Replacements<CharT> {
+ private:
+  using StringT = std::basic_string<CharT>;
+  using StringPieceT = base::BasicStringPiece<CharT>;
+  using ParentT = Replacements<CharT>;
+  using SetterFun = void (ParentT::*)(const CharT*, const Component&);
+
+  void SetImpl(SetterFun fun, StringPieceT str) {  // Uses all of |str|.
+    (this->*fun)(str.data(), Component(0, static_cast<int>(str.size())));
+  }
+
+ public:
+  void SetSchemeStr(const CharT* str) { SetImpl(&ParentT::SetScheme, str); }
+  void SetSchemeStr(StringPieceT str) { SetImpl(&ParentT::SetScheme, str); }
+  void SetSchemeStr(const StringT&&) = delete;
+
+  void SetUsernameStr(const CharT* str) { SetImpl(&ParentT::SetUsername, str); }
+  void SetUsernameStr(StringPieceT str) { SetImpl(&ParentT::SetUsername, str); }
+  void SetUsernameStr(const StringT&&) = delete;
+  using ParentT::ClearUsername;
+
+  void SetPasswordStr(const CharT* str) { SetImpl(&ParentT::SetPassword, str); }
+  void SetPasswordStr(StringPieceT str) { SetImpl(&ParentT::SetPassword, str); }
+  void SetPasswordStr(const StringT&&) = delete;
+  using ParentT::ClearPassword;
+
+  void SetHostStr(const CharT* str) { SetImpl(&ParentT::SetHost, str); }
+  void SetHostStr(StringPieceT str) { SetImpl(&ParentT::SetHost, str); }
+  void SetHostStr(const StringT&&) = delete;
+  using ParentT::ClearHost;
+
+  void SetPortStr(const CharT* str) { SetImpl(&ParentT::SetPort, str); }
+  void SetPortStr(StringPieceT str) { SetImpl(&ParentT::SetPort, str); }
+  void SetPortStr(const StringT&&) = delete;
+  using ParentT::ClearPort;
+
+  void SetPathStr(const CharT* str) { SetImpl(&ParentT::SetPath, str); }
+  void SetPathStr(StringPieceT str) { SetImpl(&ParentT::SetPath, str); }
+  void SetPathStr(const StringT&&) = delete;
+  using ParentT::ClearPath;
+
+  void SetQueryStr(const CharT* str) { SetImpl(&ParentT::SetQuery, str); }
+  void SetQueryStr(StringPieceT str) { SetImpl(&ParentT::SetQuery, str); }
+  void SetQueryStr(const StringT&&) = delete;
+  using ParentT::ClearQuery;
+
+  void SetRefStr(const CharT* str) { SetImpl(&ParentT::SetRef, str); }
+  void SetRefStr(StringPieceT str) { SetImpl(&ParentT::SetRef, str); }
+  void SetRefStr(const StringT&&) = delete;
+  using ParentT::ClearRef;
+
+ private:
+  using ParentT::SetHost;
+  using ParentT::SetPassword;
+  using ParentT::SetPath;
+  using ParentT::SetPort;
+  using ParentT::SetQuery;
+  using ParentT::SetRef;
+  using ParentT::SetScheme;
+  using ParentT::SetUsername;
+};
+
+}  // namespace url
+
+#endif  // URL_URL_CANON_STDSTRING_H_
diff --git a/url_canon_stdurl.cc b/url_canon_stdurl.cc
new file mode 100644
index 00000000000..8096b568bc7
--- /dev/null
+++ b/url_canon_stdurl.cc
@@ -0,0 +1,209 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Functions to canonicalize "standard" URLs, which are ones that have an
+// authority section including a host name.
+
+#include "url/url_canon.h"
+#include "url/url_canon_internal.h"
+#include "url/url_constants.h"
+
+namespace url {
+
+namespace {
+
+template <typename CHAR, typename UCHAR>
+bool DoCanonicalizeStandardURL(const URLComponentSource<CHAR>& source,
+                               const Parsed& parsed,
+                               SchemeType scheme_type,
+                               CharsetConverter* query_converter,
+                               CanonOutput* output,
+                               Parsed* new_parsed) {
+  // Scheme: this will append the colon.
+  bool success = CanonicalizeScheme(source.scheme, parsed.scheme,
+                                    output, &new_parsed->scheme);
+
+  bool scheme_supports_user_info =
+      (scheme_type == SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION);
+  bool scheme_supports_ports =
+      (scheme_type == SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION ||
+       scheme_type == SCHEME_WITH_HOST_AND_PORT);
+
+  // Authority (username, password, host, port)
+  bool have_authority;
+  if ((scheme_supports_user_info &&
+       (parsed.username.is_valid() || parsed.password.is_valid())) ||
+      parsed.host.is_nonempty() ||
+      (scheme_supports_ports && parsed.port.is_valid())) {
+    have_authority = true;
+
+    // Only write the authority separators when we have a scheme.
+    if (parsed.scheme.is_valid()) {
+      output->push_back('/');
+      output->push_back('/');
+    }
+
+    // User info: the canonicalizer will handle the : and @.
+    if (scheme_supports_user_info) {
+      success &= CanonicalizeUserInfo(
+          source.username, parsed.username, source.password, parsed.password,
+          output, &new_parsed->username, &new_parsed->password);
+    } else {
+      new_parsed->username.reset();
+      new_parsed->password.reset();
+    }
+
+    success &= CanonicalizeHost(source.host, parsed.host,
+                                output, &new_parsed->host);
+
+    // Host must not be empty for standard URLs.
+    if (parsed.host.is_empty())
+      success = false;
+
+    // Port: the port canonicalizer will handle the colon.
+    if (scheme_supports_ports) {
+      int default_port = DefaultPortForScheme(
+          &output->data()[new_parsed->scheme.begin], new_parsed->scheme.len);
+      success &= CanonicalizePort(source.port, parsed.port, default_port,
+                                  output, &new_parsed->port);
+    } else {
+      new_parsed->port.reset();
+    }
+  } else {
+    // No authority, clear the components.
+    have_authority = false;
+    new_parsed->host.reset();
+    new_parsed->username.reset();
+    new_parsed->password.reset();
+    new_parsed->port.reset();
+    success = false;  // Standard URLs must have an authority.
+  }
+
+  // Path
+  if (parsed.path.is_valid()) {
+    success &= CanonicalizePath(source.path, parsed.path,
+                                output, &new_parsed->path);
+  } else if (have_authority ||
+             parsed.query.is_valid() || parsed.ref.is_valid()) {
+    // When we have an empty path, make up a path when we have an authority
+    // or something following the path. The only time we allow an empty
+    // output path is when there is nothing else.
+    new_parsed->path = Component(output->length(), 1);
+    output->push_back('/');
+  } else {
+    // No path at all
+    new_parsed->path.reset();
+  }
+
+  // Query: like the ref below, its outcome is not folded into |success|.
+  CanonicalizeQuery(source.query, parsed.query, query_converter,
+                    output, &new_parsed->query);
+
+  // Ref: ignore failure for this, since the page can probably still be loaded.
+  CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);
+
+  // Carry over the flag for potentially dangling markup:
+  if (parsed.potentially_dangling_markup)
+    new_parsed->potentially_dangling_markup = true;
+
+  return success;
+}
+
+}  // namespace
+
+// Returns the default port for the given canonical scheme, or PORT_UNSPECIFIED
+// if the scheme is unknown.
+//
+// Please keep blink::DefaultPortForProtocol and url::DefaultPortForScheme in
+// sync.
+int DefaultPortForScheme(const char* scheme, int scheme_len) {
+  int default_port = PORT_UNSPECIFIED;
+  switch (scheme_len) {  // Dispatch on length first, then compare names.
+    case 4:
+      if (!strncmp(scheme, kHttpScheme, scheme_len))
+        default_port = 80;
+      break;
+    case 5:
+      if (!strncmp(scheme, kHttpsScheme, scheme_len))
+        default_port = 443;
+      break;
+    case 3:
+      if (!strncmp(scheme, kFtpScheme, scheme_len))
+        default_port = 21;
+      else if (!strncmp(scheme, kWssScheme, scheme_len))
+        default_port = 443;
+      break;
+    case 2:
+      if (!strncmp(scheme, kWsScheme, scheme_len))
+        default_port = 80;
+      break;
+  }
+  return default_port;
+}
+
+bool CanonicalizeStandardURL(const char* spec,
+                             int spec_len,  // Not read by this function.
+                             const Parsed& parsed,
+                             SchemeType scheme_type,
+                             CharsetConverter* query_converter,
+                             CanonOutput* output,
+                             Parsed* new_parsed) {
+  return DoCanonicalizeStandardURL<char, unsigned char>(
+      URLComponentSource<char>(spec), parsed, scheme_type, query_converter,
+      output, new_parsed);
+}
+
+bool CanonicalizeStandardURL(const char16_t* spec,
+                             int spec_len,  // Not read by this function.
+                             const Parsed& parsed,
+                             SchemeType scheme_type,
+                             CharsetConverter* query_converter,
+                             CanonOutput* output,
+                             Parsed* new_parsed) {
+  return DoCanonicalizeStandardURL<char16_t, char16_t>(
+      URLComponentSource<char16_t>(spec), parsed, scheme_type, query_converter,
+      output, new_parsed);
+}
+
+// It might be nice in the future to optimize this so unchanged components don't
+// need to be recanonicalized. This is especially true since the common case for
+// ReplaceComponents is removing things we don't want, like reference fragments
+// and usernames. These cases can become more efficient if we can assume the
+// rest of the URL is OK with these removed (or only the modified parts
+// recanonicalized). This would be much more complex to implement, however.
+//
+// You would also need to update DoReplaceComponents in url_util.cc which
+// relies on this re-checking everything (see the comment there for why).
+bool ReplaceStandardURL(const char* base,
+                        const Parsed& base_parsed,
+                        const Replacements<char>& replacements,
+                        SchemeType scheme_type,
+                        CharsetConverter* query_converter,
+                        CanonOutput* output,
+                        Parsed* new_parsed) {
+  URLComponentSource<char> source(base);  // Every component starts at |base|.
+  Parsed parsed(base_parsed);  // Mutable copy the overrides can adjust.
+  SetupOverrideComponents(base, replacements, &source, &parsed);
+  return DoCanonicalizeStandardURL<char, unsigned char>(
+      source, parsed, scheme_type, query_converter, output, new_parsed);
+}
+
+// For 16-bit replacements, we turn all the replacements into UTF-8 so the
+// regular code path can be used.
+bool ReplaceStandardURL(const char* base,
+                        const Parsed& base_parsed,
+                        const Replacements<char16_t>& replacements,
+                        SchemeType scheme_type,
+                        CharsetConverter* query_converter,
+                        CanonOutput* output,
+                        Parsed* new_parsed) {
+  RawCanonOutput<1024> utf8;  // Receives the UTF-8 form of the replacements.
+  URLComponentSource<char> source(base);
+  Parsed parsed(base_parsed);  // Mutable copy the overrides can adjust.
+  SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
+  return DoCanonicalizeStandardURL<char, unsigned char>(
+      source, parsed, scheme_type, query_converter, output, new_parsed);
+}
+
+}  // namespace url
diff --git a/url_canon_unittest.cc b/url_canon_unittest.cc
new file mode 100644
index 00000000000..dee00d86e22
--- /dev/null
+++ b/url_canon_unittest.cc
@@ -0,0 +1,2748 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/url_canon.h"
+
+#include <errno.h>
+#include <stddef.h>
+
+#include "base/strings/string_piece.h"
+#include "base/strings/utf_string_conversions.h"
+#include "base/test/gtest_util.h"
+#include "base/test/scoped_feature_list.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "url/third_party/mozilla/url_parse.h"
+#include "url/url_canon_internal.h"
+#include "url/url_canon_stdstring.h"
+#include "url/url_features.h"
+#include "url/url_test_utils.h"
+
+namespace url {
+
+namespace {
+
+struct ComponentCase {
+  const char* input;  // Raw component text handed to the canonicalizer.
+  const char* expected;  // Full canonical output string.
+  Component expected_component;  // Range reported for the output component.
+  bool expected_success;  // Expected return value.
+};
+
+// ComponentCase but with dual 8-bit/16-bit input. Generally, the unit tests
+// treat each input as optional, and will only try processing if non-NULL.
+// The output is always 8-bit.
+struct DualComponentCase {
+  const char* input8;
+  const wchar_t* input16;
+  const char* expected;
+  Component expected_component;
+  bool expected_success;
+};
+
+// Test cases for CanonicalizeIPAddress(). The inputs are identical to
+// DualComponentCase, but the output has extra CanonHostInfo fields.
+struct IPAddressCase {
+  const char* input8;
+  const wchar_t* input16;
+  const char* expected;
+  Component expected_component;
+
+  // CanonHostInfo fields, for verbose output.
+  CanonHostInfo::Family expected_family;
+  int expected_num_ipv4_components;
+  const char* expected_address_hex;  // Two hex chars per IP address byte.
+};
+
+std::string BytesToHexString(unsigned char bytes[16], int length) {
+  EXPECT_TRUE(length == 0 || length == 4 || length == 16)
+      << "Bad IP address length: " << length;
+  std::string result;
+  for (int i = 0; i < length; ++i) {
+    result.push_back(kHexCharLookup[(bytes[i] >> 4) & 0xf]);  // High nibble.
+    result.push_back(kHexCharLookup[bytes[i] & 0xf]);  // Low nibble.
+  }
+  return result;
+}
+
+struct ReplaceCase {
+  const char* base;
+  const char* scheme;
+  const char* username;
+  const char* password;
+  const char* host;
+  const char* port;
+  const char* path;
+  const char* query;
+  const char* ref;
+  const char* expected;
+};
+
+// Magic string used in the replacements code that tells SetupReplComp to
+// call the clear function.
+const char kDeleteComp[] = "|";
+
+// Sets up a replacement for a single component. This is given pointers to
+// the set and clear function for the component being replaced, and will
+// either set the component (if it exists) or clear it (if the replacement
+// string matches kDeleteComp).
+//
+// This template is currently used only for the 8-bit case, and the strlen
+// causes it to fail in other cases. It is left a template in case we have
+// tests for wide replacements.
+template<typename CHAR>
+void SetupReplComp(
+    void (Replacements<CHAR>::*set)(const CHAR*, const Component&),
+    void (Replacements<CHAR>::*clear)(),
+    Replacements<CHAR>* rep,
+    const CHAR* str) {  // A null |str| leaves |rep| untouched.
+  if (str && str[0] == kDeleteComp[0]) {  // Only the first char is compared.
+    (rep->*clear)();
+  } else if (str) {
+    (rep->*set)(str, Component(0, static_cast<int>(strlen(str))));
+  }
+}
+
+}  // namespace
+
+TEST(URLCanonTest, DoAppendUTF8) {
+  struct UTF8Case {
+    unsigned input;  // Code point to encode.
+    const char* output;  // Expected UTF-8 bytes.
+  } utf_cases[] = {
+    // Valid code points.
+    {0x24, "\x24"},
+    {0xA2, "\xC2\xA2"},
+    {0x20AC, "\xE2\x82\xAC"},
+    {0x24B62, "\xF0\xA4\xAD\xA2"},
+    {0x10FFFF, "\xF4\x8F\xBF\xBF"},
+  };
+  std::string out_str;
+  for (size_t i = 0; i < std::size(utf_cases); i++) {
+    out_str.clear();  // The output appends, so start each case empty.
+    StdStringCanonOutput output(&out_str);
+    AppendUTF8Value(utf_cases[i].input, &output);
+    output.Complete();
+    EXPECT_EQ(utf_cases[i].output, out_str);
+  }
+}
+
+TEST(URLCanonTest, DoAppendUTF8Invalid) {
+  std::string out_str;
+  StdStringCanonOutput output(&out_str);
+  // Invalid code point: one past the largest valid value, 0x10FFFF.
+  EXPECT_DCHECK_DEATH({
+    AppendUTF8Value(0x110000, &output);
+    output.Complete();
+  });
+}
+
+TEST(URLCanonTest, UTF) {
+  // Low-level test that we handle reading, canonicalization, and writing
+  // UTF-8/UTF-16 strings properly.
+  struct UTFCase {
+    const char* input8;
+    const wchar_t* input16;
+    bool expected_success;
+    const char* output;
+  } utf_cases[] = {
+      // Valid canonical input should get passed through & escaped.
+      {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true, "%E4%BD%A0%E5%A5%BD"},
+      // Test a character that takes > 16 bits (U+10300 = old italic letter A)
+      {"\xF0\x90\x8C\x80", L"\xd800\xdf00", true, "%F0%90%8C%80"},
+      // Non-shortest-form UTF-8 characters are invalid. The bad bytes should
+      // each be replaced with the invalid character (EF BF BD in UTF-8).
+      {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", nullptr, false,
+       "%EF%BF%BD%EF%BF%BD%EF%BF%BD%EF%BF%BD%E5%A5%BD"},
+      // Invalid UTF-8 sequences should be marked as invalid (the first
+      // sequence is truncated).
+      {"\xe4\xa0\xe5\xa5\xbd", L"\xd800\x597d", false, "%EF%BF%BD%E5%A5%BD"},
+      // Character going off the end.
+      {"\xe4\xbd\xa0\xe5\xa5", L"\x4f60\xd800", false, "%E4%BD%A0%EF%BF%BD"},
+      // ...same with low surrogates with no high surrogate.
+      {nullptr, L"\xdc00", false, "%EF%BF%BD"},
+      // Test a UTF-8 encoded surrogate value is marked as invalid.
+      // ED A0 80 = U+D800
+      {"\xed\xa0\x80", nullptr, false, "%EF%BF%BD%EF%BF%BD%EF%BF%BD"},
+      // ...even when paired.
+      {"\xed\xa0\x80\xed\xb0\x80", nullptr, false,
+       "%EF%BF%BD%EF%BF%BD%EF%BF%BD%EF%BF%BD%EF%BF%BD%EF%BF%BD"},
+  };
+
+  std::string out_str;
+  for (size_t i = 0; i < std::size(utf_cases); i++) {
+    if (utf_cases[i].input8) {
+      out_str.clear();
+      StdStringCanonOutput output(&out_str);
+
+      size_t input_len = strlen(utf_cases[i].input8);
+      bool success = true;
+      for (size_t ch = 0; ch < input_len; ch++) {
+        success &= AppendUTF8EscapedChar(utf_cases[i].input8, &ch, input_len,
+                                         &output);
+      }
+      output.Complete();
+      EXPECT_EQ(utf_cases[i].expected_success, success);
+      EXPECT_EQ(std::string(utf_cases[i].output), out_str);
+    }
+    if (utf_cases[i].input16) {
+      out_str.clear();
+      StdStringCanonOutput output(&out_str);
+
+      std::u16string input_str(
+          test_utils::TruncateWStringToUTF16(utf_cases[i].input16));
+      size_t input_len = input_str.length();
+      bool success = true;
+      for (size_t ch = 0; ch < input_len; ch++) {
+        success &= AppendUTF8EscapedChar(input_str.c_str(), &ch, input_len,
+                                         &output);
+      }
+      output.Complete();
+      EXPECT_EQ(utf_cases[i].expected_success, success);
+      EXPECT_EQ(std::string(utf_cases[i].output), out_str);
+    }
+
+    if (utf_cases[i].input8 && utf_cases[i].input16 &&
+        utf_cases[i].expected_success) {
+      // Check that the UTF-8 and UTF-16 inputs are equivalent.
+
+      // UTF-16 -> UTF-8
+      std::string input8_str(utf_cases[i].input8);
+      std::u16string input16_str(
+          test_utils::TruncateWStringToUTF16(utf_cases[i].input16));
+      EXPECT_EQ(input8_str, base::UTF16ToUTF8(input16_str));
+
+      // UTF-8 -> UTF-16
+      EXPECT_EQ(input16_str, base::UTF8ToUTF16(input8_str));
+    }
+  }
+}
+
+TEST(URLCanonTest, Scheme) {
+  // Here, we're mostly testing that unusual characters are handled properly.
+  // The canonicalizer doesn't do any parsing or whitespace detection. It will
+  // also do its best on error, and will escape funny sequences (these won't be
+  // valid schemes and it will return error).
+  //
+  // Note that the canonicalizer will append a colon to the output to separate
+  // out the rest of the URL, which is not present in the input. We check,
+  // however, that the output range includes everything but the colon.
+  ComponentCase scheme_cases[] = {
+    {"http", "http:", Component(0, 4), true},
+    {"HTTP", "http:", Component(0, 4), true},
+    {" HTTP ", "%20http%20:", Component(0, 10), false},
+    {"htt: ", "htt%3A%20:", Component(0, 9), false},
+    {"\xe4\xbd\xa0\xe5\xa5\xbdhttp", "%E4%BD%A0%E5%A5%BDhttp:", Component(0, 22), false},
+      // Don't re-escape something already escaped. Note that it will
+      // "canonicalize" the 'A' to 'a', but that's OK.
+    {"ht%3Atp", "ht%3atp:", Component(0, 7), false},
+    {"", ":", Component(0, 0), false},
+  };
+
+  std::string out_str;
+
+  for (size_t i = 0; i < std::size(scheme_cases); i++) {
+    int url_len = static_cast<int>(strlen(scheme_cases[i].input));
+    Component in_comp(0, url_len);
+    Component out_comp;
+
+    out_str.clear();
+    StdStringCanonOutput output1(&out_str);
+    bool success = CanonicalizeScheme(scheme_cases[i].input, in_comp, &output1,
+                                      &out_comp);
+    output1.Complete();
+
+    EXPECT_EQ(scheme_cases[i].expected_success, success);
+    EXPECT_EQ(std::string(scheme_cases[i].expected), out_str);
+    EXPECT_EQ(scheme_cases[i].expected_component.begin, out_comp.begin);
+    EXPECT_EQ(scheme_cases[i].expected_component.len, out_comp.len);
+
+    // Now try the wide version; the expected output is the same 8-bit string.
+    out_str.clear();
+    StdStringCanonOutput output2(&out_str);
+
+    std::u16string wide_input(base::UTF8ToUTF16(scheme_cases[i].input));
+    in_comp.len = static_cast<int>(wide_input.length());  // 16-bit unit count.
+    success = CanonicalizeScheme(wide_input.c_str(), in_comp, &output2,
+                                 &out_comp);
+    output2.Complete();
+
+    EXPECT_EQ(scheme_cases[i].expected_success, success);
+    EXPECT_EQ(std::string(scheme_cases[i].expected), out_str);
+    EXPECT_EQ(scheme_cases[i].expected_component.begin, out_comp.begin);
+    EXPECT_EQ(scheme_cases[i].expected_component.len, out_comp.len);
+  }
+
+  // Test the case where the scheme is declared nonexistent, it should be
+  // converted into an empty scheme.
+  Component out_comp;
+  out_str.clear();
+  StdStringCanonOutput output(&out_str);
+
+  EXPECT_FALSE(CanonicalizeScheme("", Component(0, -1), &output, &out_comp));
+  output.Complete();
+
+  EXPECT_EQ(std::string(":"), out_str);
+  EXPECT_EQ(0, out_comp.begin);
+  EXPECT_EQ(0, out_comp.len);
+}
+
+// Test parameter for URLCanonHostTest: which IDNA feature state to run under.
+enum class IDNAMode { kTransitional, kNonTransitional };
+
+// Fixture that sets kUseIDNA2008NonTransitional according to GetParam().
+class URLCanonHostTest : public ::testing::Test,
+                         public ::testing::WithParamInterface<IDNAMode> {
+ public:
+  URLCanonHostTest() {
+    const bool non_transitional = GetParam() == IDNAMode::kNonTransitional;
+    if (!non_transitional) {
+      scoped_feature_list_.InitAndDisableFeature(kUseIDNA2008NonTransitional);
+      return;
+    }
+    scoped_feature_list_.InitAndEnableFeature(kUseIDNA2008NonTransitional);
+  }
+
+ private:
+  base::test::ScopedFeatureList scoped_feature_list_;
+};
+
+INSTANTIATE_TEST_SUITE_P(
+    All, URLCanonHostTest,
+    ::testing::Values(IDNAMode::kTransitional, IDNAMode::kNonTransitional));
+
+TEST_P(URLCanonHostTest, Host) {
+  bool use_idna_non_transitional = IsUsingIDNA2008NonTransitional();
+
+  IPAddressCase host_cases[] = {
+      // Basic canonicalization, uppercase should be converted to lowercase.
+      {"GoOgLe.CoM", L"GoOgLe.CoM", "google.com", Component(0, 10),
+       CanonHostInfo::NEUTRAL, -1, ""},
+      // Spaces and some other characters should be escaped.
+      {"Goo%20 goo%7C|.com", L"Goo%20 goo%7C|.com", "goo%20%20goo%7C%7C.com",
+       Component(0, 22), CanonHostInfo::NEUTRAL, -1, ""},
+      // Exciting different types of spaces!
+      {NULL, L"GOO\x00a0\x3000goo.com", "goo%20%20goo.com", Component(0, 16),
+       CanonHostInfo::NEUTRAL, -1, ""},
+      // Other types of space (no-break, zero-width, zero-width-no-break) are
+      // name-prepped away to nothing.
+      {NULL, L"GOO\x200b\x2060\xfeffgoo.com", "googoo.com", Component(0, 10),
+       CanonHostInfo::NEUTRAL, -1, ""},
+      // Ideographic full stop (full-width period for Chinese, etc.) should be
+      // treated as a dot.
+      {NULL,
+       L"www.foo\x3002"
+       L"bar.com",
+       "www.foo.bar.com", Component(0, 15), CanonHostInfo::NEUTRAL, -1, ""},
+      // Invalid unicode characters should fail...
+      // ...In wide input, ICU will barf and we'll end up with the input as
+      //    escaped UTF-8 (the invalid character should be replaced with the
+      //    replacement character).
+      {"\xef\xb7\x90zyx.com", L"\xfdd0zyx.com", "%EF%BF%BDzyx.com",
+       Component(0, 16), CanonHostInfo::BROKEN, -1, ""},
+      // ...This is the same as the previous case but with the input escaped.
+      {"%ef%b7%90zyx.com", L"%ef%b7%90zyx.com", "%EF%BF%BDzyx.com",
+       Component(0, 16), CanonHostInfo::BROKEN, -1, ""},
+      // Test name prepping: fullwidth input should be converted to ASCII and
+      // NOT IDN-ized, so no "xn--" label is expected in the output.
+      // This is "Go" in fullwidth UTF-8/UTF-16.
+      {"\xef\xbc\xa7\xef\xbd\x8f.com", L"\xff27\xff4f.com", "go.com",
+       Component(0, 6), CanonHostInfo::NEUTRAL, -1, ""},
+      // Test that fullwidth escaped values are properly name-prepped,
+      // then converted or rejected.
+      // ...%41 in fullwidth = 'A' (also as escaped UTF-8 input)
+      {"\xef\xbc\x85\xef\xbc\x94\xef\xbc\x91.com", L"\xff05\xff14\xff11.com",
+       "a.com", Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""},
+      {"%ef%bc%85%ef%bc%94%ef%bc%91.com", L"%ef%bc%85%ef%bc%94%ef%bc%91.com",
+       "a.com", Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""},
+      // ...%00 in fullwidth should fail (also as escaped UTF-8 input)
+      {"\xef\xbc\x85\xef\xbc\x90\xef\xbc\x90.com", L"\xff05\xff10\xff10.com",
+       "%00.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""},
+      {"%ef%bc%85%ef%bc%90%ef%bc%90.com", L"%ef%bc%85%ef%bc%90%ef%bc%90.com",
+       "%00.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""},
+      // ICU will convert weird percents into ASCII percents, but not unescape
+      // further. A weird percent is U+FE6A (EF B9 AA in UTF-8) which is a
+      // "small percent". At this point we should be within our rights to mark
+      // anything as invalid since the URL is corrupt or malicious. The code
+      // happens to allow ASCII characters (%41 = "A" -> 'a') to be unescaped
+      // and kept as valid, so we validate that behavior here, but this level
+      // of fixing the input shouldn't be seen as required. "%81" is invalid.
+      {"\xef\xb9\xaa"
+       "41.com",
+       L"\xfe6a"
+       L"41.com",
+       "a.com", Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""},
+      {"%ef%b9%aa"
+       "41.com",
+       L"\xfe6a"
+       L"41.com",
+       "a.com", Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""},
+      {"\xef\xb9\xaa"
+       "81.com",
+       L"\xfe6a"
+       L"81.com",
+       "%81.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""},
+      {"%ef%b9%aa"
+       "81.com",
+       L"\xfe6a"
+       L"81.com",
+       "%81.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""},
+      // Basic IDN support, UTF-8 and UTF-16 input should be converted to IDN
+      {"\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd",
+       L"\x4f60\x597d\x4f60\x597d", "xn--6qqa088eba", Component(0, 14),
+       CanonHostInfo::NEUTRAL, -1, ""},
+      // See http://unicode.org/cldr/utility/idna.jsp for other
+      // examples/experiments and http://goo.gl/7yG11o
+      // for the full list of characters handled differently by
+      // IDNA 2003, UTS 46 (http://unicode.org/reports/tr46/ ) and IDNA 2008.
+
+      // 4 Deviation characters are mapped/ignored in UTS 46 transitional
+      // mechanism. UTS 46, table 4 row (g).
+      // Sharp-s is mapped to 'ss' in IDNA 2003, not in IDNA 2008 or UTS 46
+      // after transitional period.
+      // Previously, it'd be "fussball.de".
+      {"fu\xc3\x9f"
+       "ball.de",
+       L"fu\x00df"
+       L"ball.de",
+       use_idna_non_transitional ? "xn--fuball-cta.de" : "fussball.de",
+       use_idna_non_transitional ? Component(0, 17) : Component(0, 11),
+       CanonHostInfo::NEUTRAL, -1, ""},
+
+      // Final sigma (U+03C2) was mapped to regular sigma (U+03C3).
+      // Previously, it'd be "xn--wxaikc9b".
+      {"\xcf\x83\xcf\x8c\xce\xbb\xce\xbf\xcf\x82", L"\x3c3\x3cc\x3bb\x3bf\x3c2",
+       use_idna_non_transitional ? "xn--wxaijb9b" : "xn--wxaikc6b",
+       Component(0, 12), CanonHostInfo::NEUTRAL, -1, ""},
+
+      // ZWNJ (U+200C) and ZWJ (U+200D) are mapped away in UTS 46 transitional
+      // handling as well as in IDNA 2003, but not thereafter.
+      {"a\xe2\x80\x8c"
+       "b\xe2\x80\x8d"
+       "c",
+       L"a\x200c"
+       L"b\x200d"
+       L"c",
+       use_idna_non_transitional ? "xn--abc-9m0ag" : "abc",
+       use_idna_non_transitional ? Component(0, 13) : Component(0, 3),
+       CanonHostInfo::NEUTRAL, -1, ""},
+
+      // ZWJ between Devanagari characters was still mapped away in UTS 46
+      // transitional handling. IDNA 2008 gives xn--11bo0mv54g.
+      // Previously "xn--11bo0m".
+      {"\xe0\xa4\x95\xe0\xa5\x8d\xe2\x80\x8d\xe0\xa4\x9c",
+       L"\x915\x94d\x200d\x91c",
+       use_idna_non_transitional ? "xn--11bo0mv54g" : "xn--11bo0m",
+       use_idna_non_transitional ? Component(0, 14) : Component(0, 10),
+       CanonHostInfo::NEUTRAL, -1, ""},
+
+      // Fullwidth exclamation mark is disallowed. UTS 46, table 4, row (b)
+      // However, we do allow this at the moment because we don't use
+      // STD3 rules and canonicalize full-width ASCII to ASCII.
+      {"wow\xef\xbc\x81", L"wow\xff01", "wow%21", Component(0, 6),
+       CanonHostInfo::NEUTRAL, -1, ""},
+      // U+2132 (turned capital F) is disallowed. UTS 46, table 4, row (c)
+      // Allowed in IDNA 2003, but the mapping changed after Unicode 3.2
+      {"\xe2\x84\xb2oo", L"\x2132oo", "%E2%84%B2oo", Component(0, 11),
+       CanonHostInfo::BROKEN, -1, ""},
+      // U+2F868 (CJK Comp) is disallowed. UTS 46, table 4, row (d)
+      // Allowed in IDNA 2003, but the mapping changed after Unicode 3.2
+      {"\xf0\xaf\xa1\xa8\xe5\xa7\xbb.cn", L"\xd87e\xdc68\x59fb.cn",
+       "%F0%AF%A1%A8%E5%A7%BB.cn", Component(0, 24), CanonHostInfo::BROKEN, -1,
+       ""},
+      // Maps uppercase letters to lower case letters. UTS 46 table 4 row (e)
+      {"M\xc3\x9cNCHEN", L"M\xdcNCHEN", "xn--mnchen-3ya", Component(0, 14),
+       CanonHostInfo::NEUTRAL, -1, ""},
+      // An already-IDNA host is not modified.
+      {"xn--mnchen-3ya", L"xn--mnchen-3ya", "xn--mnchen-3ya", Component(0, 14),
+       CanonHostInfo::NEUTRAL, -1, ""},
+      // Symbol/punctuations are allowed in IDNA 2003/UTS46.
+      // Not allowed in IDNA 2008. UTS 46 table 4 row (f).
+      {"\xe2\x99\xa5ny.us", L"\x2665ny.us", "xn--ny-s0x.us", Component(0, 13),
+       CanonHostInfo::NEUTRAL, -1, ""},
+      // U+11013 is new in Unicode 6.0 and is allowed. UTS 46 table 4, row (h)
+      // We used to allow it because we passed through unassigned code points.
+      {"\xf0\x91\x80\x93.com", L"\xd804\xdc13.com", "xn--n00d.com",
+       Component(0, 12), CanonHostInfo::NEUTRAL, -1, ""},
+      // U+0602 is disallowed in UTS46/IDNA 2008. UTS 46 table 4, row(i)
+      // Used to be allowed in IDNA 2003.
+      {"\xd8\x82.eg", L"\x602.eg", "%D8%82.eg", Component(0, 9),
+       CanonHostInfo::BROKEN, -1, ""},
+      // U+20B7 is new in Unicode 5.2 (not a part of IDNA 2003 based
+      // on Unicode 3.2). We did allow it in the past because we let unassigned
+      // code point pass. We continue to allow it even though it's a
+      // "punctuation and symbol" blocked in IDNA 2008.
+      // UTS 46 table 4, row (j)
+      {"\xe2\x82\xb7.com", L"\x20b7.com", "xn--wzg.com", Component(0, 11),
+       CanonHostInfo::NEUTRAL, -1, ""},
+      // Maps uppercase letters to lower case letters.
+      // In IDNA 2003, it's allowed without case-folding
+      // ( xn--bc-7cb.com ) because it's not defined in Unicode 3.2
+      // (added in Unicode 4.1). UTS 46 table 4 row (k)
+      {"bc\xc8\xba.com", L"bc\x23a.com", "xn--bc-is1a.com", Component(0, 15),
+       CanonHostInfo::NEUTRAL, -1, ""},
+      // Maps U+FF43 (Full Width Small Letter C) to 'c'.
+      {"ab\xef\xbd\x83.xyz", L"ab\xff43.xyz", "abc.xyz", Component(0, 7),
+       CanonHostInfo::NEUTRAL, -1, ""},
+      // Maps U+1D68C (Math Monospace Small C) to 'c'.
+      // U+1D68C = \xD835\xDE8C in UTF-16
+      {"ab\xf0\x9d\x9a\x8c.xyz", L"ab\xd835\xde8c.xyz", "abc.xyz",
+       Component(0, 7), CanonHostInfo::NEUTRAL, -1, ""},
+      // BiDi check test
+      // "Divehi" in Divehi (Thaana script) ends with BidiClass=NSM.
+      // Disallowed in IDNA 2003 but now allowed in UTS 46/IDNA 2008.
+      {"\xde\x8b\xde\xa8\xde\x88\xde\xac\xde\x80\xde\xa8",
+       L"\x78b\x7a8\x788\x7ac\x780\x7a8", "xn--hqbpi0jcw", Component(0, 13),
+       CanonHostInfo::NEUTRAL, -1, ""},
+      // Disallowed in both IDNA 2003 and 2008 with BiDi check.
+      // Labels starting with a RTL character cannot end with a LTR character.
+      {"\xd8\xac\xd8\xa7\xd8\xb1xyz", L"\x62c\x627\x631xyz",
+       "%D8%AC%D8%A7%D8%B1xyz", Component(0, 21), CanonHostInfo::BROKEN, -1,
+       ""},
+      // Labels starting with a RTL character can end with BC=EN (European
+      // number). Disallowed in IDNA 2003 but now allowed.
+      {"\xd8\xac\xd8\xa7\xd8\xb1"
+       "2",
+       L"\x62c\x627\x631"
+       L"2",
+       "xn--2-ymcov", Component(0, 11), CanonHostInfo::NEUTRAL, -1, ""},
+      // Labels starting with a RTL character cannot have "L" characters
+      // even if it ends with an BC=EN. Disallowed in both IDNA 2003/2008.
+      {"\xd8\xac\xd8\xa7\xd8\xb1xy2", L"\x62c\x627\x631xy2",
+       "%D8%AC%D8%A7%D8%B1xy2", Component(0, 21), CanonHostInfo::BROKEN, -1,
+       ""},
+      // Labels starting with a RTL character can end with BC=AN (Arabic number)
+      // Disallowed in IDNA 2003, but now allowed.
+      {"\xd8\xac\xd8\xa7\xd8\xb1\xd9\xa2", L"\x62c\x627\x631\x662",
+       "xn--mgbjq0r", Component(0, 11), CanonHostInfo::NEUTRAL, -1, ""},
+      // Labels starting with a RTL character cannot have "L" characters
+      // even if it ends with an BC=AN (Arabic number).
+      // Disallowed in both IDNA 2003/2008.
+      {"\xd8\xac\xd8\xa7\xd8\xb1xy\xd9\xa2", L"\x62c\x627\x631xy\x662",
+       "%D8%AC%D8%A7%D8%B1xy%D9%A2", Component(0, 26), CanonHostInfo::BROKEN,
+       -1, ""},
+      // Labels starting with a RTL character cannot mix BC=EN and BC=AN
+      {"\xd8\xac\xd8\xa7\xd8\xb1xy2\xd9\xa2", L"\x62c\x627\x631xy2\x662",
+       "%D8%AC%D8%A7%D8%B1xy2%D9%A2", Component(0, 27), CanonHostInfo::BROKEN,
+       -1, ""},
+      // As of Unicode 6.2, U+20CF is not assigned. We do not allow it.
+      {"\xe2\x83\x8f.com", L"\x20cf.com", "%E2%83%8F.com", Component(0, 13),
+       CanonHostInfo::BROKEN, -1, ""},
+      // U+0080 is not allowed.
+      {"\xc2\x80.com", L"\x80.com", "%C2%80.com", Component(0, 10),
+       CanonHostInfo::BROKEN, -1, ""},
+      // Mixed UTF-8 and escaped UTF-8 (narrow case) and UTF-16 and escaped
+      // UTF-8 (wide case). The output should be equivalent to the true wide
+      // character input above (the "Basic IDN support" case): the escaped
+      // and unescaped halves together yield the same punycode label.
+      {"%E4%BD%A0%E5%A5%BD\xe4\xbd\xa0\xe5\xa5\xbd",
+       L"%E4%BD%A0%E5%A5%BD\x4f60\x597d", "xn--6qqa088eba", Component(0, 14),
+       CanonHostInfo::NEUTRAL, -1, ""},
+      // Invalid escaped characters should fail and the percents should be
+      // escaped.
+      {"%zz%66%a", L"%zz%66%a", "%25zzf%25a", Component(0, 10),
+       CanonHostInfo::BROKEN, -1, ""},
+      // If we get an invalid character that has been escaped.
+      {"%25", L"%25", "%25", Component(0, 3), CanonHostInfo::BROKEN, -1, ""},
+      {"hello%00", L"hello%00", "hello%00", Component(0, 8),
+       CanonHostInfo::BROKEN, -1, ""},
+      // Escaped numbers should be treated like IP addresses if they are.
+      {"%30%78%63%30%2e%30%32%35%30.01", L"%30%78%63%30%2e%30%32%35%30.01",
+       "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, "C0A80001"},
+      {"%30%78%63%30%2e%30%32%35%30.01%2e",
+       L"%30%78%63%30%2e%30%32%35%30.01%2e", "192.168.0.1", Component(0, 11),
+       CanonHostInfo::IPV4, 3, "C0A80001"},
+      // Invalid escaping should trigger the regular host error handling.
+      {"%3g%78%63%30%2e%30%32%35%30%2E.01",
+       L"%3g%78%63%30%2e%30%32%35%30%2E.01", "%253gxc0.0250..01",
+       Component(0, 17), CanonHostInfo::BROKEN, -1, ""},
+      // Something that isn't exactly an IP should get treated as a host and
+      // spaces escaped.
+      {"192.168.0.1 hello", L"192.168.0.1 hello", "192.168.0.1%20hello",
+       Component(0, 19), CanonHostInfo::NEUTRAL, -1, ""},
+      // Fullwidth and escaped UTF-8 fullwidth should still be treated as IP.
+      // These are "0Xc0.0250.01" in fullwidth.
+      {"\xef\xbc\x90%Ef%bc\xb8%ef%Bd%83\xef\xbc\x90%EF%BC%"
+       "8E\xef\xbc\x90\xef\xbc\x92\xef\xbc\x95\xef\xbc\x90\xef\xbc%"
+       "8E\xef\xbc\x90\xef\xbc\x91",
+       L"\xff10\xff38\xff43\xff10\xff0e\xff10\xff12\xff15\xff10\xff0e\xff10"
+       L"\xff11",
+       "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, "C0A80001"},
+      // Broken IP addresses get marked as such.
+      {"192.168.0.257", L"192.168.0.257", "192.168.0.257", Component(0, 13),
+       CanonHostInfo::BROKEN, -1, ""},
+      {"[google.com]", L"[google.com]", "[google.com]", Component(0, 12),
+       CanonHostInfo::BROKEN, -1, ""},
+      // Cyrillic letter followed by '(' should return punycode for '(' escaped
+      // before punycode string was created. I.e.
+      // if '(' is escaped after punycode is created we would get xn--%28-8tb
+      // (incorrect).
+      {"\xd1\x82(", L"\x0442(", "xn--%28-7ed", Component(0, 11),
+       CanonHostInfo::NEUTRAL, -1, ""},
+      // Address with all hexadecimal characters with leading number of 1<<32
+      // or greater and should return NEUTRAL rather than BROKEN if not all
+      // components are numbers.
+      {"12345678912345.de", L"12345678912345.de", "12345678912345.de",
+       Component(0, 17), CanonHostInfo::NEUTRAL, -1, ""},
+      {"1.12345678912345.de", L"1.12345678912345.de", "1.12345678912345.de",
+       Component(0, 19), CanonHostInfo::NEUTRAL, -1, ""},
+      {"12345678912345.12345678912345.de", L"12345678912345.12345678912345.de",
+       "12345678912345.12345678912345.de", Component(0, 32),
+       CanonHostInfo::NEUTRAL, -1, ""},
+      {"1.2.0xB3A73CE5B59.de", L"1.2.0xB3A73CE5B59.de", "1.2.0xb3a73ce5b59.de",
+       Component(0, 20), CanonHostInfo::NEUTRAL, -1, ""},
+      {"12345678912345.0xde", L"12345678912345.0xde", "12345678912345.0xde",
+       Component(0, 19), CanonHostInfo::BROKEN, -1, ""},
+      // A label that starts with "xn--" but contains non-ASCII characters
+      // should be an error (BROKEN). The invalid characters are
+      // percent-escaped in the output.
+      {"xn--m\xc3\xbcnchen", L"xn--m\xfcnchen", "xn--m%C3%BCnchen",
+       Component(0, 16), CanonHostInfo::BROKEN, -1, ""},
+  };
+
+  // CanonicalizeHost() non-verbose: success is expected unless BROKEN.
+  std::string out_str;
+  for (size_t i = 0; i < std::size(host_cases); i++) {
+    // Narrow version (skipped when the case has no 8-bit input).
+    if (host_cases[i].input8) {
+      int host_len = static_cast<int>(strlen(host_cases[i].input8));
+      Component in_comp(0, host_len);
+      Component out_comp;
+
+      out_str.clear();
+      StdStringCanonOutput output(&out_str);
+
+      bool success = CanonicalizeHost(host_cases[i].input8, in_comp, &output,
+                                      &out_comp);
+      output.Complete();
+
+      EXPECT_EQ(host_cases[i].expected_family != CanonHostInfo::BROKEN,
+                success) << "for input: " << host_cases[i].input8;
+      EXPECT_EQ(std::string(host_cases[i].expected), out_str) <<
+                "for input: " << host_cases[i].input8;
+      EXPECT_EQ(host_cases[i].expected_component.begin, out_comp.begin) <<
+                "for input: " << host_cases[i].input8;
+      EXPECT_EQ(host_cases[i].expected_component.len, out_comp.len) <<
+                "for input: " << host_cases[i].input8;
+    }
+
+    // Wide version, fed through TruncateWStringToUTF16() first.
+    if (host_cases[i].input16) {
+      std::u16string input16(
+          test_utils::TruncateWStringToUTF16(host_cases[i].input16));
+      int host_len = static_cast<int>(input16.length());
+      Component in_comp(0, host_len);
+      Component out_comp;
+
+      out_str.clear();
+      StdStringCanonOutput output(&out_str);
+
+      bool success = CanonicalizeHost(input16.c_str(), in_comp, &output,
+                                      &out_comp);
+      output.Complete();
+
+      EXPECT_EQ(host_cases[i].expected_family != CanonHostInfo::BROKEN,
+                success);
+      EXPECT_EQ(std::string(host_cases[i].expected), out_str);
+      EXPECT_EQ(host_cases[i].expected_component.begin, out_comp.begin);
+      EXPECT_EQ(host_cases[i].expected_component.len, out_comp.len);
+    }
+  }
+
+  // CanonicalizeHostVerbose(): also checks the family and address bytes.
+  for (size_t i = 0; i < std::size(host_cases); i++) {
+    // Narrow version (skipped when the case has no 8-bit input).
+    if (host_cases[i].input8) {
+      int host_len = static_cast<int>(strlen(host_cases[i].input8));
+      Component in_comp(0, host_len);
+
+      out_str.clear();
+      StdStringCanonOutput output(&out_str);
+      CanonHostInfo host_info;
+
+      CanonicalizeHostVerbose(host_cases[i].input8, in_comp, &output,
+                              &host_info);
+      output.Complete();
+
+      EXPECT_EQ(host_cases[i].expected_family, host_info.family);
+      EXPECT_EQ(std::string(host_cases[i].expected), out_str);
+      EXPECT_EQ(host_cases[i].expected_component.begin,
+                host_info.out_host.begin);
+      EXPECT_EQ(host_cases[i].expected_component.len, host_info.out_host.len);
+      EXPECT_EQ(std::string(host_cases[i].expected_address_hex),
+                BytesToHexString(host_info.address, host_info.AddressLength()));
+      if (host_cases[i].expected_family == CanonHostInfo::IPV4) {
+        EXPECT_EQ(host_cases[i].expected_num_ipv4_components,
+                  host_info.num_ipv4_components);
+      }
+    }
+
+    // Wide version, fed through TruncateWStringToUTF16() first.
+    if (host_cases[i].input16) {
+      std::u16string input16(
+          test_utils::TruncateWStringToUTF16(host_cases[i].input16));
+      int host_len = static_cast<int>(input16.length());
+      Component in_comp(0, host_len);
+
+      out_str.clear();
+      StdStringCanonOutput output(&out_str);
+      CanonHostInfo host_info;
+
+      CanonicalizeHostVerbose(input16.c_str(), in_comp, &output, &host_info);
+      output.Complete();
+
+      EXPECT_EQ(host_cases[i].expected_family, host_info.family);
+      EXPECT_EQ(std::string(host_cases[i].expected), out_str);
+      EXPECT_EQ(host_cases[i].expected_component.begin,
+                host_info.out_host.begin);
+      EXPECT_EQ(host_cases[i].expected_component.len, host_info.out_host.len);
+      EXPECT_EQ(std::string(host_cases[i].expected_address_hex),
+                BytesToHexString(host_info.address, host_info.AddressLength()));
+      if (host_cases[i].expected_family == CanonHostInfo::IPV4) {
+        EXPECT_EQ(host_cases[i].expected_num_ipv4_components,
+                  host_info.num_ipv4_components);
+      }
+    }
+  }
+}
+
+// Runs CanonicalizeIPAddress() over IPv4-style inputs in 8- and 16-bit form.
+TEST(URLCanonTest, IPv4) {
+  // clang-format off
+  IPAddressCase cases[] = {
+    // Empty is not an IP address.
+    {"", L"", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+    {".", L".", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+    // Regular IP addresses in different bases.
+    {"192.168.0.1", L"192.168.0.1", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"},
+    {"0300.0250.00.01", L"0300.0250.00.01", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"},
+    {"0xC0.0Xa8.0x0.0x1", L"0xC0.0Xa8.0x0.0x1", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"},
+    // Non-IP addresses due to invalid characters.
+    {"192.168.9.com", L"192.168.9.com", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+    // Hostnames with a numeric final component but other components that don't
+    // parse as numbers should be considered broken.
+    {"19a.168.0.1", L"19a.168.0.1", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"19a.168.0.1.", L"19a.168.0.1.", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"0308.0250.00.01", L"0308.0250.00.01", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"0308.0250.00.01.", L"0308.0250.00.01.", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"0xCG.0xA8.0x0.0x1", L"0xCG.0xA8.0x0.0x1", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"0xCG.0xA8.0x0.0x1.", L"0xCG.0xA8.0x0.0x1.", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    // Non-numeric terminal component should be considered not IPv4 hostnames, but valid.
+    {"19.168.0.1a", L"19.168.0.1a", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+    {"0xC.0xA8.0x0.0x1G", L"0xC.0xA8.0x0.0x1G", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+    // Hostnames that would be considered broken IPv4 hostnames should be considered valid non-IPv4 hostnames if they end with two dots instead of 0 or 1.
+    {"19a.168.0.1..", L"19a.168.0.1..", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+    {"0308.0250.00.01..", L"0308.0250.00.01..", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+    {"0xCG.0xA8.0x0.0x1..", L"0xCG.0xA8.0x0.0x1..", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+    // Hosts with components that aren't considered valid IPv4 numbers but are entirely numeric should be considered invalid.
+    {"1.2.3.08", L"1.2.3.08", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"1.2.3.08.", L"1.2.3.08.", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    // If there are not enough components, the last one should fill them out.
+    {"192", L"192", "0.0.0.192", Component(0, 9), CanonHostInfo::IPV4, 1, "000000C0"},
+    {"0xC0a80001", L"0xC0a80001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 1, "C0A80001"},
+    {"030052000001", L"030052000001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 1, "C0A80001"},
+    {"000030052000001", L"000030052000001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 1, "C0A80001"},
+    {"192.168", L"192.168", "192.0.0.168", Component(0, 11), CanonHostInfo::IPV4, 2, "C00000A8"},
+    {"192.0x00A80001", L"192.0x00A80001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 2, "C0A80001"},
+    {"0xc0.052000001", L"0xc0.052000001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 2, "C0A80001"},
+    {"192.168.1", L"192.168.1", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, "C0A80001"},
+    // Hostnames with too many components, but a numeric final component, are invalid.
+    {"192.168.0.0.1", L"192.168.0.0.1", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    // We allow a single trailing dot.
+    {"192.168.0.1.", L"192.168.0.1.", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"},
+    {"192.168.0.1. hello", L"192.168.0.1. hello", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+    {"192.168.0.1..", L"192.168.0.1..", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+    // Hosts with two dots in a row with a final numeric component are considered invalid.
+    {"192.168..1", L"192.168..1", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"192.168..1.", L"192.168..1.", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    // Any numerical overflow should be marked as BROKEN.
+    {"0x100.0", L"0x100.0", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"0x100.0.0", L"0x100.0.0", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"0x100.0.0.0", L"0x100.0.0.0", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"0.0x100.0.0", L"0.0x100.0.0", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"0.0.0x100.0", L"0.0.0x100.0", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"0.0.0.0x100", L"0.0.0.0x100", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"0.0.0x10000", L"0.0.0x10000", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"0.0x1000000", L"0.0x1000000", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"0x100000000", L"0x100000000", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    // Repeat the previous tests, minus 1, to verify boundaries.
+    {"0xFF.0", L"0xFF.0", "255.0.0.0", Component(0, 9), CanonHostInfo::IPV4, 2, "FF000000"},
+    {"0xFF.0.0", L"0xFF.0.0", "255.0.0.0", Component(0, 9), CanonHostInfo::IPV4, 3, "FF000000"},
+    {"0xFF.0.0.0", L"0xFF.0.0.0", "255.0.0.0", Component(0, 9), CanonHostInfo::IPV4, 4, "FF000000"},
+    {"0.0xFF.0.0", L"0.0xFF.0.0", "0.255.0.0", Component(0, 9), CanonHostInfo::IPV4, 4, "00FF0000"},
+    {"0.0.0xFF.0", L"0.0.0xFF.0", "0.0.255.0", Component(0, 9), CanonHostInfo::IPV4, 4, "0000FF00"},
+    {"0.0.0.0xFF", L"0.0.0.0xFF", "0.0.0.255", Component(0, 9), CanonHostInfo::IPV4, 4, "000000FF"},
+    {"0.0.0xFFFF", L"0.0.0xFFFF", "0.0.255.255", Component(0, 11), CanonHostInfo::IPV4, 3, "0000FFFF"},
+    {"0.0xFFFFFF", L"0.0xFFFFFF", "0.255.255.255", Component(0, 13), CanonHostInfo::IPV4, 2, "00FFFFFF"},
+    {"0xFFFFFFFF", L"0xFFFFFFFF", "255.255.255.255", Component(0, 15), CanonHostInfo::IPV4, 1, "FFFFFFFF"},
+    // Old truncation tests. They're all "BROKEN" now.
+    {"276.256.0xf1a2.077777", L"276.256.0xf1a2.077777", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"192.168.0.257", L"192.168.0.257", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"192.168.0xa20001", L"192.168.0xa20001", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"192.015052000001", L"192.015052000001", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"0X12C0a80001", L"0X12C0a80001", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"276.1.2", L"276.1.2", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    // Too many components should be rejected, in valid ranges or not.
+    {"255.255.255.255.255", L"255.255.255.255.255", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"256.256.256.256.256", L"256.256.256.256.256", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    // Spaces should be rejected.
+    {"192.168.0.1 hello", L"192.168.0.1 hello", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+    // Very large numbers.
+    {"0000000000000300.0x00000000000000fF.00000000000000001", L"0000000000000300.0x00000000000000fF.00000000000000001", "192.255.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, "C0FF0001"},
+    {"0000000000000300.0xffffffffFFFFFFFF.3022415481470977", L"0000000000000300.0xffffffffFFFFFFFF.3022415481470977", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    // A number has no length limit, but long numbers can still overflow.
+    {"00000000000000000001", L"00000000000000000001", "0.0.0.1", Component(0, 7), CanonHostInfo::IPV4, 1, "00000001"},
+    {"0000000000000000100000000000000001", L"0000000000000000100000000000000001", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    // If a long component is non-numeric, it's a hostname, *not* a broken IP.
+    {"0.0.0.000000000000000000z", L"0.0.0.000000000000000000z", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+    {"0.0.0.100000000000000000z", L"0.0.0.100000000000000000z", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+    // Truncation of all zeros should still result in 0.
+    {"0.00.0x.0x0", L"0.00.0x.0x0", "0.0.0.0", Component(0, 7), CanonHostInfo::IPV4, 4, "00000000"},
+    // Non-ASCII characters in final component should return NEUTRAL.
+    {"1.2.3.\xF0\x9F\x92\xA9", L"1.2.3.\xD83D\xDCA9", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+    {"1.2.3.4\xF0\x9F\x92\xA9", L"1.2.3.4\xD83D\xDCA9", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+    {"1.2.3.0x\xF0\x9F\x92\xA9", L"1.2.3.0x\xD83D\xDCA9", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+    {"1.2.3.0\xF0\x9F\x92\xA9", L"1.2.3.0\xD83D\xDCA9", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+    // Non-ASCII characters in other components should result in broken IPs when final component is numeric.
+    {"1.2.\xF0\x9F\x92\xA9.4", L"1.2.\xD83D\xDCA9.4", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"1.2.3\xF0\x9F\x92\xA9.4", L"1.2.3\xD83D\xDCA9.4", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"1.2.0x\xF0\x9F\x92\xA9.4", L"1.2.0x\xD83D\xDCA9.4", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"1.2.0\xF0\x9F\x92\xA9.4", L"1.2.0\xD83D\xDCA9.4", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"\xF0\x9F\x92\xA9.2.3.4", L"\xD83D\xDCA9.2.3.4", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+  };
+  // clang-format on
+
+  for (const auto& test_case : cases) {
+    SCOPED_TRACE(test_case.input8);
+    // 8-bit version.
+    Component component(0, static_cast<int>(strlen(test_case.input8)));
+
+    std::string out_str1;
+    StdStringCanonOutput output1(&out_str1);
+    CanonHostInfo host_info;
+    CanonicalizeIPAddress(test_case.input8, component, &output1, &host_info);
+    output1.Complete();
+
+    EXPECT_EQ(test_case.expected_family, host_info.family);
+    EXPECT_EQ(std::string(test_case.expected_address_hex),
+              BytesToHexString(host_info.address, host_info.AddressLength()));
+    // Output text and component are only checked for IPv4 results.
+    if (host_info.family == CanonHostInfo::IPV4) {
+      EXPECT_STREQ(test_case.expected, out_str1.c_str());
+      EXPECT_EQ(test_case.expected_component.begin, host_info.out_host.begin);
+      EXPECT_EQ(test_case.expected_component.len, host_info.out_host.len);
+      EXPECT_EQ(test_case.expected_num_ipv4_components,
+                host_info.num_ipv4_components);
+    }
+    // 16-bit version.
+    std::u16string input16(
+        test_utils::TruncateWStringToUTF16(test_case.input16));
+    component = Component(0, static_cast<int>(input16.length()));
+
+    std::string out_str2;
+    StdStringCanonOutput output2(&out_str2);
+    CanonicalizeIPAddress(input16.c_str(), component, &output2, &host_info);
+    output2.Complete();
+
+    EXPECT_EQ(test_case.expected_family, host_info.family);
+    EXPECT_EQ(std::string(test_case.expected_address_hex),
+              BytesToHexString(host_info.address, host_info.AddressLength()));
+    if (host_info.family == CanonHostInfo::IPV4) {
+      EXPECT_STREQ(test_case.expected, out_str2.c_str());
+      EXPECT_EQ(test_case.expected_component.begin, host_info.out_host.begin);
+      EXPECT_EQ(test_case.expected_component.len, host_info.out_host.len);
+      EXPECT_EQ(test_case.expected_num_ipv4_components,
+                host_info.num_ipv4_components);
+    }
+  }
+}
+
+// Fixture whose bool parameter decides whether the
+// kStrictIPv4EmbeddedIPv6AddressParsing feature is enabled for the test body.
+class URLCanonIPv6Test : public ::testing::TestWithParam<bool> {
+ public:
+  URLCanonIPv6Test() {
+    const bool enable_strict_parsing = GetParam();
+    if (!enable_strict_parsing) {
+      scoped_feature_list_.InitAndDisableFeature(kStrictIPv4EmbeddedIPv6AddressParsing);
+      return;
+    }
+    scoped_feature_list_.InitAndEnableFeature(kStrictIPv4EmbeddedIPv6AddressParsing);
+  }
+
+ private:
+  base::test::ScopedFeatureList scoped_feature_list_;
+};
+
+// Instantiates the suite for both parameter values (false and true).
+INSTANTIATE_TEST_SUITE_P(All, URLCanonIPv6Test, ::testing::Bool());
+
+TEST_P(URLCanonIPv6Test, IPv6) {
+  // Runs CanonicalizeIPAddress() on 8-bit and 16-bit IPv6 inputs. A few
+  // expectations depend on the strict embedded-IPv4 parsing feature.
+  bool strict_ipv4_embedded_ipv6_parsing =
+      base::FeatureList::IsEnabled(url::kStrictIPv4EmbeddedIPv6AddressParsing);
+
+  IPAddressCase cases[] = {
+      // Empty is not an IP address.
+    {"", L"", "", Component(), CanonHostInfo::NEUTRAL, -1, ""},
+      // Non-IPs with [:] characters are marked BROKEN.
+    {":", L":", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[", L"[", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[:", L"[:", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"]", L"]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {":]", L":]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[]", L"[]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[:]", L"[:]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+      // Regular IP address is invalid without bounding '[' and ']'.
+    {"2001:db8::1", L"2001:db8::1", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[2001:db8::1", L"[2001:db8::1", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"2001:db8::1]", L"2001:db8::1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+      // Regular IP addresses.
+    {"[::]", L"[::]", "[::]", Component(0,4), CanonHostInfo::IPV6, -1, "00000000000000000000000000000000"},
+    {"[::1]", L"[::1]", "[::1]", Component(0,5), CanonHostInfo::IPV6, -1, "00000000000000000000000000000001"},
+    {"[1::]", L"[1::]", "[1::]", Component(0,5), CanonHostInfo::IPV6, -1, "00010000000000000000000000000000"},
+
+    // Leading zeros should be stripped.
+    {"[000:01:02:003:004:5:6:007]", L"[000:01:02:003:004:5:6:007]", "[0:1:2:3:4:5:6:7]", Component(0,17), CanonHostInfo::IPV6, -1, "00000001000200030004000500060007"},
+
+    // Upper case letters should be lowercased.
+    {"[A:b:c:DE:fF:0:1:aC]", L"[A:b:c:DE:fF:0:1:aC]", "[a:b:c:de:ff:0:1:ac]", Component(0,20), CanonHostInfo::IPV6, -1, "000A000B000C00DE00FF0000000100AC"},
+
+    // The same address can be written with different contractions, but should
+    // get canonicalized to the same thing.
+    {"[1:0:0:2::3:0]", L"[1:0:0:2::3:0]", "[1::2:0:0:3:0]", Component(0,14), CanonHostInfo::IPV6, -1, "00010000000000020000000000030000"},
+    {"[1::2:0:0:3:0]", L"[1::2:0:0:3:0]", "[1::2:0:0:3:0]", Component(0,14), CanonHostInfo::IPV6, -1, "00010000000000020000000000030000"},
+
+    // Addresses with embedded IPv4.
+    {"[::192.168.0.1]", L"[::192.168.0.1]", "[::c0a8:1]", Component(0,10), CanonHostInfo::IPV6, -1, "000000000000000000000000C0A80001"},
+    {"[::ffff:192.168.0.1]", L"[::ffff:192.168.0.1]", "[::ffff:c0a8:1]", Component(0,15), CanonHostInfo::IPV6, -1, "00000000000000000000FFFFC0A80001"},
+    {"[::eeee:192.168.0.1]", L"[::eeee:192.168.0.1]", "[::eeee:c0a8:1]", Component(0, 15), CanonHostInfo::IPV6, -1, "00000000000000000000EEEEC0A80001"},
+    {"[2001::192.168.0.1]", L"[2001::192.168.0.1]", "[2001::c0a8:1]", Component(0, 14), CanonHostInfo::IPV6, -1, "200100000000000000000000C0A80001"},
+    {"[1:2:192.168.0.1:5:6]", L"[1:2:192.168.0.1:5:6]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+
+    // IPv4 embedded IPv6 addresses
+    {"[::ffff:192.1.2]",
+     L"[::ffff:192.1.2]",
+     "[::ffff:c001:2]",
+     strict_ipv4_embedded_ipv6_parsing ? Component() : Component(0,15),
+     strict_ipv4_embedded_ipv6_parsing ? CanonHostInfo::BROKEN : CanonHostInfo::IPV6,
+     -1,
+     (strict_ipv4_embedded_ipv6_parsing ? "" : "00000000000000000000FFFFC0010002")},
+    {"[::ffff:192.1]",
+     L"[::ffff:192.1]",
+     "[::ffff:c000:1]",
+     strict_ipv4_embedded_ipv6_parsing ? Component() : Component(0,15),
+     strict_ipv4_embedded_ipv6_parsing ? CanonHostInfo::BROKEN : CanonHostInfo::IPV6,
+     -1,
+     (strict_ipv4_embedded_ipv6_parsing ? "" : "00000000000000000000FFFFC0000001")},
+    {"[::ffff:192.1.2.3.4]",
+     L"[::ffff:192.1.2.3.4]",
+     "", Component(), CanonHostInfo::BROKEN, -1, ""},
+
+    // IPv4 using hex.
+    // TODO(eroman): Should this format be disallowed?
+    {"[::ffff:0xC0.0Xa8.0x0.0x1]", L"[::ffff:0xC0.0Xa8.0x0.0x1]", "[::ffff:c0a8:1]", Component(0,15), CanonHostInfo::IPV6, -1, "00000000000000000000FFFFC0A80001"},
+
+    // There may be zeros surrounding the "::" contraction.
+    {"[0:0::0:0:8]", L"[0:0::0:0:8]", "[::8]", Component(0,5), CanonHostInfo::IPV6, -1, "00000000000000000000000000000008"},
+
+    {"[2001:db8::1]", L"[2001:db8::1]", "[2001:db8::1]", Component(0,13), CanonHostInfo::IPV6, -1, "20010DB8000000000000000000000001"},
+
+    // Can only have one "::" contraction in an IPv6 string literal.
+    {"[2001::db8::1]", L"[2001::db8::1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    // No more than 2 consecutive ':'s.
+    {"[2001:db8:::1]", L"[2001:db8:::1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[:::]", L"[:::]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    // Non-IP addresses due to invalid characters.
+    {"[2001::.com]", L"[2001::.com]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    // If there are not enough components, the last one should fill them out.
+    // ... omitted at this time ...
+    // Too many components means not an IP address. Similarly, with too few
+    // if using IPv4 compat or mapped addresses.
+    {"[::192.168.0.0.1]", L"[::192.168.0.0.1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[::ffff:192.168.0.0.1]", L"[::ffff:192.168.0.0.1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[1:2:3:4:5:6:7:8:9]", L"[1:2:3:4:5:6:7:8:9]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    // Too many bits (even though 8 components, the last one holds 32 bits).
+    {"[0:0:0:0:0:0:0:192.168.0.1]", L"[0:0:0:0:0:0:0:192.168.0.1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+
+    // Too many bits specified -- the contraction would have to be zero-length
+    // to not exceed 128 bits.
+    {"[1:2:3:4:5:6::192.168.0.1]", L"[1:2:3:4:5:6::192.168.0.1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+
+    // The contraction is for 16 bits of zero.
+    {"[1:2:3:4:5:6::8]", L"[1:2:3:4:5:6::8]", "[1:2:3:4:5:6:0:8]", Component(0,17), CanonHostInfo::IPV6, -1, "00010002000300040005000600000008"},
+
+    // Cannot have a trailing colon.
+    {"[1:2:3:4:5:6:7:8:]", L"[1:2:3:4:5:6:7:8:]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[1:2:3:4:5:6:192.168.0.1:]", L"[1:2:3:4:5:6:192.168.0.1:]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+
+    // Cannot have negative numbers.
+    {"[-1:2:3:4:5:6:7:8]", L"[-1:2:3:4:5:6:7:8]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+
+    // Scope ID -- an IPv6 literal may carry an optional ["%" <scope_id>]
+    // suffix. These are expected to be rejected: every input below that
+    // contains a '%' is BROKEN, including when no ID follows the percent.
+    {"[1::%1]", L"[1::%1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[1::%eth0]", L"[1::%eth0]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[1::%]", L"[1::%]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[%]", L"[%]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[::%:]", L"[::%:]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+
+    // Don't allow leading or trailing colons.
+    {"[:0:0::0:0:8]", L"[:0:0::0:0:8]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[0:0::0:0:8:]", L"[0:0::0:0:8:]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+    {"[:0:0::0:0:8:]", L"[:0:0::0:0:8:]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+
+      // We allow a single trailing dot.
+    // ... omitted at this time ...
+      // Two dots in a row means not an IP address.
+    {"[::192.168..1]", L"[::192.168..1]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+      // Any non-first components get truncated to one byte.
+    // ... omitted at this time ...
+      // Spaces should be rejected.
+    {"[::1 hello]", L"[::1 hello]", "", Component(), CanonHostInfo::BROKEN, -1, ""},
+  };
+
+  for (const auto& test_case : cases) {
+    // Attribute any failure below (8-bit or 16-bit) to the input under test.
+    SCOPED_TRACE(test_case.input8);
+
+    // 8-bit version.
+    Component component(0, static_cast<int>(strlen(test_case.input8)));
+
+    std::string out_str1;
+    StdStringCanonOutput output1(&out_str1);
+    CanonHostInfo host_info;
+    CanonicalizeIPAddress(test_case.input8, component, &output1, &host_info);
+    output1.Complete();
+
+    EXPECT_EQ(test_case.expected_family, host_info.family);
+    EXPECT_EQ(std::string(test_case.expected_address_hex),
+              BytesToHexString(host_info.address, host_info.AddressLength()));
+    if (host_info.family == CanonHostInfo::IPV6) {
+      EXPECT_STREQ(test_case.expected, out_str1.c_str());
+      EXPECT_EQ(test_case.expected_component.begin, host_info.out_host.begin);
+      EXPECT_EQ(test_case.expected_component.len, host_info.out_host.len);
+    }
+
+    // 16-bit version.
+    std::u16string input16(
+        test_utils::TruncateWStringToUTF16(test_case.input16));
+    component = Component(0, static_cast<int>(input16.length()));
+
+    std::string out_str2;
+    StdStringCanonOutput output2(&out_str2);
+    CanonicalizeIPAddress(input16.c_str(), component, &output2, &host_info);
+    output2.Complete();
+
+    EXPECT_EQ(test_case.expected_family, host_info.family);
+    EXPECT_EQ(std::string(test_case.expected_address_hex),
+              BytesToHexString(host_info.address, host_info.AddressLength()));
+    if (host_info.family == CanonHostInfo::IPV6) {
+      EXPECT_STREQ(test_case.expected, out_str2.c_str());
+      EXPECT_EQ(test_case.expected_component.begin, host_info.out_host.begin);
+      EXPECT_EQ(test_case.expected_component.len, host_info.out_host.len);
+    }
+  }
+}
+
+TEST(URLCanonTest, IPEmpty) {
+  const char ip_spec[] = "192.168.0.1";
+  std::string canonical;
+  StdStringCanonOutput canon_output(&canonical);
+  CanonHostInfo info;
+
+  // Neither a default-constructed component nor a zero-length one is an IP.
+  CanonicalizeIPAddress(ip_spec, Component(), &canon_output, &info);
+  EXPECT_FALSE(info.IsIPAddress());
+
+  CanonicalizeIPAddress(ip_spec, Component(0, 0), &canon_output, &info);
+  EXPECT_FALSE(info.IsIPAddress());
+}
+
+// Verifies that CanonicalizeHostSubstring produces the expected output and
+// does not "fix" IP addresses. Because this code is a subset of
+// CanonicalizeHost, the shared functionality is not tested.
+TEST(URLCanonTest, CanonicalizeHostSubstring) {
+  // Sanity check: a non-ASCII UTF-8 host comes out as lowercase punycode.
+  {
+    std::string canonical;
+    StdStringCanonOutput canon_output(&canonical);
+    EXPECT_TRUE(CanonicalizeHostSubstring("M\xc3\x9cNCHEN.com",
+                                          Component(0, 12), &canon_output));
+    canon_output.Complete();
+    EXPECT_EQ("xn--mnchen-3ya.com", canonical);
+  }
+
+  // An invalid Unicode character fails, but escaped output is still written.
+  {
+    const auto bad_host = test_utils::TruncateWStringToUTF16(L"\xfdd0zyx.com");
+    std::string canonical;
+    StdStringCanonOutput canon_output(&canonical);
+    EXPECT_FALSE(CanonicalizeHostSubstring(bad_host.c_str(), Component(0, 8),
+                                           &canon_output));
+    canon_output.Complete();
+    EXPECT_EQ("%EF%BF%BDzyx.com", canonical);
+  }
+
+  // Empty input canonicalizes successfully to an empty string.
+  {
+    std::string canonical;
+    StdStringCanonOutput canon_output(&canonical);
+    EXPECT_TRUE(CanonicalizeHostSubstring("", Component(0, 0), &canon_output));
+    canon_output.Complete();
+    EXPECT_EQ(std::string(), canonical);
+  }
+
+  // IP-looking numbers pass through untouched (no IPv4 normalization).
+  {
+    std::string canonical;
+    StdStringCanonOutput canon_output(&canonical);
+    EXPECT_TRUE(CanonicalizeHostSubstring("01.02.03.04", Component(0, 11),
+                                          &canon_output));
+    canon_output.Complete();
+    EXPECT_EQ("01.02.03.04", canonical);
+  }
+}
+
+TEST(URLCanonTest, UserInfo) {
+  // The canonicalizer is expected to escape the user info and to treat empty
+  // components as absent.
+
+  // Each case is a full URL; it is parsed first to find the input components.
+  struct UserComponentCase {
+    const char* input;
+    const char* expected;
+    Component expected_username;
+    Component expected_password;
+    bool expected_success;
+  } user_info_cases[] = {
+    {"http://user:pass@host.com/", "user:pass@", Component(0, 4), Component(5, 4), true},
+    {"http://@host.com/", "", Component(0, -1), Component(0, -1), true},
+    {"http://:@host.com/", "", Component(0, -1), Component(0, -1), true},
+    {"http://foo:@host.com/", "foo@", Component(0, 3), Component(0, -1), true},
+    {"http://:foo@host.com/", ":foo@", Component(0, 0), Component(1, 3), true},
+    {"http://^ :$\t@host.com/", "%5E%20:$%09@", Component(0, 6), Component(7, 4), true},
+    {"http://user:pass@/", "user:pass@", Component(0, 4), Component(5, 4), true},
+    {"http://%2540:bar@domain.com/", "%2540:bar@", Component(0, 5), Component(6, 3), true },
+
+      // IE7 compatibility: old versions allowed backslashes in usernames, but
+      // IE7 does not. We disallow it as well.
+    {"ftp://me\\mydomain:pass@foo.com/", "", Component(0, -1), Component(0, -1), true},
+  };
+
+  for (const UserComponentCase& test_case : user_info_cases) {
+    // Locate the username/password components by parsing the whole URL.
+    const char* spec = test_case.input;
+    Parsed parsed;
+    ParseStandardURL(spec, static_cast<int>(strlen(spec)), &parsed);
+
+    // 8-bit version.
+    Component canon_username;
+    Component canon_password;
+    std::string canonical;
+    StdStringCanonOutput narrow_output(&canonical);
+
+    bool success = CanonicalizeUserInfo(spec, parsed.username,
+                                        spec, parsed.password,
+                                        &narrow_output, &canon_username,
+                                        &canon_password);
+    narrow_output.Complete();
+
+    EXPECT_EQ(test_case.expected_success, success);
+    EXPECT_EQ(std::string(test_case.expected), canonical);
+    EXPECT_EQ(test_case.expected_username.begin, canon_username.begin);
+    EXPECT_EQ(test_case.expected_username.len, canon_username.len);
+    EXPECT_EQ(test_case.expected_password.begin, canon_password.begin);
+    EXPECT_EQ(test_case.expected_password.len, canon_password.len);
+
+    // Wide version. The components parsed from the 8-bit spec are reused;
+    // every input above is ASCII, so the UTF-16 offsets are the same.
+    canonical.clear();
+    StdStringCanonOutput wide_output(&canonical);
+    const std::u16string wide_spec(base::UTF8ToUTF16(spec));
+
+    success = CanonicalizeUserInfo(wide_spec.c_str(), parsed.username,
+                                   wide_spec.c_str(), parsed.password,
+                                   &wide_output, &canon_username,
+                                   &canon_password);
+    wide_output.Complete();
+
+    EXPECT_EQ(test_case.expected_success, success);
+    EXPECT_EQ(std::string(test_case.expected), canonical);
+    EXPECT_EQ(test_case.expected_username.begin, canon_username.begin);
+    EXPECT_EQ(test_case.expected_username.len, canon_username.len);
+    EXPECT_EQ(test_case.expected_password.begin, canon_password.begin);
+    EXPECT_EQ(test_case.expected_password.len, canon_password.len);
+  }
+}
+
+TEST(URLCanonTest, Port) {
+  // Only checks that the number is written to the output buffer correctly;
+  // scanning of the number is covered by the parser unit tests.
+  //
+  // Whenever a port is written, CanonicalizePort prepends a ':' separator,
+  // which is why the expected components below begin at offset 1.
+  struct PortCase {
+    const char* input;
+    int default_port;
+    const char* expected;
+    Component expected_component;
+    bool expected_success;
+  } port_cases[] = {
+      // Invalid input should be copied w/ failure.
+    {"as df", 80, ":as%20df", Component(1, 7), false},
+    {"-2", 80, ":-2", Component(1, 2), false},
+      // Default port should be omitted.
+    {"80", 80, "", Component(0, -1), true},
+    {"8080", 80, ":8080", Component(1, 4), true},
+      // PORT_UNSPECIFIED should mean always keep the port.
+    {"80", PORT_UNSPECIFIED, ":80", Component(1, 2), true},
+  };
+
+  for (const PortCase& test_case : port_cases) {
+    const int input_len = static_cast<int>(strlen(test_case.input));
+    const Component in_comp(0, input_len);
+
+    // 8-bit version.
+    Component out_comp;
+    std::string canonical;
+    StdStringCanonOutput narrow_output(&canonical);
+
+    bool success = CanonicalizePort(test_case.input, in_comp,
+                                    test_case.default_port, &narrow_output,
+                                    &out_comp);
+    narrow_output.Complete();
+
+    EXPECT_EQ(test_case.expected_success, success);
+    EXPECT_EQ(std::string(test_case.expected), canonical);
+    EXPECT_EQ(test_case.expected_component.begin, out_comp.begin);
+    EXPECT_EQ(test_case.expected_component.len, out_comp.len);
+
+    // Wide version; the same component applies because the input is ASCII.
+    canonical.clear();
+    StdStringCanonOutput wide_output(&canonical);
+    const std::u16string wide_input(base::UTF8ToUTF16(test_case.input));
+
+    success = CanonicalizePort(wide_input.c_str(), in_comp,
+                               test_case.default_port, &wide_output,
+                               &out_comp);
+    wide_output.Complete();
+
+    EXPECT_EQ(test_case.expected_success, success);
+    EXPECT_EQ(std::string(test_case.expected), canonical);
+    EXPECT_EQ(test_case.expected_component.begin, out_comp.begin);
+    EXPECT_EQ(test_case.expected_component.len, out_comp.len);
+  }
+}
+
+// Path cases shared by the Path and PartialPath tests below.
+DualComponentCase kCommonPathCases[] = {
+    // ----- path collapsing tests -----
+    {"/././foo", L"/././foo", "/foo", Component(0, 4), true},
+    {"/./.foo", L"/./.foo", "/.foo", Component(0, 5), true},
+    {"/foo/.", L"/foo/.", "/foo/", Component(0, 5), true},
+    {"/foo/./", L"/foo/./", "/foo/", Component(0, 5), true},
+    // Double dots followed by a slash or the end of the string are collapsed.
+    {"/foo/bar/..", L"/foo/bar/..", "/foo/", Component(0, 5), true},
+    {"/foo/bar/../", L"/foo/bar/../", "/foo/", Component(0, 5), true},
+    // Don't count double dots when they aren't followed by a slash.
+    {"/foo/..bar", L"/foo/..bar", "/foo/..bar", Component(0, 10), true},
+    // Double dots in the middle of the path.
+    {"/foo/bar/../ton", L"/foo/bar/../ton", "/foo/ton", Component(0, 8), true},
+    {"/foo/bar/../ton/../../a", L"/foo/bar/../ton/../../a", "/a",
+     Component(0, 2), true},
+    // We should not be able to go above the root.
+    {"/foo/../../..", L"/foo/../../..", "/", Component(0, 1), true},
+    {"/foo/../../../ton", L"/foo/../../../ton", "/ton", Component(0, 4), true},
+    // Escaped dots should be unescaped and treated the same as dots.
+    {"/foo/%2e", L"/foo/%2e", "/foo/", Component(0, 5), true},
+    {"/foo/%2e%2", L"/foo/%2e%2", "/foo/.%2", Component(0, 8), true},
+    {"/foo/%2e./%2e%2e/.%2e/%2e.bar", L"/foo/%2e./%2e%2e/.%2e/%2e.bar",
+     "/..bar", Component(0, 6), true},
+    // Consecutive slashes are preserved and act as empty directory names.
+    {"////../..", L"////../..", "//", Component(0, 2), true},
+
+    // ----- escaping tests -----
+    {"/foo", L"/foo", "/foo", Component(0, 4), true},
+    // Valid escape sequence
+    {"/%20foo", L"/%20foo", "/%20foo", Component(0, 7), true},
+    // Invalid escape sequence we should pass through unchanged.
+    {"/foo%", L"/foo%", "/foo%", Component(0, 5), true},
+    {"/foo%2", L"/foo%2", "/foo%2", Component(0, 6), true},
+    // Invalid escape sequence: bad characters should be treated the same as
+    // the surrounding text, not as escaped (in this case, UTF-8).
+    {"/foo%2zbar", L"/foo%2zbar", "/foo%2zbar", Component(0, 10), true},
+    {"/foo%2\xc2\xa9zbar", nullptr, "/foo%2%C2%A9zbar", Component(0, 16), true},
+    {nullptr, L"/foo%2\xc2\xa9zbar", "/foo%2%C3%82%C2%A9zbar", Component(0, 22),
+     true},
+    // Regular characters that are escaped should be unescaped
+    {"/foo%41%7a", L"/foo%41%7a", "/fooAz", Component(0, 6), true},
+    // Funny characters that are unescaped should be escaped
+    {"/foo\x09\x91%91", nullptr, "/foo%09%91%91", Component(0, 13), true},
+    {nullptr, L"/foo\x09\x91%91", "/foo%09%C2%91%91", Component(0, 16), true},
+    // Invalid characters that are escaped should cause a failure.
+    {"/foo%00%51", L"/foo%00%51", "/foo%00Q", Component(0, 8), false},
+    // Some characters should be passed through unchanged regardless of esc.
+    {"/(%28:%3A%29)", L"/(%28:%3A%29)", "/(%28:%3A%29)", Component(0, 13),
+     true},
+    // Characters that are properly escaped should not have the case changed
+    // of hex letters.
+    {"/%3A%3a%3C%3c", L"/%3A%3a%3C%3c", "/%3A%3a%3C%3c", Component(0, 13),
+     true},
+    // Same for a literal tab: it should be escaped.
+    {"/foo\tbar", L"/foo\tbar", "/foo%09bar", Component(0, 10), true},
+    // Backslashes should get converted to forward slashes
+    {"\\foo\\bar", L"\\foo\\bar", "/foo/bar", Component(0, 8), true},
+    // Hashes found in paths (possibly only when the caller explicitly sets
+    // the path on an already-parsed URL) should be escaped.
+    {"/foo#bar", L"/foo#bar", "/foo%23bar", Component(0, 10), true},
+    // %7f should be allowed and %3D should not be unescaped (these were wrong
+    // in a previous version).
+    {"/%7Ffp3%3Eju%3Dduvgw%3Dd", L"/%7Ffp3%3Eju%3Dduvgw%3Dd",
+     "/%7Ffp3%3Eju%3Dduvgw%3Dd", Component(0, 24), true},
+    // @ should be passed through unchanged (escaped or unescaped).
+    {"/@asdf%40", L"/@asdf%40", "/@asdf%40", Component(0, 9), true},
+    // Nested escape sequences should result in escaping the leading '%' if
+    // unescaping would result in a new escape sequence.
+    {"/%A%42", L"/%A%42", "/%25AB", Component(0, 6), true},
+    {"/%%41B", L"/%%41B", "/%25AB", Component(0, 6), true},
+    {"/%%41%42", L"/%%41%42", "/%25AB", Component(0, 6), true},
+    // Make sure truncated "nested" escapes don't result in reading off the
+    // string end.
+    {"/%%41", L"/%%41", "/%A", Component(0, 3), true},
+    // Don't unescape the leading '%' if unescaping doesn't result in a valid
+    // new escape sequence.
+    {"/%%470", L"/%%470", "/%G0", Component(0, 4), true},
+    {"/%%2D%41", L"/%%2D%41", "/%-A", Component(0, 4), true},
+    // Don't erroneously downcast a UTF-16 character in a way that makes it
+    // look like part of an escape sequence.
+    {nullptr, L"/%%41\x0130", "/%A%C4%B0", Component(0, 9), true},
+
+    // ----- encoding tests -----
+    // Basic conversions
+    {"/\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd",
+     L"/\x4f60\x597d\x4f60\x597d", "/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD",
+     Component(0, 37), true},
+    // Invalid unicode characters should fail. We only do validation on
+    // UTF-16 input, so this doesn't happen on 8-bit.
+    {"/\xef\xb7\x90zyx", nullptr, "/%EF%B7%90zyx", Component(0, 13), true},
+    {nullptr, L"/\xfdd0zyx", "/%EF%BF%BDzyx", Component(0, 13), false},
+};
+
+// Signature of an 8-bit canonicalizer that DoPathTest() can exercise:
+// (spec, input component, output, resulting component) -> success flag.
+using CanonFunc8Bit =
+    bool (*)(const char*, const Component&, CanonOutput*, Component*);
+
+// 16-bit counterpart of CanonFunc8Bit.
+using CanonFunc16Bit =
+    bool (*)(const char16_t*, const Component&, CanonOutput*, Component*);
+
+// Runs each of |num_cases| entries of |path_cases| through |canon_func_8|
+// and/or |canon_func_16| (per the inputs present) and checks the results.
+void DoPathTest(const DualComponentCase* path_cases,
+                size_t num_cases,
+                CanonFunc8Bit canon_func_8,
+                CanonFunc16Bit canon_func_16) {
+  for (size_t index = 0; index < num_cases; ++index) {
+    const DualComponentCase& test_case = path_cases[index];
+    SCOPED_TRACE(testing::Message()
+                 << test_case.input8 << "," << test_case.input16);
+
+    // 8-bit input; skipped when the case supplies none.
+    if (test_case.input8) {
+      Component in_comp(0, static_cast<int>(strlen(test_case.input8)));
+      Component out_comp;
+      std::string canonical;
+      StdStringCanonOutput output(&canonical);
+      bool ok = canon_func_8(test_case.input8, in_comp, &output, &out_comp);
+      output.Complete();
+
+      EXPECT_EQ(test_case.expected_success, ok);
+      EXPECT_EQ(test_case.expected_component.begin, out_comp.begin);
+      EXPECT_EQ(test_case.expected_component.len, out_comp.len);
+      EXPECT_EQ(test_case.expected, canonical);
+    }
+
+    // 16-bit input; skipped when the case supplies none.
+    if (test_case.input16) {
+      std::u16string input16(
+          test_utils::TruncateWStringToUTF16(test_case.input16));
+      Component in_comp(0, static_cast<int>(input16.length()));
+      Component out_comp;
+      std::string canonical;
+      StdStringCanonOutput output(&canonical);
+      bool ok = canon_func_16(input16.c_str(), in_comp, &output, &out_comp);
+      output.Complete();
+
+      EXPECT_EQ(test_case.expected_success, ok);
+      EXPECT_EQ(test_case.expected_component.begin, out_comp.begin);
+      EXPECT_EQ(test_case.expected_component.len, out_comp.len);
+      EXPECT_EQ(test_case.expected, canonical);
+    }
+  }
+}
+
+TEST(URLCanonTest, Path) {
+  DoPathTest(kCommonPathCases, std::size(kCommonPathCases), CanonicalizePath,
+             CanonicalizePath);
+
+  // DoPathTest measures inputs with strlen(), so an embedded NUL is checked
+  // by hand: it should be escaped and the path reported as invalid.
+  const char nul_path[] = "/ab\0c";
+  const Component in_comp(0, 5);
+  Component out_comp;
+  std::string canonical;
+  StdStringCanonOutput output(&canonical);
+
+  const bool ok = CanonicalizePath(nul_path, in_comp, &output, &out_comp);
+  output.Complete();
+  EXPECT_FALSE(ok);
+  EXPECT_EQ("/ab%00c", canonical);
+}
+
+TEST(URLCanonTest, PartialPath) {
+  // Extra inputs without a leading slash, exercised only by this test.
+  const DualComponentCase extra_cases[] = {
+      {".html", L".html", ".html", Component(0, 5), true},
+      {"", L"", "", Component(0, 0), true},
+  };
+  DoPathTest(kCommonPathCases, std::size(kCommonPathCases),
+             CanonicalizePartialPath, CanonicalizePartialPath);
+  DoPathTest(extra_cases, std::size(extra_cases),
+             CanonicalizePartialPath, CanonicalizePartialPath);
+}
+
+TEST(URLCanonTest, Query) {
+  // Checks the output of CanonicalizeQuery for both 8-bit and 16-bit input.
+  struct QueryCase {
+    const char* input8;
+    const wchar_t* input16;
+    const char* expected;
+  } query_cases[] = {
+      // Regular ASCII case.
+    {"foo=bar", L"foo=bar", "?foo=bar"},
+      // Allow question marks in the query without escaping
+    {"as?df", L"as?df", "?as?df"},
+      // Always escape '#' since it would mark the ref.
+    {"as#df", L"as#df", "?as%23df"},
+      // Escape some questionable 8-bit characters, but never unescape.
+    {"\x02hello\x7f bye", L"\x02hello\x7f bye", "?%02hello%7F%20bye"},
+    {"%40%41123", L"%40%41123", "?%40%41123"},
+      // Chinese input/output
+    {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "?q=%E4%BD%A0%E5%A5%BD"},
+      // Invalid UTF-8/16 input should be replaced with U+FFFD (%EF%BF%BD).
+    {"q=\xed\xed", L"q=\xd800\xd800", "?q=%EF%BF%BD%EF%BF%BD"},
+      // Don't allow < or > because sometimes they are used for XSS if the
+      // URL is echoed in content. Firefox does this, IE doesn't.
+    {"q=<asdf>", L"q=<asdf>", "?q=%3Casdf%3E"},
+      // Escape double quotemarks in the query.
+    {"q=\"asdf\"", L"q=\"asdf\"", "?q=%22asdf%22"},
+  };
+
+  for (const QueryCase& test_case : query_cases) {
+    Component out_comp;
+
+    // 8-bit input.
+    if (test_case.input8) {
+      Component in_comp(0, static_cast<int>(strlen(test_case.input8)));
+      std::string out_str;
+      StdStringCanonOutput output(&out_str);
+      CanonicalizeQuery(test_case.input8, in_comp, nullptr, &output,
+                        &out_comp);
+      output.Complete();
+
+      EXPECT_EQ(test_case.expected, out_str);
+    }
+
+    // 16-bit input.
+    if (test_case.input16) {
+      std::u16string input16(
+          test_utils::TruncateWStringToUTF16(test_case.input16));
+      Component in_comp(0, static_cast<int>(input16.length()));
+      std::string out_str;
+      StdStringCanonOutput output(&out_str);
+      CanonicalizeQuery(input16.c_str(), in_comp, nullptr, &output, &out_comp);
+      output.Complete();
+
+      EXPECT_EQ(test_case.expected, out_str);
+    }
+  }
+
+  // Extra test for input with an embedded NUL, passed with explicit length.
+  std::string out_str;
+  StdStringCanonOutput output(&out_str);
+  Component out_comp;
+  CanonicalizeQuery("a \x00z\x01", Component(0, 5), nullptr, &output,
+                    &out_comp);
+  output.Complete();
+  EXPECT_EQ("?a%20%00z%01", out_str);
+}
+
+TEST(URLCanonTest, Ref) {
+  // Refs are trivial, it just checks the encoding.
+  DualComponentCase ref_cases[] = {
+      {"hello!", L"hello!", "#hello!", Component(1, 6), true},
+      // We should escape spaces, double-quotes, angled braces, and backtics.
+      {"hello, world", L"hello, world", "#hello,%20world", Component(1, 14),
+       true},
+      {"hello,\"world", L"hello,\"world", "#hello,%22world", Component(1, 14),
+       true},
+      {"hello,<world", L"hello,<world", "#hello,%3Cworld", Component(1, 14),
+       true},
+      {"hello,>world", L"hello,>world", "#hello,%3Eworld", Component(1, 14),
+       true},
+      {"hello,`world", L"hello,`world", "#hello,%60world", Component(1, 14),
+       true},
+      // UTF-8/wide input should be preserved
+      {"\xc2\xa9", L"\xa9", "#%C2%A9", Component(1, 6), true},
+      // Test a characer that takes > 16 bits (U+10300 = old italic letter A)
+      {"\xF0\x90\x8C\x80ss", L"\xd800\xdf00ss", "#%F0%90%8C%80ss",
+       Component(1, 14), true},
+      // Escaping should be preserved unchanged, even invalid ones
+      {"%41%a", L"%41%a", "#%41%a", Component(1, 5), true},
+      // Invalid UTF-8/16 input should be flagged and the input made valid
+      {"\xc2", nullptr, "#%EF%BF%BD", Component(1, 9), true},
+      {nullptr, L"\xd800\x597d", "#%EF%BF%BD%E5%A5%BD", Component(1, 18), true},
+      // Test a Unicode invalid character.
+      {"a\xef\xb7\x90", L"a\xfdd0", "#a%EF%BF%BD", Component(1, 10), true},
+      // Refs can have # signs and we should preserve them.
+      {"asdf#qwer", L"asdf#qwer", "#asdf#qwer", Component(1, 9), true},
+      {"#asdf", L"#asdf", "##asdf", Component(1, 5), true},
+  };
+
+  for (size_t i = 0; i < std::size(ref_cases); i++) {
+    // 8-bit input
+    if (ref_cases[i].input8) {
+      int len = static_cast<int>(strlen(ref_cases[i].input8));
+      Component in_comp(0, len);
+      Component out_comp;
+
+      std::string out_str;
+      StdStringCanonOutput output(&out_str);
+      CanonicalizeRef(ref_cases[i].input8, in_comp, &output, &out_comp);
+      output.Complete();
+
+      EXPECT_EQ(ref_cases[i].expected_component.begin, out_comp.begin);
+      EXPECT_EQ(ref_cases[i].expected_component.len, out_comp.len);
+      EXPECT_EQ(ref_cases[i].expected, out_str);
+    }
+
+    // 16-bit input
+    if (ref_cases[i].input16) {
+      std::u16string input16(
+          test_utils::TruncateWStringToUTF16(ref_cases[i].input16));
+      int len = static_cast<int>(input16.length());
+      Component in_comp(0, len);
+      Component out_comp;
+
+      std::string out_str;
+      StdStringCanonOutput output(&out_str);
+      CanonicalizeRef(input16.c_str(), in_comp, &output, &out_comp);
+      output.Complete();
+
+      EXPECT_EQ(ref_cases[i].expected_component.begin, out_comp.begin);
+      EXPECT_EQ(ref_cases[i].expected_component.len, out_comp.len);
+      EXPECT_EQ(ref_cases[i].expected, out_str);
+    }
+  }
+
+  // Try one with an embedded NULL. It should be escaped as %00.
+  const char null_input[5] = "ab\x00z";
+  Component null_input_component(0, 4);
+  Component out_comp;
+
+  std::string out_str;
+  StdStringCanonOutput output(&out_str);
+  CanonicalizeRef(null_input, null_input_component, &output, &out_comp);
+  output.Complete();
+
+  EXPECT_EQ(1, out_comp.begin);
+  EXPECT_EQ(6, out_comp.len);
+  EXPECT_EQ("#ab%00z", out_str);
+}
+
+TEST(URLCanonTest, CanonicalizeStandardURL) {
+  // Per-component behaviour is expected to be covered by the individual
+  // component canonicalization tests. This test only checks that the parts of
+  // a standard URL are included or excluded properly, with correct separators.
+  struct URLCase {
+    const char* input;
+    const char* expected;
+    bool expected_success;
+  } cases[] = {
+      {"http://www.google.com/foo?bar=baz#",
+       "http://www.google.com/foo?bar=baz#", true},
+      {"http://[www.google.com]/", "http://[www.google.com]/", false},
+      {"ht\ttp:@www.google.com:80/;p?#", "ht%09tp://www.google.com:80/;p?#",
+       false},
+      {"http:////////user:@google.com:99?foo", "http://user@google.com:99/?foo",
+       true},
+      {"www.google.com", ":www.google.com/", false},
+      {"http://192.0x00A80001", "http://192.168.0.1/", true},
+      {"http://www/foo%2Ehtml", "http://www/foo.html", true},
+      {"http://user:pass@/", "http://user:pass@/", false},
+      {"http://%25DOMAIN:foobar@foodomain.com/",
+       "http://%25DOMAIN:foobar@foodomain.com/", true},
+
+      // Backslashes are expected to come out as forward slashes.
+      {"http:\\\\www.google.com\\foo", "http://www.google.com/foo", true},
+
+      // An invalid ref must not make canonicalizing the whole URL fail.
+      {"http://www.google.com/asdf#\xc2",
+       "http://www.google.com/asdf#%EF%BF%BD", true},
+
+      // Basic port tests (e.g. ":80" is dropped for http but kept for httpa).
+      {"http://foo:80/", "http://foo/", true},
+      {"http://foo:81/", "http://foo:81/", true},
+      {"httpa://foo:80/", "httpa://foo:80/", true},
+      {"http://foo:-80/", "http://foo:-80/", false},
+
+      {"https://foo:443/", "https://foo/", true},
+      {"https://foo:80/", "https://foo:80/", true},
+      {"ftp://foo:21/", "ftp://foo/", true},
+      {"ftp://foo:80/", "ftp://foo:80/", true},
+      {"gopher://foo:70/", "gopher://foo:70/", true},
+      {"gopher://foo:443/", "gopher://foo:443/", true},
+      {"ws://foo:80/", "ws://foo/", true},
+      {"ws://foo:81/", "ws://foo:81/", true},
+      {"ws://foo:443/", "ws://foo:443/", true},
+      {"ws://foo:815/", "ws://foo:815/", true},
+      {"wss://foo:80/", "wss://foo:80/", true},
+      {"wss://foo:81/", "wss://foo:81/", true},
+      {"wss://foo:443/", "wss://foo/", true},
+      {"wss://foo:815/", "wss://foo:815/", true},
+
+      // This particular code path ends up "backing up" to replace an invalid
+      // host ICU generated with an escaped version. Test that in the context
+      // of a full URL to make sure the backing up doesn't mess up the non-host
+      // parts of the URL. "EF B9 AA" is U+FE6A which is a type of percent that
+      // ICU will convert to an ASCII one, generating "%81".
+      {"ws:)W\x1eW\xef\xb9\xaa"
+       "81:80/",
+       "ws://%29w%1ew%81/", false},
+      // Regression test for the last_invalid_percent_index bug described in
+      // https://crbug.com/1080890#c10.
+      {R"(HTTP:S/5%\../>%41)", "http://s/%3EA", true},
+  };
+
+  for (const URLCase& test_case : cases) {
+    const int input_len = static_cast<int>(strlen(test_case.input));
+    Parsed in_parsed;
+    ParseStandardURL(test_case.input, input_len, &in_parsed);
+
+    std::string actual;
+    StdStringCanonOutput sink(&actual);
+    Parsed result;
+    const bool ok = CanonicalizeStandardURL(
+        test_case.input, input_len, in_parsed,
+        SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, nullptr, &sink, &result);
+    sink.Complete();
+
+    EXPECT_EQ(test_case.expected_success, ok);
+    EXPECT_EQ(test_case.expected, actual);
+  }
+}
+
+// Replacement shares its code path with regular canonicalization, so this
+// only needs to verify which components do and do not get replaced.
+TEST(URLCanonTest, ReplaceStandardURL) {
+  ReplaceCase replace_cases[] = {
+      // Common case of truncating the path.
+      {"http://www.google.com/foo?bar=baz#ref", nullptr, nullptr, nullptr,
+       nullptr, nullptr, "/", kDeleteComp, kDeleteComp,
+       "http://www.google.com/"},
+      // Replace everything
+      {"http://a:b@google.com:22/foo;bar?baz@cat", "https", "me", "pw",
+       "host.com", "99", "/path", "query", "ref",
+       "https://me:pw@host.com:99/path?query#ref"},
+      // Replace nothing
+      {"http://a:b@google.com:22/foo?baz@cat", nullptr, nullptr, nullptr,
+       nullptr, nullptr, nullptr, nullptr, nullptr,
+       "http://a:b@google.com:22/foo?baz@cat"},
+      // Replace scheme with filesystem. The result is garbage, but that is
+      // what was asked for.
+      {"http://a:b@google.com:22/foo?baz@cat", "filesystem", nullptr, nullptr,
+       nullptr, nullptr, nullptr, nullptr, nullptr,
+       "filesystem://a:b@google.com:22/foo?baz@cat"},
+  };
+
+  for (const ReplaceCase& entry : replace_cases) {
+    const int base_len = static_cast<int>(strlen(entry.base));
+    // Parse the base URL first; its components are handed to the replacement.
+    Parsed in_parsed;
+    ParseStandardURL(entry.base, base_len, &in_parsed);
+
+    using R = Replacements<char>;  // Clean up syntax.
+    R repl;
+
+    // The scheme has no clear function of its own, so a different clear
+    // function (ClearRef) is passed for it.
+    SetupReplComp(&R::SetScheme, &R::ClearRef, &repl, entry.scheme);
+    SetupReplComp(&R::SetUsername, &R::ClearUsername, &repl, entry.username);
+    SetupReplComp(&R::SetPassword, &R::ClearPassword, &repl, entry.password);
+    SetupReplComp(&R::SetHost, &R::ClearHost, &repl, entry.host);
+    SetupReplComp(&R::SetPort, &R::ClearPort, &repl, entry.port);
+    SetupReplComp(&R::SetPath, &R::ClearPath, &repl, entry.path);
+    SetupReplComp(&R::SetQuery, &R::ClearQuery, &repl, entry.query);
+    SetupReplComp(&R::SetRef, &R::ClearRef, &repl, entry.ref);
+
+    std::string actual;
+    StdStringCanonOutput sink(&actual);
+    Parsed result;
+    ReplaceStandardURL(entry.base, in_parsed, repl,
+                       SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, nullptr,
+                       &sink, &result);
+    sink.Complete();
+
+    EXPECT_EQ(entry.expected, actual);
+  }
+
+  // A path pointer must not be read when its component is empty or invalid.
+  {
+    const char src[] = "http://www.google.com/here_is_the_path";
+    // Bogus address 1: the test should get an access violation if it is read.
+    char* const bogus_path = reinterpret_cast<char*>(0x00000001);
+
+    Parsed src_parsed;
+    ParseStandardURL(src, static_cast<int>(strlen(src)), &src_parsed);
+
+    // Replace the path with a zero-length component at the bogus address.
+    Replacements<char> repl;
+    repl.SetPath(bogus_path, Component(0, 0));
+    std::string first_out;
+    StdStringCanonOutput first_sink(&first_out);
+    Parsed result;
+    ReplaceStandardURL(src, src_parsed, repl,
+                       SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, nullptr,
+                       &first_sink, &result);
+    first_sink.Complete();
+    EXPECT_STREQ("http://www.google.com/", first_out.c_str());
+
+    // Repeat with a default-constructed ("invalid") component.
+    repl.SetPath(bogus_path, Component());
+    std::string second_out;
+    StdStringCanonOutput second_sink(&second_out);
+    ReplaceStandardURL(src, src_parsed, repl,
+                       SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, nullptr,
+                       &second_sink, &result);
+    second_sink.Complete();
+    EXPECT_STREQ("http://www.google.com/", second_out.c_str());
+  }
+}
+
+TEST(URLCanonTest, ReplaceFileURL) {
+  ReplaceCase replace_cases[] = {
+      // Replace everything
+      {"file:///C:/gaba?query#ref", nullptr, nullptr, nullptr, "filer", nullptr,
+       "/foo", "b", "c", "file://filer/foo?b#c"},
+      // Replace nothing
+      {"file:///C:/gaba?query#ref", nullptr, nullptr, nullptr, nullptr, nullptr,
+       nullptr, nullptr, nullptr, "file:///C:/gaba?query#ref"},
+      {"file:///Y:", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
+       nullptr, nullptr, "file:///Y:"},
+      {"file:///Y:/", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
+       nullptr, nullptr, "file:///Y:/"},
+      {"file:///./Y", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
+       nullptr, nullptr, "file:///Y"},
+      {"file:///./Y:", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
+       nullptr, nullptr, "file:///Y:"},
+      // Clear non-path components (common)
+      {"file:///C:/gaba?query#ref", nullptr, nullptr, nullptr, nullptr, nullptr,
+       nullptr, kDeleteComp, kDeleteComp, "file:///C:/gaba"},
+      // A replacement path that doesn't begin with a slash should get one
+      // added properly.
+      {"file:///C:/gaba", nullptr, nullptr, nullptr, nullptr, nullptr,
+       "interesting/", nullptr, nullptr, "file:///interesting/"},
+      {"file:///home/gaba?query#ref", nullptr, nullptr, nullptr, "filer",
+       nullptr, "/foo", "b", "c", "file://filer/foo?b#c"},
+      {"file:///home/gaba?query#ref", nullptr, nullptr, nullptr, nullptr,
+       nullptr, nullptr, nullptr, nullptr, "file:///home/gaba?query#ref"},
+      {"file:///home/gaba?query#ref", nullptr, nullptr, nullptr, nullptr,
+       nullptr, nullptr, kDeleteComp, kDeleteComp, "file:///home/gaba"},
+      {"file:///home/gaba", nullptr, nullptr, nullptr, nullptr, nullptr,
+       "interesting/", nullptr, nullptr, "file:///interesting/"},
+      // Replace scheme -- shouldn't do anything.
+      {"file:///C:/gaba?query#ref", "http", nullptr, nullptr, nullptr, nullptr,
+       nullptr, nullptr, nullptr, "file:///C:/gaba?query#ref"},
+  };
+
+  for (const ReplaceCase& entry : replace_cases) {
+    SCOPED_TRACE(entry.base);
+    const int base_len = static_cast<int>(strlen(entry.base));
+    Parsed in_parsed;
+    ParseFileURL(entry.base, base_len, &in_parsed);
+
+    using R = Replacements<char>;  // Clean up syntax.
+    R repl;
+    // SetScheme is paired with ClearRef: there is no scheme clear function.
+    SetupReplComp(&R::SetScheme, &R::ClearRef, &repl, entry.scheme);
+    SetupReplComp(&R::SetUsername, &R::ClearUsername, &repl, entry.username);
+    SetupReplComp(&R::SetPassword, &R::ClearPassword, &repl, entry.password);
+    SetupReplComp(&R::SetHost, &R::ClearHost, &repl, entry.host);
+    SetupReplComp(&R::SetPort, &R::ClearPort, &repl, entry.port);
+    SetupReplComp(&R::SetPath, &R::ClearPath, &repl, entry.path);
+    SetupReplComp(&R::SetQuery, &R::ClearQuery, &repl, entry.query);
+    SetupReplComp(&R::SetRef, &R::ClearRef, &repl, entry.ref);
+
+    std::string actual;
+    StdStringCanonOutput sink(&actual);
+    Parsed result;
+    ReplaceFileURL(entry.base, in_parsed, repl, nullptr, &sink, &result);
+    sink.Complete();
+
+    EXPECT_EQ(entry.expected, actual);
+  }
+}
+
+TEST(URLCanonTest, ReplaceFileSystemURL) {
+  ReplaceCase replace_cases[] = {
+      // Replace everything in the outer URL.
+      {"filesystem:file:///temporary/gaba?query#ref", nullptr, nullptr, nullptr,
+       nullptr, nullptr, "/foo", "b", "c",
+       "filesystem:file:///temporary/foo?b#c"},
+      // Replace nothing
+      {"filesystem:file:///temporary/gaba?query#ref", nullptr, nullptr, nullptr,
+       nullptr, nullptr, nullptr, nullptr, nullptr,
+       "filesystem:file:///temporary/gaba?query#ref"},
+      // Clear non-path components (common)
+      {"filesystem:file:///temporary/gaba?query#ref", nullptr, nullptr, nullptr,
+       nullptr, nullptr, nullptr, kDeleteComp, kDeleteComp,
+       "filesystem:file:///temporary/gaba"},
+      // A replacement path that doesn't begin with a slash should get one
+      // added properly.
+      {"filesystem:file:///temporary/gaba?query#ref", nullptr, nullptr, nullptr,
+       nullptr, nullptr, "interesting/", nullptr, nullptr,
+       "filesystem:file:///temporary/interesting/?query#ref"},
+      // Replace scheme -- shouldn't do anything except canonicalize.
+      {"filesystem:http://u:p@bar.com/t/gaba?query#ref", "http", nullptr,
+       nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
+       "filesystem:http://bar.com/t/gaba?query#ref"},
+      // Replace username -- shouldn't do anything except canonicalize.
+      {"filesystem:http://u:p@bar.com/t/gaba?query#ref", nullptr, "u2", nullptr,
+       nullptr, nullptr, nullptr, nullptr, nullptr,
+       "filesystem:http://bar.com/t/gaba?query#ref"},
+      // Replace password -- shouldn't do anything except canonicalize.
+      {"filesystem:http://u:p@bar.com/t/gaba?query#ref", nullptr, nullptr,
+       "pw2", nullptr, nullptr, nullptr, nullptr, nullptr,
+       "filesystem:http://bar.com/t/gaba?query#ref"},
+      // Replace host -- shouldn't do anything except canonicalize.
+      {"filesystem:http://u:p@bar.com:80/t/gaba?query#ref", nullptr, nullptr,
+       nullptr, "foo.com", nullptr, nullptr, nullptr, nullptr,
+       "filesystem:http://bar.com/t/gaba?query#ref"},
+      // Replace port -- shouldn't do anything except canonicalize.
+      {"filesystem:http://u:p@bar.com:40/t/gaba?query#ref", nullptr, nullptr,
+       nullptr, nullptr, "41", nullptr, nullptr, nullptr,
+       "filesystem:http://bar.com:40/t/gaba?query#ref"},
+  };
+
+  for (const ReplaceCase& entry : replace_cases) {
+    const int base_len = static_cast<int>(strlen(entry.base));
+    Parsed in_parsed;
+    ParseFileSystemURL(entry.base, base_len, &in_parsed);
+
+    using R = Replacements<char>;  // Clean up syntax.
+    R repl;
+    // SetScheme is paired with ClearRef: there is no scheme clear function.
+    SetupReplComp(&R::SetScheme, &R::ClearRef, &repl, entry.scheme);
+    SetupReplComp(&R::SetUsername, &R::ClearUsername, &repl, entry.username);
+    SetupReplComp(&R::SetPassword, &R::ClearPassword, &repl, entry.password);
+    SetupReplComp(&R::SetHost, &R::ClearHost, &repl, entry.host);
+    SetupReplComp(&R::SetPort, &R::ClearPort, &repl, entry.port);
+    SetupReplComp(&R::SetPath, &R::ClearPath, &repl, entry.path);
+    SetupReplComp(&R::SetQuery, &R::ClearQuery, &repl, entry.query);
+    SetupReplComp(&R::SetRef, &R::ClearRef, &repl, entry.ref);
+
+    std::string actual;
+    StdStringCanonOutput sink(&actual);
+    Parsed result;
+    ReplaceFileSystemURL(entry.base, in_parsed, repl, nullptr, &sink, &result);
+    sink.Complete();
+
+    EXPECT_EQ(entry.expected, actual);
+  }
+}
+
+TEST(URLCanonTest, ReplacePathURL) {
+  ReplaceCase replace_cases[] = {
+      // Replace everything
+      {"data:foo", "javascript", nullptr, nullptr, nullptr, nullptr,
+       "alert('foo?');", nullptr, nullptr, "javascript:alert('foo?');"},
+      // Replace nothing
+      {"data:foo", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
+       nullptr, nullptr, "data:foo"},
+      // Replace only the scheme, only the path, or clear the path
+      {"data:foo", "javascript", nullptr, nullptr, nullptr, nullptr, nullptr,
+       nullptr, nullptr, "javascript:foo"},
+      {"data:foo", nullptr, nullptr, nullptr, nullptr, nullptr, "bar", nullptr,
+       nullptr, "data:bar"},
+      {"data:foo", nullptr, nullptr, nullptr, nullptr, nullptr, kDeleteComp,
+       nullptr, nullptr, "data:"},
+  };
+
+  for (const ReplaceCase& entry : replace_cases) {
+    const int base_len = static_cast<int>(strlen(entry.base));
+    Parsed in_parsed;
+    ParsePathURL(entry.base, base_len, false, &in_parsed);
+
+    using R = Replacements<char>;  // Clean up syntax.
+    R repl;
+    // SetScheme is paired with ClearRef: there is no scheme clear function.
+    SetupReplComp(&R::SetScheme, &R::ClearRef, &repl, entry.scheme);
+    SetupReplComp(&R::SetUsername, &R::ClearUsername, &repl, entry.username);
+    SetupReplComp(&R::SetPassword, &R::ClearPassword, &repl, entry.password);
+    SetupReplComp(&R::SetHost, &R::ClearHost, &repl, entry.host);
+    SetupReplComp(&R::SetPort, &R::ClearPort, &repl, entry.port);
+    SetupReplComp(&R::SetPath, &R::ClearPath, &repl, entry.path);
+    SetupReplComp(&R::SetQuery, &R::ClearQuery, &repl, entry.query);
+    SetupReplComp(&R::SetRef, &R::ClearRef, &repl, entry.ref);
+
+    std::string actual;
+    StdStringCanonOutput sink(&actual);
+    Parsed result;
+    ReplacePathURL(entry.base, in_parsed, repl, &sink, &result);
+    sink.Complete();
+
+    EXPECT_EQ(entry.expected, actual);
+  }
+}
+
+TEST(URLCanonTest, ReplaceMailtoURL) {
+  // Checks ReplaceMailtoURL: nullptr keeps a component, kDeleteComp clears it.
+  ReplaceCase replace_cases[] = {
+      // Replace everything
+      {"mailto:jon@foo.com?body=sup", "mailto", nullptr, nullptr, nullptr, nullptr, "addr1", "to=tony", nullptr, "mailto:addr1?to=tony"},
+      // Replace nothing
+      {"mailto:jon@foo.com?body=sup", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, "mailto:jon@foo.com?body=sup"},
+      // Replace the path
+      {"mailto:jon@foo.com?body=sup", nullptr, nullptr, nullptr, nullptr, nullptr, "jason", nullptr, nullptr, "mailto:jason?body=sup"},
+      // Replace the query
+      {"mailto:jon@foo.com?body=sup", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, "custom=1", nullptr, "mailto:jon@foo.com?custom=1"},
+      // Replace the path and query
+      {"mailto:jon@foo.com?body=sup", nullptr, nullptr, nullptr, nullptr, nullptr, "jason", "custom=1", nullptr, "mailto:jason?custom=1"},
+      // Set the query to empty (should leave trailing question mark)
+      {"mailto:jon@foo.com?body=sup", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, "", nullptr, "mailto:jon@foo.com?"},
+      // Clear the query
+      {"mailto:jon@foo.com?body=sup", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, kDeleteComp, nullptr, "mailto:jon@foo.com"},
+      // Clear the path
+      {"mailto:jon@foo.com?body=sup", nullptr, nullptr, nullptr, nullptr, nullptr, kDeleteComp, nullptr, nullptr, "mailto:?body=sup"},
+      // Clear the path + query
+      {"mailto:", nullptr, nullptr, nullptr, nullptr, nullptr, kDeleteComp, kDeleteComp, nullptr, "mailto:"},
+      // Setting the ref should have no effect
+      {"mailto:addr1", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, "BLAH", "mailto:addr1"},
+  };
+
+  for (const ReplaceCase& entry : replace_cases) {
+    const int base_len = static_cast<int>(strlen(entry.base));
+    Parsed in_parsed;
+    ParseMailtoURL(entry.base, base_len, &in_parsed);
+
+    using R = Replacements<char>;  // Clean up syntax.
+    R repl;
+    SetupReplComp(&R::SetScheme, &R::ClearRef, &repl, entry.scheme);
+    SetupReplComp(&R::SetUsername, &R::ClearUsername, &repl, entry.username);
+    SetupReplComp(&R::SetPassword, &R::ClearPassword, &repl, entry.password);
+    SetupReplComp(&R::SetHost, &R::ClearHost, &repl, entry.host);
+    SetupReplComp(&R::SetPort, &R::ClearPort, &repl, entry.port);
+    SetupReplComp(&R::SetPath, &R::ClearPath, &repl, entry.path);
+    SetupReplComp(&R::SetQuery, &R::ClearQuery, &repl, entry.query);
+    SetupReplComp(&R::SetRef, &R::ClearRef, &repl, entry.ref);
+
+    std::string actual;
+    StdStringCanonOutput sink(&actual);
+    Parsed result;
+    ReplaceMailtoURL(entry.base, in_parsed, repl, &sink, &result);
+    sink.Complete();
+
+    EXPECT_EQ(entry.expected, actual);
+  }
+}
+
+TEST(URLCanonTest, CanonicalizeFileURL) {
+  struct URLCase {
+    const char* input;
+    const char* expected;
+    bool expected_success;
+    Component expected_host;
+    Component expected_path;
+  } cases[] = {
+#ifdef _WIN32
+      // Windows-style paths
+      {"file:c:\\foo\\bar.html", "file:///C:/foo/bar.html", true, Component(),
+       Component(7, 16)},
+      {"  File:c|////foo\\bar.html", "file:///C:////foo/bar.html", true,
+       Component(), Component(7, 19)},
+      {"file:", "file:///", true, Component(), Component(7, 1)},
+      {"file:UNChost/path", "file://unchost/path", true, Component(7, 7),
+       Component(14, 5)},
+      // CanonicalizeFileURL supports absolute Windows style paths for IE
+      // compatibility. Note that the caller must decide that this is a file
+      // URL itself so it can call the file canonicalizer. This is usually
+      // done automatically as part of relative URL resolving.
+      {"c:\\foo\\bar", "file:///C:/foo/bar", true, Component(),
+       Component(7, 11)},
+      {"C|/foo/bar", "file:///C:/foo/bar", true, Component(), Component(7, 11)},
+      {"/C|\\foo\\bar", "file:///C:/foo/bar", true, Component(),
+       Component(7, 11)},
+      {"//C|/foo/bar", "file:///C:/foo/bar", true, Component(),
+       Component(7, 11)},
+      {"//server/file", "file://server/file", true, Component(7, 6),
+       Component(13, 5)},
+      {"\\\\server\\file", "file://server/file", true, Component(7, 6),
+       Component(13, 5)},
+      {"/\\server/file", "file://server/file", true, Component(7, 6),
+       Component(13, 5)},
+      // We should preserve the number of slashes after the colon for IE
+      // compatibility, except when there is none, in which case we should
+      // add one.
+      {"file:c:foo/bar.html", "file:///C:/foo/bar.html", true, Component(),
+       Component(7, 16)},
+      {"file:/\\/\\C:\\\\//foo\\bar.html", "file:///C:////foo/bar.html", true,
+       Component(), Component(7, 19)},
+      // Three slashes should be non-UNC, even if there is no drive spec (IE
+      // does this, which makes the resulting request invalid).
+      {"file:///foo/bar.txt", "file:///foo/bar.txt", true, Component(),
+       Component(7, 12)},
+      // TODO(brettw) we should probably fail for invalid host names, which
+      // would change the expected result on this test. We also currently allow
+      // colon even though it's probably invalid, because its currently the
+      // "natural" result of the way the canonicalizer is written. There doesn't
+      // seem to be a strong argument for why allowing it here would be bad, so
+      // we just tolerate it and the load will fail later.
+      {"FILE:/\\/\\7:\\\\//foo\\bar.html", "file://7:////foo/bar.html", false,
+       Component(7, 2), Component(9, 16)},
+      {"file:filer/home\\me", "file://filer/home/me", true, Component(7, 5),
+       Component(12, 8)},
+      // Make sure relative paths can't go above the "C:"
+      {"file:///C:/foo/../../../bar.html", "file:///C:/bar.html", true,
+       Component(), Component(7, 12)},
+      // Busted refs shouldn't make the whole thing fail.
+      {"file:///C:/asdf#\xc2", "file:///C:/asdf#%EF%BF%BD", true, Component(),
+       Component(7, 8)},
+      {"file:///./s:", "file:///S:", true, Component(), Component(7, 3)},
+#else
+      // Unix-style paths
+      {"file:///home/me", "file:///home/me", true, Component(),
+       Component(7, 8)},
+      // Windowsy ones should get still treated as Unix-style.
+      {"file:c:\\foo\\bar.html", "file:///c:/foo/bar.html", true, Component(),
+       Component(7, 16)},
+      {"file:c|//foo\\bar.html", "file:///c%7C//foo/bar.html", true,
+       Component(), Component(7, 19)},
+      {"file:///./s:", "file:///s:", true, Component(), Component(7, 3)},
+      // file: tests from WebKit (LayoutTests/fast/loader/url-parse-1.html)
+      {"//", "file:///", true, Component(), Component(7, 1)},
+      {"///", "file:///", true, Component(), Component(7, 1)},
+      {"///test", "file:///test", true, Component(), Component(7, 5)},
+      {"file://test", "file://test/", true, Component(7, 4), Component(11, 1)},
+      {"file://localhost", "file://localhost/", true, Component(7, 9),
+       Component(16, 1)},
+      {"file://localhost/", "file://localhost/", true, Component(7, 9),
+       Component(16, 1)},
+      {"file://localhost/test", "file://localhost/test", true, Component(7, 9),
+       Component(16, 5)},
+#endif  // _WIN32
+  };
+
+  for (const URLCase& test_case : cases) {
+    const int input_len = static_cast<int>(strlen(test_case.input));
+    Parsed in_parsed;
+    ParseFileURL(test_case.input, input_len, &in_parsed);
+
+    std::string actual;
+    StdStringCanonOutput sink(&actual);
+    Parsed result;
+    const bool ok = CanonicalizeFileURL(test_case.input, input_len, in_parsed,
+                                        nullptr, &sink, &result);
+    sink.Complete();
+
+    EXPECT_EQ(test_case.expected_success, ok);
+    EXPECT_EQ(test_case.expected, actual);
+
+    // The file canonicalizer has different code for writing the spec, so make
+    // sure the scheme ("file", 4 characters at offset 0) was identified.
+    EXPECT_EQ(0, result.scheme.begin);
+    EXPECT_EQ(4, result.scheme.len);
+
+    EXPECT_EQ(test_case.expected_host.begin, result.host.begin);
+    EXPECT_EQ(test_case.expected_host.len, result.host.len);
+
+    EXPECT_EQ(test_case.expected_path.begin, result.path.begin);
+    EXPECT_EQ(test_case.expected_path.len, result.path.len);
+  }
+}
+
+TEST(URLCanonTest, CanonicalizeFileSystemURL) {
+  struct URLCase {
+    const char* input;
+    const char* expected;
+    bool expected_success;
+  } cases[] = {
+      {"Filesystem:htTp://www.Foo.com:80/tempoRary",
+       "filesystem:http://www.foo.com/tempoRary/", true},
+      {"filesystem:httpS://www.foo.com/temporary/",
+       "filesystem:https://www.foo.com/temporary/", true},
+      {"filesystem:http://www.foo.com//", "filesystem:http://www.foo.com//",
+       false},
+      {"filesystem:http://www.foo.com/persistent/bob?query#ref",
+       "filesystem:http://www.foo.com/persistent/bob?query#ref", true},
+      {"filesystem:fIle://\\temporary/", "filesystem:file:///temporary/", true},
+      {"filesystem:fiLe:///temporary", "filesystem:file:///temporary/", true},
+      {"filesystem:File:///temporary/Bob?qUery#reF",
+       "filesystem:file:///temporary/Bob?qUery#reF", true},
+      {"FilEsysteM:htTp:E=/.", "filesystem:http://e%3D//", false},
+  };
+
+  for (const URLCase& test_case : cases) {
+    const int input_len = static_cast<int>(strlen(test_case.input));
+    Parsed in_parsed;
+    ParseFileSystemURL(test_case.input, input_len, &in_parsed);
+
+    std::string actual;
+    StdStringCanonOutput sink(&actual);
+    Parsed result;
+    const bool ok = CanonicalizeFileSystemURL(
+        test_case.input, input_len, in_parsed, nullptr, &sink, &result);
+    sink.Complete();
+
+    EXPECT_EQ(test_case.expected_success, ok);
+    EXPECT_EQ(test_case.expected, actual);
+
+    // The filesystem canonicalizer has different code for writing the spec,
+    // so make sure the scheme ("filesystem", 10 characters) was identified.
+    EXPECT_EQ(0, result.scheme.begin);
+    EXPECT_EQ(10, result.scheme.len);
+    if (ok)
+      EXPECT_GT(result.path.len, 0);
+  }
+}
+
+TEST(URLCanonTest, CanonicalizePathURL) {
+  // Path URLs should have their scheme canonicalized and nothing else.
+  struct PathCase {
+    const char* input;
+    const char* expected;
+  } path_cases[] = {
+      {"javascript:", "javascript:"},
+      {"JavaScript:Foo", "javascript:Foo"},
+      {"Foo:\":This /is interesting;?#", "foo:\":This /is interesting;?#"},
+
+      // Validation errors should not cause failure. See
+      // https://crbug.com/925614.
+      {"javascript:\uFFFF", "javascript:%EF%BF%BD"},
+  };
+
+  for (const PathCase& test_case : path_cases) {
+    const int input_len = static_cast<int>(strlen(test_case.input));
+    Parsed in_parsed;
+    ParsePathURL(test_case.input, input_len, true, &in_parsed);
+
+    std::string actual;
+    StdStringCanonOutput sink(&actual);
+    Parsed result;
+    const bool ok = CanonicalizePathURL(test_case.input, input_len, in_parsed,
+                                        &sink, &result);
+    sink.Complete();
+
+    EXPECT_TRUE(ok);
+    EXPECT_EQ(test_case.expected, actual);
+
+    EXPECT_EQ(0, result.host.begin);
+    EXPECT_EQ(-1, result.host.len);
+
+    // If the input ends with a colon, there should be no path (content).
+    if (test_case.input[input_len - 1] == ':') {
+      EXPECT_EQ(0, result.GetContent().begin);
+      EXPECT_EQ(-1, result.GetContent().len);
+    }
+  }
+}
+
+TEST(URLCanonTest, CanonicalizePathURLPath) {
+  struct PathCase {
+    std::string input;
+    std::wstring input16;
+    std::string expected;
+  } path_cases[] = {
+      {"Foo", L"Foo", "Foo"},
+      {"\":This /is interesting;?#", L"\":This /is interesting;?#",
+       "\":This /is interesting;?#"},
+      {"\uFFFF", L"\uFFFF", "%EF%BF%BD"},
+  };
+
+  for (const PathCase& test_case : path_cases) {
+    // Narrow (8-bit) input.
+    std::string actual8;
+    StdStringCanonOutput sink8(&actual8);
+    url::Component component8;
+    CanonicalizePathURLPath(test_case.input.data(),
+                            Component(0, test_case.input.size()), &sink8,
+                            &component8);
+    sink8.Complete();
+
+    EXPECT_EQ(test_case.expected, actual8);
+
+    EXPECT_EQ(0, component8.begin);
+    EXPECT_EQ(test_case.expected.size(),
+              static_cast<size_t>(component8.len));
+
+    // 16-bit input, converted from the wide string via TruncateWStringToUTF16.
+    std::string actual16;
+    StdStringCanonOutput sink16(&actual16);
+    url::Component component16;
+    std::u16string utf16_input(
+        test_utils::TruncateWStringToUTF16(test_case.input16.data()));
+    CanonicalizePathURLPath(utf16_input.c_str(),
+                            Component(0, test_case.input16.size()),
+                            &sink16, &component16);
+    sink16.Complete();
+
+    EXPECT_EQ(test_case.expected, actual16);
+
+    EXPECT_EQ(0, component16.begin);
+    EXPECT_EQ(test_case.expected.size(),
+              static_cast<size_t>(component16.len));
+  }
+}
+
+TEST(URLCanonTest, CanonicalizeMailtoURL) {
+  struct URLCase {
+    const char* input;
+    const char* expected;
+    bool expected_success;
+    Component expected_path;
+    Component expected_query;
+  } cases[] = {
+    // Null character should be escaped to %00.
+    // This case must stay first: the loop below special-cases index 0.
+    {"mailto:addr1\0addr2?foo",
+     "mailto:addr1%00addr2?foo",
+     true, Component(7, 13), Component(21, 3)},
+    {"mailto:addr1",
+     "mailto:addr1",
+     true, Component(7, 5), Component()},
+    {"mailto:addr1@foo.com",
+     "mailto:addr1@foo.com",
+     true, Component(7, 13), Component()},
+    // Trailing whitespace is stripped.
+    {"MaIlTo:addr1 \t ",
+     "mailto:addr1",
+     true, Component(7, 5), Component()},
+    {"MaIlTo:addr1?to=jon",
+     "mailto:addr1?to=jon",
+     true, Component(7, 5), Component(13, 6)},
+    {"mailto:addr1,addr2",
+     "mailto:addr1,addr2",
+     true, Component(7, 11), Component()},
+    // Embedded spaces must be encoded.
+    {"mailto:addr1, addr2",
+     "mailto:addr1,%20addr2",
+     true, Component(7, 14), Component()},
+    {"mailto:addr1, addr2?subject=one two ",
+     "mailto:addr1,%20addr2?subject=one%20two",
+     true, Component(7, 14), Component(22, 17)},
+    {"mailto:addr1%2caddr2",
+     "mailto:addr1%2caddr2",
+     true, Component(7, 13), Component()},
+    {"mailto:\xF0\x90\x8C\x80",
+     "mailto:%F0%90%8C%80",
+     true, Component(7, 12), Component()},
+    // Invalid -- UTF-8 encoded surrogate value.
+    {"mailto:\xed\xa0\x80",
+     "mailto:%EF%BF%BD%EF%BF%BD%EF%BF%BD",
+     false, Component(7, 27), Component()},
+    {"mailto:addr1?",
+     "mailto:addr1?",
+     true, Component(7, 5), Component(13, 0)},
+    // Certain characters have special meanings and must be encoded.
+    {"mailto:! \x22$&()+,-./09:;<=>@AZ[\\]&_`az{|}~\x7f?Query! \x22$&()+,-./09:;<=>@AZ[\\]&_`az{|}~",
+     "mailto:!%20%22$&()+,-./09:;%3C=%3E@AZ[\\]&_%60az%7B%7C%7D~%7F?Query!%20%22$&()+,-./09:;%3C=%3E@AZ[\\]&_`az{|}~",
+     true, Component(7, 53), Component(61, 47)},
+  };
+
+  // Shared across iterations on purpose: catches components that aren't reset.
+  Parsed in_parsed;
+  Parsed result;
+
+  for (size_t idx = 0; idx < std::size(cases); ++idx) {
+    const URLCase& test_case = cases[idx];
+    // The first test case purposely has a '\0' in it, so strlen() would stop
+    // at it; the intended length (22, counting the embedded NUL) is supplied
+    // explicitly for that case instead.
+    const int input_len =
+        idx == 0 ? 22 : static_cast<int>(strlen(test_case.input));
+    ParseMailtoURL(test_case.input, input_len, &in_parsed);
+
+    std::string actual;
+    StdStringCanonOutput sink(&actual);
+    const bool ok = CanonicalizeMailtoURL(test_case.input, input_len, in_parsed,
+                                          &sink, &result);
+    sink.Complete();
+
+    EXPECT_EQ(test_case.expected_success, ok);
+    EXPECT_EQ(test_case.expected, actual);
+
+    // The output scheme should be "mailto": 6 characters at offset 0.
+    EXPECT_EQ(0, result.scheme.begin);
+    EXPECT_EQ(6, result.scheme.len);
+
+    EXPECT_EQ(test_case.expected_path.begin, result.path.begin);
+    EXPECT_EQ(test_case.expected_path.len, result.path.len);
+
+    EXPECT_EQ(test_case.expected_query.begin, result.query.begin);
+    EXPECT_EQ(test_case.expected_query.len, result.query.len);
+  }
+}
+
+#ifndef WIN32
+
+TEST(URLCanonTest, _itoa_s) {
+  // The buffer is pre-filled with 0xff so that we can tell whether it gets
+  // properly null-terminated. buf is also one byte larger than the size we
+  // pass to _itoa_s, and we check that this extra byte is left untouched.
+  char buf[6];
+  memset(buf, 0xff, sizeof(buf));
+  EXPECT_EQ(0, _itoa_s(12, buf, sizeof(buf) - 1, 10));
+  EXPECT_STREQ("12", buf);
+  EXPECT_EQ('\xFF', buf[3]);
+
+  // Edge cases: exactly fills the 5 chars passed ("1234" + NUL) and one over.
+  memset(buf, 0xff, sizeof(buf));
+  EXPECT_EQ(0, _itoa_s(1234, buf, sizeof(buf) - 1, 10));
+  EXPECT_STREQ("1234", buf);
+  EXPECT_EQ('\xFF', buf[5]);
+
+  memset(buf, 0xff, sizeof(buf));
+  EXPECT_EQ(EINVAL, _itoa_s(12345, buf, sizeof(buf) - 1, 10));
+  EXPECT_EQ('\xFF', buf[5]);  // should never write to this location
+
+  // Test the template overload (note that this will see the full buffer)
+  memset(buf, 0xff, sizeof(buf));
+  EXPECT_EQ(0, _itoa_s(12, buf, 10));
+  EXPECT_STREQ("12", buf);
+  EXPECT_EQ('\xFF', buf[3]);
+
+  memset(buf, 0xff, sizeof(buf));
+  EXPECT_EQ(0, _itoa_s(12345, buf, 10));
+  EXPECT_STREQ("12345", buf);
+
+  EXPECT_EQ(EINVAL, _itoa_s(123456, buf, 10));  // Needs 7 chars; buf holds 6.
+
+  // Test that radix 16 is supported.
+  memset(buf, 0xff, sizeof(buf));
+  EXPECT_EQ(0, _itoa_s(1234, buf, sizeof(buf) - 1, 16));
+  EXPECT_STREQ("4d2", buf);
+  EXPECT_EQ('\xFF', buf[5]);
+}
+
+TEST(URLCanonTest, _itow_s) {
+  // We fill the buffer with 0xff to ensure that it's getting properly
+  // null-terminated. We also allocate one character more than what we tell
+  // _itow_s about, and ensure that the extra character is untouched.
+  char16_t buf[6];
+  const char fill_mem = 0xff;
+  const char16_t fill_char = 0xffff;
+  memset(buf, fill_mem, sizeof(buf));
+  EXPECT_EQ(0, _itow_s(12, buf, sizeof(buf) / 2 - 1, 10));
+  EXPECT_EQ(u"12", std::u16string(buf));
+  EXPECT_EQ(fill_char, buf[3]);
+
+  // Test the edge cases - exactly the buffer size and one over
+  EXPECT_EQ(0, _itow_s(1234, buf, sizeof(buf) / 2 - 1, 10));
+  EXPECT_EQ(u"1234", std::u16string(buf));
+  EXPECT_EQ(fill_char, buf[5]);  // buf was not refilled since the first call
+
+  memset(buf, fill_mem, sizeof(buf));
+  EXPECT_EQ(EINVAL, _itow_s(12345, buf, sizeof(buf) / 2 - 1, 10));
+  EXPECT_EQ(fill_char, buf[5]);  // should never write to this location
+
+  // Test the template overload (note that this will see the full buffer)
+  memset(buf, fill_mem, sizeof(buf));
+  EXPECT_EQ(0, _itow_s(12, buf, 10));
+  EXPECT_EQ(u"12", std::u16string(buf));
+  EXPECT_EQ(fill_char, buf[3]);
+
+  memset(buf, fill_mem, sizeof(buf));
+  EXPECT_EQ(0, _itow_s(12345, buf, 10));
+  EXPECT_EQ(u"12345", std::u16string(buf));
+
+  EXPECT_EQ(EINVAL, _itow_s(123456, buf, 10));  // Needs 7 chars; buf holds 6.
+}
+
+#endif  // !WIN32
+
+// Returns true when each of the eight components (scheme through ref) of |a|
+// has the same begin and len as the corresponding component of |b|.
+static bool ParsedIsEqual(const Parsed& a, const Parsed& b) {
+  const auto same = [](const Component& lhs, const Component& rhs) {
+    return lhs.begin == rhs.begin && lhs.len == rhs.len;
+  };
+  return same(a.scheme, b.scheme) && same(a.username, b.username) &&
+         same(a.password, b.password) && same(a.host, b.host) &&
+         same(a.port, b.port) && same(a.path, b.path) &&
+         same(a.query, b.query) && same(a.ref, b.ref);
+}
+
+TEST(URLCanonTest, ResolveRelativeURL) {
+  struct RelativeCase {
+    const char* base;      // Input base URL: MUST BE CANONICAL
+    bool is_base_hier;     // Is the base URL hierarchical
+    bool is_base_file;     // Tells us if the base is a file URL.
+    const char* test;      // Input URL to test against.
+    bool succeed_relative; // Whether we expect IsRelativeURL to succeed
+    bool is_rel;           // Whether we expect |test| to be relative or not.
+    bool succeed_resolve;  // Whether we expect ResolveRelativeURL to succeed.
+    const char* resolved;  // What we expect in the result when resolving.
+  } rel_cases[] = {
+      // Basic absolute input.
+    {"http://host/a", true, false, "http://another/", true, false, false, NULL},
+    {"http://host/a", true, false, "http:////another/", true, false, false, NULL},
+      // Empty relative URLs should only remove the ref part of the URL,
+      // leaving the rest unchanged.
+    {"http://foo/bar", true, false, "", true, true, true, "http://foo/bar"},
+    {"http://foo/bar#ref", true, false, "", true, true, true, "http://foo/bar"},
+    {"http://foo/bar#", true, false, "", true, true, true, "http://foo/bar"},
+      // Spaces at the ends of the relative path should be ignored.
+    {"http://foo/bar", true, false, "  another  ", true, true, true, "http://foo/another"},
+    {"http://foo/bar", true, false, "  .  ", true, true, true, "http://foo/"},
+    {"http://foo/bar", true, false, " \t ", true, true, true, "http://foo/bar"},
+      // Matching schemes without two slashes are treated as relative.
+    {"http://host/a", true, false, "http:path", true, true, true, "http://host/path"},
+    {"http://host/a/", true, false, "http:path", true, true, true, "http://host/a/path"},
+    {"http://host/a", true, false, "http:/path", true, true, true, "http://host/path"},
+    {"http://host/a", true, false, "HTTP:/path", true, true, true, "http://host/path"},
+      // Nonmatching schemes are absolute.
+    {"http://host/a", true, false, "https:host2", true, false, false, NULL},
+    {"http://host/a", true, false, "htto:/host2", true, false, false, NULL},
+      // Absolute path input
+    {"http://host/a", true, false, "/b/c/d", true, true, true, "http://host/b/c/d"},
+    {"http://host/a", true, false, "\\b\\c\\d", true, true, true, "http://host/b/c/d"},
+    {"http://host/a", true, false, "/b/../c", true, true, true, "http://host/c"},
+    {"http://host/a?b#c", true, false, "/b/../c", true, true, true, "http://host/c"},
+    {"http://host/a", true, false, "\\b/../c?x#y", true, true, true, "http://host/c?x#y"},
+    {"http://host/a?b#c", true, false, "/b/../c?x#y", true, true, true, "http://host/c?x#y"},
+      // Relative path input
+    {"http://host/a", true, false, "b", true, true, true, "http://host/b"},
+    {"http://host/a", true, false, "bc/de", true, true, true, "http://host/bc/de"},
+    {"http://host/a/", true, false, "bc/de?query#ref", true, true, true, "http://host/a/bc/de?query#ref"},
+    {"http://host/a/", true, false, ".", true, true, true, "http://host/a/"},
+    {"http://host/a/", true, false, "..", true, true, true, "http://host/"},
+    {"http://host/a/", true, false, "./..", true, true, true, "http://host/"},
+    {"http://host/a/", true, false, "../.", true, true, true, "http://host/"},
+    {"http://host/a/", true, false, "././.", true, true, true, "http://host/a/"},
+    {"http://host/a?query#ref", true, false, "../../../foo", true, true, true, "http://host/foo"},
+      // Query input
+    {"http://host/a", true, false, "?foo=bar", true, true, true, "http://host/a?foo=bar"},
+    {"http://host/a?x=y#z", true, false, "?", true, true, true, "http://host/a?"},
+    {"http://host/a?x=y#z", true, false, "?foo=bar#com", true, true, true, "http://host/a?foo=bar#com"},
+      // Ref input
+    {"http://host/a", true, false, "#ref", true, true, true, "http://host/a#ref"},
+    {"http://host/a#b", true, false, "#", true, true, true, "http://host/a#"},
+    {"http://host/a?foo=bar#hello", true, false, "#bye", true, true, true, "http://host/a?foo=bar#bye"},
+      // Non-hierarchical base: no relative handling. Relative input should
+      // error, and if a scheme is present, it should be treated as absolute.
+    {"data:foobar", false, false, "baz.html", false, false, false, NULL},
+    {"data:foobar", false, false, "data:baz", true, false, false, NULL},
+    {"data:foobar", false, false, "data:/base", true, false, false, NULL},
+      // Non-hierarchical base: absolute input should succeed.
+    {"data:foobar", false, false, "http://host/", true, false, false, NULL},
+    {"data:foobar", false, false, "http:host", true, false, false, NULL},
+      // Non-hierarchical base: empty URL should give error.
+    {"data:foobar", false, false, "", false, false, false, NULL},
+      // Invalid schemes should be treated as relative.
+    {"http://foo/bar", true, false, "./asd:fgh", true, true, true, "http://foo/asd:fgh"},
+    {"http://foo/bar", true, false, ":foo", true, true, true, "http://foo/:foo"},
+    {"http://foo/bar", true, false, " hello world", true, true, true, "http://foo/hello%20world"},
+    {"data:asdf", false, false, ":foo", false, false, false, NULL},
+    {"data:asdf", false, false, "bad(':foo')", false, false, false, NULL},
+      // We should treat semicolons like any other character in URL resolving
+    {"http://host/a", true, false, ";foo", true, true, true, "http://host/;foo"},
+    {"http://host/a;", true, false, ";foo", true, true, true, "http://host/;foo"},
+    {"http://host/a", true, false, ";/../bar", true, true, true, "http://host/bar"},
+      // Relative URLs can also be written as "//foo/bar" which is relative to
+      // the scheme. In this case, it would take the old scheme, so for http
+      // the example would resolve to "http://foo/bar".
+    {"http://host/a", true, false, "//another", true, true, true, "http://another/"},
+    {"http://host/a", true, false, "//another/path?query#ref", true, true, true, "http://another/path?query#ref"},
+    {"http://host/a", true, false, "///another/path", true, true, true, "http://another/path"},
+    {"http://host/a", true, false, "//Another\\path", true, true, true, "http://another/path"},
+    {"http://host/a", true, false, "//", true, true, false, "http:"},
+      // IE will also allow one or the other to be a backslash to get the same
+      // behavior.
+    {"http://host/a", true, false, "\\/another/path", true, true, true, "http://another/path"},
+    {"http://host/a", true, false, "/\\Another\\path", true, true, true, "http://another/path"},
+#ifdef WIN32
+      // Resolving against Windows file base URLs.
+    {"file:///C:/foo", true, true, "http://host/", true, false, false, NULL},
+    {"file:///C:/foo", true, true, "bar", true, true, true, "file:///C:/bar"},
+    {"file:///C:/foo", true, true, "../../../bar.html", true, true, true, "file:///C:/bar.html"},
+    {"file:///C:/foo", true, true, "/../bar.html", true, true, true, "file:///C:/bar.html"},
+      // But two backslashes on Windows should be UNC so should be treated
+      // as absolute.
+    {"http://host/a", true, false, "\\\\another\\path", true, false, false, NULL},
+      // IE doesn't support drive specs starting with two slashes. It fails
+      // immediately and doesn't even try to load. We fix it up to either
+      // an absolute path or UNC depending on what it looks like.
+    {"file:///C:/something", true, true, "//c:/foo", true, true, true, "file:///C:/foo"},
+    {"file:///C:/something", true, true, "//localhost/c:/foo", true, true, true, "file:///C:/foo"},
+      // Windows drive specs should be allowed and treated as absolute.
+    {"file:///C:/foo", true, true, "c:", true, false, false, NULL},
+    {"file:///C:/foo", true, true, "c:/foo", true, false, false, NULL},
+    {"http://host/a", true, false, "c:\\foo", true, false, false, NULL},
+      // Relative paths with drive letters should be allowed when the base is
+      // also a file.
+    {"file:///C:/foo", true, true, "/z:/bar", true, true, true, "file:///Z:/bar"},
+      // Treat absolute paths as being off of the drive.
+    {"file:///C:/foo", true, true, "/bar", true, true, true, "file:///C:/bar"},
+    {"file://localhost/C:/foo", true, true, "/bar", true, true, true, "file://localhost/C:/bar"},
+    {"file:///C:/foo/com/", true, true, "/bar", true, true, true, "file:///C:/bar"},
+      // On Windows, two slashes without a drive letter when the base is a file
+      // means that the path is UNC.
+    {"file:///C:/something", true, true, "//somehost/path", true, true, true, "file://somehost/path"},
+    {"file:///C:/something", true, true, "/\\//somehost/path", true, true, true, "file://somehost/path"},
+#else
+      // On Unix we fall back to relative behavior since there's nothing else
+      // reasonable to do.
+    {"http://host/a", true, false, "\\\\Another\\path", true, true, true, "http://another/path"},
+#endif
+      // Even on Windows, we don't allow relative drive specs when the base
+      // is not file.
+    {"http://host/a", true, false, "/c:\\foo", true, true, true, "http://host/c:/foo"},
+    {"http://host/a", true, false, "//c:\\foo", true, true, true, "http://c/foo"},
+      // Cross-platform relative file: resolution behavior.
+    {"file://host/a", true, true, "/", true, true, true, "file://host/"},
+    {"file://host/a", true, true, "//", true, true, true, "file:///"},
+    {"file://host/a", true, true, "/b", true, true, true, "file://host/b"},
+    {"file://host/a", true, true, "//b", true, true, true, "file://b/"},
+      // Ensure that ports aren't allowed for hosts relative to a file url.
+      // Although the result string shows a host:port portion, the call to
+      // resolve the relative URL returns false, indicating parse failure,
+      // which is what is required.
+    {"file:///foo.txt", true, true, "//host:80/bar.txt", true, true, false, "file://host:80/bar.txt"},
+      // Filesystem URL tests; filesystem URLs are only valid and relative if
+      // they have no scheme, e.g. "./index.html". There's no valid equivalent
+      // to http:index.html.
+    {"filesystem:http://host/t/path", true, false, "filesystem:http://host/t/path2", true, false, false, NULL},
+    {"filesystem:http://host/t/path", true, false, "filesystem:https://host/t/path2", true, false, false, NULL},
+    {"filesystem:http://host/t/path", true, false, "http://host/t/path2", true, false, false, NULL},
+    {"http://host/t/path", true, false, "filesystem:http://host/t/path2", true, false, false, NULL},
+    {"filesystem:http://host/t/path", true, false, "./path2", true, true, true, "filesystem:http://host/t/path2"},
+    {"filesystem:http://host/t/path/", true, false, "path2", true, true, true, "filesystem:http://host/t/path/path2"},
+    {"filesystem:http://host/t/path", true, false, "filesystem:http:path2", true, false, false, NULL},
+      // Absolute URLs are still not relative to a non-standard base URL.
+    {"about:blank", false, false, "http://X/A", true, false, true, ""},
+    {"about:blank", false, false, "content://content.Provider/", true, false, true, ""},
+  };
+
+  for (size_t i = 0; i < std::size(rel_cases); i++) {
+    const RelativeCase& cur_case = rel_cases[i];
+
+    Parsed parsed;
+    int base_len = static_cast<int>(strlen(cur_case.base));
+    if (cur_case.is_base_file)
+      ParseFileURL(cur_case.base, base_len, &parsed);
+    else if (cur_case.is_base_hier)
+      ParseStandardURL(cur_case.base, base_len, &parsed);
+    else
+      ParsePathURL(cur_case.base, base_len, false, &parsed);
+
+    // First see if it is relative.
+    int test_len = static_cast<int>(strlen(cur_case.test));
+    bool is_relative;
+    Component relative_component;
+    bool succeed_is_rel = IsRelativeURL(
+        cur_case.base, parsed, cur_case.test, test_len, cur_case.is_base_hier,
+        &is_relative, &relative_component);
+
+    EXPECT_EQ(cur_case.succeed_relative, succeed_is_rel) <<
+        "succeed is rel failure on " << cur_case.test;
+    EXPECT_EQ(cur_case.is_rel, is_relative) <<
+        "is rel failure on " << cur_case.test;
+    // Now resolve it, but only if it is relative and was expected to be.
+    if (succeed_is_rel && is_relative && cur_case.is_rel) {
+      std::string resolved;
+      StdStringCanonOutput output(&resolved);
+      Parsed resolved_parsed;
+
+      bool succeed_resolve = ResolveRelativeURL(
+          cur_case.base, parsed, cur_case.is_base_file, cur_case.test,
+          relative_component, NULL, &output, &resolved_parsed);
+      output.Complete();
+
+      EXPECT_EQ(cur_case.succeed_resolve, succeed_resolve);
+      EXPECT_EQ(cur_case.resolved, resolved) << " on " << cur_case.test;
+
+      // Verify that the output parsed structure is the same as the one we get
+      // from freshly parsing the resolved URL with the base URL's parser.
+      Parsed ref_parsed;
+      int resolved_len = static_cast<int>(resolved.size());
+      if (cur_case.is_base_file) {
+        ParseFileURL(resolved.c_str(), resolved_len, &ref_parsed);
+      } else if (cur_case.is_base_hier) {
+        ParseStandardURL(resolved.c_str(), resolved_len, &ref_parsed);
+      } else {
+        ParsePathURL(resolved.c_str(), resolved_len, false, &ref_parsed);
+      }
+      EXPECT_TRUE(ParsedIsEqual(ref_parsed, resolved_parsed));
+    }
+  }
+}
+
+// Regression test: a replacement that used a long buffer of UTF-16 characters
+// used to leave invalid data in the URL. The buffer holding the converted UTF-8
+// data got resized while some pointers still referred to the old buffer, which
+// had been removed.
+TEST(URLCanonTest, ReplacementOverflow) {
+  const char kBaseSpec[] = "file:///C:/foo/bar";
+  const int base_len = static_cast<int>(strlen(kBaseSpec));
+  Parsed base_parsed;
+  ParseFileURL(kBaseSpec, base_len, &base_parsed);
+
+  // Two components get overridden: the path with something short, and the
+  // query with something long enough to trigger the bug.
+  const std::u16string long_query(4800, u'a');
+  const std::u16string short_path(
+      test_utils::TruncateWStringToUTF16(L"/foo"));
+
+  Replacements<char16_t> replacements;
+  replacements.SetPath(short_path.c_str(), Component(0, 4));
+  replacements.SetQuery(long_query.c_str(),
+                        Component(0, static_cast<int>(long_query.length())));
+
+  // Which Replace*URL flavor is called does not matter (standard URLs, file
+  // URLs, etc.), since they all go to the same replacement function that was
+  // buggy.
+  std::string actual;
+  Parsed actual_parsed;
+  StdStringCanonOutput actual_output(&actual);
+  ReplaceFileURL(kBaseSpec, base_parsed, replacements, NULL, &actual_output,
+                 &actual_parsed);
+  actual_output.Complete();
+
+  // Build the expected spec: the new path followed by the whole long query.
+  std::string expected("file:///foo?");
+  expected.append(long_query.length(), 'a');
+
+  EXPECT_TRUE(expected == actual);
+}
+
+TEST(URLCanonTest, DefaultPortForScheme) {
+  // Each row pairs a scheme spelling with the port the lookup should return.
+  struct SchemeExpectation {
+    const char* scheme;
+    const int expected_port;
+  } expectations[]{
+      // Lower-case schemes that have a default port.
+      {"http", 80}, {"https", 443}, {"ftp", 21}, {"ws", 80}, {"wss", 443},
+      // A scheme with no default port.
+      {"fake-scheme", PORT_UNSPECIFIED},
+      // Upper-case spellings are expected to yield no default port either.
+      {"HTTP", PORT_UNSPECIFIED},
+      {"HTTPS", PORT_UNSPECIFIED},
+      {"FTP", PORT_UNSPECIFIED},
+      {"WS", PORT_UNSPECIFIED},
+      {"WSS", PORT_UNSPECIFIED},
+  };
+
+  for (const auto& entry : expectations) {
+    SCOPED_TRACE(entry.scheme);
+    EXPECT_EQ(entry.expected_port,
+              DefaultPortForScheme(entry.scheme, strlen(entry.scheme)));
+  }
+}
+
+TEST(URLCanonTest, FindWindowsDriveLetter) {
+  struct TestCase {
+    base::StringPiece spec;
+    int begin;
+    int end;  // -1 means "up to the end of spec"
+    int expected_drive_letter_pos;
+  } cases[] = {
+      {"/", 0, -1, -1},
+
+      {"c:/foo", 0, -1, 0},
+      {"/c:/foo", 0, -1, 1},
+      {"//c:/foo", 0, -1, -1},  // "//" does not canonicalize to "/"
+      {"\\C|\\foo", 0, -1, 1},
+      {"/cd:/foo", 0, -1, -1},  // "/c" does not canonicalize to "/"
+      {"/./c:/foo", 0, -1, 3},
+      {"/.//c:/foo", 0, -1, -1},  // "/.//" does not canonicalize to "/"
+      {"/././c:/foo", 0, -1, 5},
+      {"/abc/c:/foo", 0, -1, -1},  // "/abc/" does not canonicalize to "/"
+      {"/abc/./../c:/foo", 0, -1, 10},
+
+      {"/c:/c:/foo", 3, -1, 4},  // actual input is "/c:/foo"
+      {"/c:/foo", 3, -1, -1},    // actual input is "/foo"
+      {"/c:/foo", 0, 1, -1},     // actual input is "/"
+  };
+
+  for (const TestCase& tc : cases) {
+    // A stored end of -1 stands for the end of the spec.
+    const int end =
+        tc.end == -1 ? static_cast<int>(tc.spec.size()) : tc.end;
+    EXPECT_EQ(tc.expected_drive_letter_pos,
+              FindWindowsDriveLetter(tc.spec.data(), tc.begin, end))
+        << "for " << tc.spec << "[" << tc.begin << ":" << end << "] (UTF-8)";
+
+    // Repeat the same lookup on the spec converted to a std::u16string.
+    const std::u16string spec16 = base::ASCIIToUTF16(tc.spec);
+    EXPECT_EQ(tc.expected_drive_letter_pos,
+              FindWindowsDriveLetter(spec16.data(), tc.begin, end))
+        << "for " << tc.spec << "[" << tc.begin << ":" << end << "] (UTF-16)";
+  }
+}
+
+TEST(URLCanonTest, IDNToASCII) {
+  // A null |expected| marks an input whose conversion must fail; otherwise the
+  // conversion must succeed and produce exactly |expected|.
+  struct IDNCase {
+    const char16_t* input;
+    const char16_t* expected;
+  } idn_cases[] = {
+      // Basic ASCII test.
+      {u"hello", u"hello"},
+
+      // Mixed ASCII/non-ASCII.
+      {u"hellö", u"xn--hell-8qa"},
+
+      // All non-ASCII.
+      {u"你好", u"xn--6qq79v"},
+
+      // Characters that need mapping (the resulting Punycode is the encoding
+      // for "1⁄4").
+      {u"¼", u"xn--14-c6t"},
+
+      // String to encode already starts with "xn--", and all ASCII. Should not
+      // modify the string.
+      {u"xn--hell-8qa", u"xn--hell-8qa"},
+
+      // String to encode already starts with "xn--", and mixed ASCII/non-ASCII.
+      // Should fail, due to a special case: if the label starts with "xn--", it
+      // should be parsed as Punycode, which must be all ASCII.
+      {u"xn--hellö", nullptr},
+
+      // String to encode already starts with "xn--", and mixed
+      // ASCII/non-ASCII. This tests that there is still an error for the
+      // character '⁄' (U+2044), which would be a valid ASCII character,
+      // U+0044, if the high byte were ignored.
+      {u"xn--1⁄4", nullptr},
+  };
+
+  // One output buffer is shared by all cases and emptied after each of them.
+  RawCanonOutputW<1024> output;
+  for (size_t i = 0; i < std::size(idn_cases); i++) {
+    const std::u16string input(idn_cases[i].input);
+    const bool converted = IDNToASCII(input.data(), input.length(), &output);
+    if (idn_cases[i].expected) {
+      EXPECT_TRUE(converted) << i;
+      EXPECT_EQ(idn_cases[i].expected, std::u16string(output.data())) << i;
+    } else {
+      EXPECT_FALSE(converted) << i;
+    }
+    output.set_length(0);
+  }
+}
+
+}  // namespace url
diff --git a/url_constants.cc b/url_constants.cc
new file mode 100644
index 00000000000..850a31ce22d
--- /dev/null
+++ b/url_constants.cc
@@ -0,0 +1,61 @@
+// Copyright 2014 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/url_constants.h"
+
+namespace url {
+
+const char kAboutBlankURL[] = "about:blank";  // Complete about: URLs.
+const char16_t kAboutBlankURL16[] = u"about:blank";
+const char kAboutSrcdocURL[] = "about:srcdoc";
+const char16_t kAboutSrcdocURL16[] = u"about:srcdoc";
+
+const char kAboutBlankPath[] = "blank";  // Paths of the about: URLs above.
+const char16_t kAboutBlankPath16[] = u"blank";
+const char kAboutSrcdocPath[] = "srcdoc";
+const char16_t kAboutSrcdocPath16[] = u"srcdoc";
+
+const char kAboutScheme[] = "about";  // Scheme names (no trailing ':').
+const char16_t kAboutScheme16[] = u"about";
+const char kBlobScheme[] = "blob";
+const char16_t kBlobScheme16[] = u"blob";
+const char kContentScheme[] = "content";
+const char16_t kContentScheme16[] = u"content";
+const char kContentIDScheme[] = "cid";
+const char16_t kContentIDScheme16[] = u"cid";
+const char kDataScheme[] = "data";
+const char16_t kDataScheme16[] = u"data";
+const char kFileScheme[] = "file";
+const char16_t kFileScheme16[] = u"file";
+const char kFileSystemScheme[] = "filesystem";
+const char16_t kFileSystemScheme16[] = u"filesystem";
+const char kFtpScheme[] = "ftp";
+const char16_t kFtpScheme16[] = u"ftp";
+const char kHttpScheme[] = "http";
+const char16_t kHttpScheme16[] = u"http";
+const char kHttpsScheme[] = "https";
+const char16_t kHttpsScheme16[] = u"https";
+const char kJavaScriptScheme[] = "javascript";
+const char16_t kJavaScriptScheme16[] = u"javascript";
+const char kMailToScheme[] = "mailto";
+const char16_t kMailToScheme16[] = u"mailto";
+const char kTelScheme[] = "tel";
+const char16_t kTelScheme16[] = u"tel";
+const char kUrnScheme[] = "urn";
+const char16_t kUrnScheme16[] = u"urn";
+const char kUuidInPackageScheme[] = "uuid-in-package";
+const char16_t kUuidInPackageScheme16[] = u"uuid-in-package";
+const char kWebcalScheme[] = "webcal";
+const char16_t kWebcalScheme16[] = u"webcal";
+const char kWsScheme[] = "ws";
+const char16_t kWsScheme16[] = u"ws";
+const char kWssScheme[] = "wss";
+const char16_t kWssScheme16[] = u"wss";
+
+const char kStandardSchemeSeparator[] = "://";
+const char16_t kStandardSchemeSeparator16[] = u"://";
+
+const size_t kMaxURLChars = 2 * 1024 * 1024;  // 2,097,152 (2 * 2^20).
+
+}  // namespace url
diff --git a/url_constants.h b/url_constants.h
new file mode 100644
index 00000000000..5eda4e89f25
--- /dev/null
+++ b/url_constants.h
@@ -0,0 +1,70 @@
+// Copyright 2014 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_URL_CONSTANTS_H_
+#define URL_URL_CONSTANTS_H_
+
+#include <stddef.h>
+
+#include "base/component_export.h"
+
+namespace url {
+
+COMPONENT_EXPORT(URL) extern const char kAboutBlankURL[];  // Complete URLs.
+COMPONENT_EXPORT(URL) extern const char16_t kAboutBlankURL16[];
+COMPONENT_EXPORT(URL) extern const char kAboutSrcdocURL[];
+COMPONENT_EXPORT(URL) extern const char16_t kAboutSrcdocURL16[];
+
+COMPONENT_EXPORT(URL) extern const char kAboutBlankPath[];  // URL paths.
+COMPONENT_EXPORT(URL) extern const char16_t kAboutBlankPath16[];
+COMPONENT_EXPORT(URL) extern const char kAboutSrcdocPath[];
+COMPONENT_EXPORT(URL) extern const char16_t kAboutSrcdocPath16[];
+
+COMPONENT_EXPORT(URL) extern const char kAboutScheme[];  // Scheme names.
+COMPONENT_EXPORT(URL) extern const char16_t kAboutScheme16[];
+COMPONENT_EXPORT(URL) extern const char kBlobScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kBlobScheme16[];
+// The content scheme is specific to Android for identifying a stored file.
+COMPONENT_EXPORT(URL) extern const char kContentScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kContentScheme16[];
+COMPONENT_EXPORT(URL) extern const char kContentIDScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kContentIDScheme16[];
+COMPONENT_EXPORT(URL) extern const char kDataScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kDataScheme16[];
+COMPONENT_EXPORT(URL) extern const char kFileScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kFileScheme16[];
+COMPONENT_EXPORT(URL) extern const char kFileSystemScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kFileSystemScheme16[];
+COMPONENT_EXPORT(URL) extern const char kFtpScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kFtpScheme16[];
+COMPONENT_EXPORT(URL) extern const char kHttpScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kHttpScheme16[];
+COMPONENT_EXPORT(URL) extern const char kHttpsScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kHttpsScheme16[];
+COMPONENT_EXPORT(URL) extern const char kJavaScriptScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kJavaScriptScheme16[];
+COMPONENT_EXPORT(URL) extern const char kMailToScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kMailToScheme16[];
+COMPONENT_EXPORT(URL) extern const char kTelScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kTelScheme16[];
+COMPONENT_EXPORT(URL) extern const char kUrnScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kUrnScheme16[];
+COMPONENT_EXPORT(URL) extern const char kUuidInPackageScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kUuidInPackageScheme16[];
+COMPONENT_EXPORT(URL) extern const char kWebcalScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kWebcalScheme16[];
+COMPONENT_EXPORT(URL) extern const char kWsScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kWsScheme16[];
+COMPONENT_EXPORT(URL) extern const char kWssScheme[];
+COMPONENT_EXPORT(URL) extern const char16_t kWssScheme16[];
+
+// Used to separate a standard scheme and the hostname: "://".
+COMPONENT_EXPORT(URL) extern const char kStandardSchemeSeparator[];
+COMPONENT_EXPORT(URL) extern const char16_t kStandardSchemeSeparator16[];
+
+COMPONENT_EXPORT(URL) extern const size_t kMaxURLChars;  // 2 * 1024 * 1024.
+
+}  // namespace url
+
+#endif  // URL_URL_CONSTANTS_H_
diff --git a/url_features.cc b/url_features.cc
new file mode 100644
index 00000000000..8f38ff257a8
--- /dev/null
+++ b/url_features.cc
@@ -0,0 +1,35 @@
+// Copyright 2022 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/url_features.h"
+
+namespace url {
+
+BASE_FEATURE(kUseIDNA2008NonTransitional,
+             "UseIDNA2008NonTransitional",
+             base::FEATURE_ENABLED_BY_DEFAULT);
+
+// Kill switch for crbug.com/1362507; enabled by default.
+BASE_FEATURE(kRecordIDNA2008Metrics,
+             "RecordIDNA2008Metrics",
+             base::FEATURE_ENABLED_BY_DEFAULT);
+
+BASE_FEATURE(kStrictIPv4EmbeddedIPv6AddressParsing,  // Off by default.
+             "StrictIPv4EmbeddedIPv6AddressParsing",
+             base::FEATURE_DISABLED_BY_DEFAULT);
+
+// Kill switch for crbug.com/1220361; enabled by default.
+BASE_FEATURE(kResolveBareFragmentWithColonOnNonHierarchical,
+             "ResolveBareFragmentWithColonOnNonHierarchical",
+             base::FEATURE_ENABLED_BY_DEFAULT);
+
+bool IsUsingIDNA2008NonTransitional() {
+  return base::FeatureList::IsEnabled(kUseIDNA2008NonTransitional);
+}
+
+bool IsRecordingIDNA2008Metrics() {
+  return base::FeatureList::IsEnabled(kRecordIDNA2008Metrics);
+}
+
+}  // namespace url
diff --git a/url_features.h b/url_features.h
new file mode 100644
index 00000000000..e95752141f3
--- /dev/null
+++ b/url_features.h
@@ -0,0 +1,33 @@
+// Copyright 2022 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_URL_FEATURES_H_
+#define URL_URL_FEATURES_H_
+
+#include "base/component_export.h"
+#include "base/feature_list.h"
+
+namespace url {
+
+COMPONENT_EXPORT(URL) BASE_DECLARE_FEATURE(kUseIDNA2008NonTransitional);
+
+// Returns true if Chrome is using IDNA 2008 in Non-Transitional mode.
+COMPONENT_EXPORT(URL) bool IsUsingIDNA2008NonTransitional();
+
+// Returns true if Chrome is recording IDNA 2008 related metrics.
+COMPONENT_EXPORT(URL) bool IsRecordingIDNA2008Metrics();
+
+// When enabled, Chrome enforces the 4 part check for IPv4 embedded IPv6
+// addresses.
+COMPONENT_EXPORT(URL)
+BASE_DECLARE_FEATURE(kStrictIPv4EmbeddedIPv6AddressParsing);
+
+// When enabled, allows resolving of a bare fragment containing a colon against
+// a non-hierarchical URL. (For example '#foo:bar' against 'about:blank'.)
+COMPONENT_EXPORT(URL)
+BASE_DECLARE_FEATURE(kResolveBareFragmentWithColonOnNonHierarchical);
+
+}  // namespace url
+
+#endif  // URL_URL_FEATURES_H_
diff --git a/url_file.h b/url_file.h
new file mode 100644
index 00000000000..65ce98ac404
--- /dev/null
+++ b/url_file.h
@@ -0,0 +1,101 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_URL_FILE_H_
+#define URL_URL_FILE_H_
+
+// Provides shared functions used by the internals of the parser and
+// canonicalizer for file URLs. Do not use outside of these modules.
+
+#include "base/strings/string_util.h"
+#include "url/url_parse_internal.h"
+
+namespace url {
+
+// We allow both "c:" and "c|" as drive identifiers.
+inline bool IsWindowsDriveSeparator(char16_t ch) {
+  return ch == ':' || ch == '|';
+}
+inline bool IsWindowsDriveSeparator(char ch) {
+  return IsWindowsDriveSeparator(static_cast<char16_t>(ch));
+}
+
+// Returns the index of the first slash in the input at or after the given
+// index, or spec_len if the end of the input is reached.
+template<typename CHAR>
+inline int FindNextSlash(const CHAR* spec, int begin_index, int spec_len) {
+  int idx = begin_index;
+  while (idx < spec_len && !IsURLSlash(spec[idx]))
+    idx++;
+  return idx;
+}
+
+// DoesContainWindowsDriveSpecUntil returns the least offset between
+// start_offset and max_offset (inclusive) such that the spec has a valid drive
+// specification starting at that offset. Otherwise it returns -1. This function
+// gracefully handles, by returning -1, start_offset values that are equal to or
+// larger than the spec_len, and caps max_offset appropriately to simplify
+// callers. max_offset must be at least start_offset (this is CHECKed).
+template <typename CHAR>
+inline int DoesContainWindowsDriveSpecUntil(const CHAR* spec,
+                                            int start_offset,
+                                            int max_offset,
+                                            int spec_len) {
+  CHECK_LE(start_offset, max_offset);
+  if (start_offset > spec_len - 2)
+    return -1;  // Not enough room.
+  if (max_offset > spec_len - 2)
+    max_offset = spec_len - 2;  // Leave room for letter + separator.
+  for (int offset = start_offset; offset <= max_offset; ++offset) {
+    if (!base::IsAsciiAlpha(spec[offset]))
+      continue;  // Doesn't contain a valid drive letter.
+    if (!IsWindowsDriveSeparator(spec[offset + 1]))
+      continue;  // Isn't followed with a drive separator.
+    return offset;
+  }
+  return -1;
+}
+
+// Returns true if the start_offset in the given spec looks like it begins a
+// drive spec, for example "c:". This function explicitly handles start_offset
+// values that are equal to or larger than the spec_len to simplify callers.
+//
+// If this returns true, the spec is guaranteed to have a valid drive letter
+// plus a drive letter separator (a colon or a pipe) starting at |start_offset|.
+template <typename CHAR>
+inline bool DoesBeginWindowsDriveSpec(const CHAR* spec,
+                                      int start_offset,
+                                      int spec_len) {
+  return DoesContainWindowsDriveSpecUntil(spec, start_offset, start_offset,
+                                          spec_len) == start_offset;
+}
+
+#ifdef WIN32
+
+// Returns true if the start_offset in the given text looks like it begins a
+// UNC path, for example "\\". This function explicitly handles start_offset
+// values that are equal to or larger than the spec_len to simplify callers.
+//
+// When strict_slashes is set, this function will only accept backslashes as is
+// standard for Windows. Otherwise, it will accept forward slashes as well
+// which we use for a lot of URL handling.
+template<typename CHAR>
+inline bool DoesBeginUNCPath(const CHAR* text,
+                             int start_offset,
+                             int len,
+                             bool strict_slashes) {
+  int remaining_len = len - start_offset;
+  if (remaining_len < 2)
+    return false;
+
+  if (strict_slashes)
+    return text[start_offset] == '\\' && text[start_offset + 1] == '\\';
+  return IsURLSlash(text[start_offset]) && IsURLSlash(text[start_offset + 1]);
+}
+
+#endif  // WIN32
+
+}  // namespace url
+
+#endif  // URL_URL_FILE_H_
diff --git a/url_idna_icu.cc b/url_idna_icu.cc
new file mode 100644
index 00000000000..0a552a88013
--- /dev/null
+++ b/url_idna_icu.cc
@@ -0,0 +1,144 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// ICU-based IDNA converter.
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <ostream>
+
+#include "base/check_op.h"
+#include "third_party/icu/source/common/unicode/uidna.h"
+#include "third_party/icu/source/common/unicode/utypes.h"
+#include "url/url_canon_icu.h"
+#include "url/url_canon_internal.h"  // for _itoa_s
+#include "url/url_features.h"
+
+namespace url {
+
+namespace {
+
+// Use UIDNA, a C pointer to a UTS46/IDNA 2008 handling object opened with
+// uidna_openUTS46().
+//
+// We use UTS46 with BiDiCheck to migrate from IDNA 2003 (with unassigned
+// code points allowed) to IDNA 2008 with the backward compatibility in mind.
+// What it does:
+//
+// 1. Use the up-to-date Unicode data.
+// 2. Define a case folding/mapping with the up-to-date Unicode data as
+//    in IDNA 2003.
+// 3. If `use_idna_non_transitional` is true, use non-transitional mechanism for
+//    4 deviation characters (sharp-s, final sigma, ZWJ and ZWNJ) per
+//    url.spec.whatwg.org.
+// 4. Continue to allow symbols and punctuations.
+// 5. Apply new BiDi check rules more permissive than the IDNA 2003 BiDi rules.
+// 6. Do not apply STD3 rules.
+// 7. Do not allow unassigned code points.
+//
+// It also closely matches what IE 10 does except for the BiDi check (
+// http://goo.gl/3XBhqw ).
+// See http://unicode.org/reports/tr46/ and references therein
+// for more details.
+UIDNA* CreateIDNA(bool use_idna_non_transitional) {
+  uint32_t options = UIDNA_CHECK_BIDI;
+  if (use_idna_non_transitional) {
+    // Use non-transitional processing if enabled. See
+    // https://url.spec.whatwg.org/#idna for details.
+    options |=
+        UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE;
+  }
+  UErrorCode err = U_ZERO_ERROR;
+  UIDNA* idna = uidna_openUTS46(options, &err);
+  if (U_FAILURE(err)) {
+    CHECK(false) << "failed to open UTS46 data with error: " << u_errorName(err)
+                 << ". If you see this error message in a test environment "
+                 << "your test environment likely lacks the required data "
+                 << "tables for libicu. See https://crbug.com/778929.";
+    idna = nullptr;
+  }
+  return idna;
+}
+
+UIDNA* GetUIDNA() {
+  // This logic results in having two UIDNA instances in tests. This is okay.
+  if (IsUsingIDNA2008NonTransitional()) {
+    static UIDNA* uidna = CreateIDNA(/*use_idna_non_transitional=*/true);
+    return uidna;
+  } else {
+    static UIDNA* uidna = CreateIDNA(/*use_idna_non_transitional=*/false);
+    return uidna;
+  }
+}
+
+}  // namespace
+
+// Converts the Unicode input representing a hostname to ASCII using IDN rules.
+// The output must be ASCII, but is represented as wide characters.
+//
+// On success, the output will be filled with the ASCII host name and it will
+// return true. Unlike most other canonicalization functions, this assumes that
+// the output is empty. The beginning of the host will be at offset 0, and
+// the length of the output will be set to the length of the new host name.
+//
+// On error, this will return false. The output in this case is undefined.
+// TODO(jungshik): use UTF-8/ASCII version of nameToASCII.
+// Change the function signature and callers accordingly to avoid unnecessary
+// conversions in our code. In addition, consider using icu::IDNA's UTF-8/ASCII
+// version with StringByteSink. That way, we can avoid C wrappers and additional
+// string conversion.
+bool IDNToASCII(const char16_t* src, int src_len, CanonOutputW* output) {
+  DCHECK(output->length() == 0);  // Output buffer is assumed empty.
+
+  UIDNA* uidna = GetUIDNA();
+  DCHECK(uidna != nullptr);
+  while (true) {  // Loops again only after growing the buffer on overflow.
+    UErrorCode err = U_ZERO_ERROR;
+    UIDNAInfo info = UIDNA_INFO_INITIALIZER;
+    int output_length = uidna_nameToASCII(uidna, src, src_len, output->data(),
+                                          output->capacity(), &info, &err);
+
+    // Ignore various errors for web compatibility. The options are specified
+    // by the WHATWG URL Standard. See
+    //  - https://unicode.org/reports/tr46/
+    //  - https://url.spec.whatwg.org/#concept-domain-to-ascii
+    //    (we set beStrict to false)
+
+    // Disable the "CheckHyphens" option in UTS #46. See
+    //  - https://crbug.com/804688
+    //  - https://github.com/whatwg/url/issues/267
+    info.errors &= ~UIDNA_ERROR_HYPHEN_3_4;
+    info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN;
+    info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN;
+
+    // Disable the "VerifyDnsLength" option in UTS #46.
+    info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
+    info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
+    info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
+
+    if (U_SUCCESS(err) && info.errors == 0) {
+      // Per WHATWG URL, it is a failure if the ToASCII output is empty.
+      //
+      // ICU would usually return UIDNA_ERROR_EMPTY_LABEL in this case, but we
+      // want to continue allowing http://abc..def/ while forbidding http:///.
+      //
+      if (output_length == 0) {
+        return false;
+      }
+
+      output->set_length(output_length);
+      return true;
+    }
+
+    if (err != U_BUFFER_OVERFLOW_ERROR || info.errors != 0)
+      return false;  // Unknown error, give up.
+
+    // Not enough room in our buffer, expand.
+    output->Resize(output_length);
+  }
+}
+
+}  // namespace url
diff --git a/url_idna_icu_alternatives_android.cc b/url_idna_icu_alternatives_android.cc
new file mode 100644
index 00000000000..9faf5710f90
--- /dev/null
+++ b/url_idna_icu_alternatives_android.cc
@@ -0,0 +1,40 @@
+// Copyright 2014 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <string.h>
+
+#include <string>
+
+#include "base/android/jni_android.h"
+#include "base/android/jni_string.h"
+#include "base/strings/string_piece.h"
+#include "url/url_canon_internal.h"
+#include "url/url_jni_headers/IDNStringUtil_jni.h"
+
+using base::android::ScopedJavaLocalRef;
+
+namespace url {
+
+// This uses the JDK's conversion function, which uses IDNA 2003, unlike the
+// ICU implementation.
+bool IDNToASCII(const char16_t* src, int src_len, CanonOutputW* output) {
+  DCHECK_EQ(0u, output->length());  // Output buffer is assumed empty.
+
+  JNIEnv* env = base::android::AttachCurrentThread();
+  base::android::ScopedJavaLocalRef<jstring> java_src =
+      base::android::ConvertUTF16ToJavaString(
+          env, base::StringPiece16(src, src_len));
+  ScopedJavaLocalRef<jstring> java_result =
+      android::Java_IDNStringUtil_idnToASCII(env, java_src);
+  // NULL indicates failure.
+  if (java_result.is_null())
+    return false;
+
+  std::u16string utf16_result =
+      base::android::ConvertJavaStringToUTF16(java_result);
+  output->Append(utf16_result.data(), utf16_result.size());
+  return true;
+}
+
+}  // namespace url
diff --git a/url_idna_icu_alternatives_ios.mm b/url_idna_icu_alternatives_ios.mm
new file mode 100644
index 00000000000..d604b351632
--- /dev/null
+++ b/url_idna_icu_alternatives_ios.mm
@@ -0,0 +1,28 @@
+// Copyright 2016 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <string.h>
+
+#include <ostream>
+#include <string>
+
+#include "base/strings/string_piece.h"
+#include "base/strings/string_util.h"
+#include "base/strings/utf_string_conversions.h"
+#include "url/url_canon_internal.h"
+
+namespace url {
+
+// Only allow ASCII to avoid ICU dependency. Use NSString+IDN
+// to convert non-ASCII URL prior to passing to API.
+bool IDNToASCII(const char16_t* src, int src_len, CanonOutputW* output) {
+  if (base::IsStringASCII(base::StringPiece16(src, src_len))) {
+    output->Append(src, src_len);
+    return true;
+  }
+  DCHECK(false) << "IDN URL support is not available.";
+  return false;
+}
+
+}  // namespace url
diff --git a/url_parse_file.cc b/url_parse_file.cc
new file mode 100644
index 00000000000..979ec82b97a
--- /dev/null
+++ b/url_parse_file.cc
@@ -0,0 +1,198 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/check.h"
+#include "url/third_party/mozilla/url_parse.h"
+#include "url/url_file.h"
+#include "url/url_parse_internal.h"
+
+// Interesting IE file:isms...
+//
+//  INPUT                      OUTPUT
+//  =========================  ==============================
+//  file:/foo/bar              file:///foo/bar
+//      The result here seems totally invalid!?!? This isn't UNC.
+//
+//  file:/
+//  file:// or any other number of slashes
+//      IE6 doesn't do anything at all if you click on this link. No error:
+//      nothing. IE6's history system seems to always color this link, so I'm
+//      guessing that it maps internally to the empty URL.
+//
+//  C:\                        file:///C:/
+//      When on a file: URL source page, this link will work. When over HTTP,
+//      the file: URL will appear in the status bar but the link will not work
+//      (security restriction for all file URLs).
+//
+//  file:foo/                  file:foo/     (invalid?!?!?)
+//  file:/foo/                 file:///foo/  (invalid?!?!?)
+//  file://foo/                file://foo/   (UNC to server "foo")
+//  file:///foo/               file:///foo/  (invalid, seems to be a file)
+//  file:////foo/              file://foo/   (UNC to server "foo")
+//      Any more than four slashes is also treated as UNC.
+//
+//  file:C:/                   file://C:/
+//  file:/C:/                  file://C:/
+//      The number of slashes after "file:" doesn't matter if the thing
+//      following it looks like an absolute drive path. Also, slashes and
+//      backslashes are equally valid here.
+
+namespace url {
+
+namespace {
+
+// A subcomponent of DoParseFileURL, the input of this function should be a UNC
+// path name, with the index of the first character after the slashes following
+// the scheme given in |after_slashes|. This will initialize the host, path,
+// query, and ref, and leave the other output components untouched
+// (DoParseFileURL handles these for us).
+template <typename CHAR>
+void DoParseUNC(const CHAR* spec,
+                int after_slashes,
+                int spec_len,
+                Parsed* parsed) {
+  int next_slash = FindNextSlash(spec, after_slashes, spec_len);
+
+  // Everything up until that first slash we found (or end of string) is the
+  // host name, which will end up being the UNC host. For example,
+  // "file://foo/bar.txt" will get a server name of "foo" and a path of
+  // "/bar.txt". Later, on Windows, this should be treated as the filename
+  // "\\foo\bar.txt" in proper UNC notation.
+  if (after_slashes < next_slash)
+    parsed->host = MakeRange(after_slashes, next_slash);
+  else
+    parsed->host.reset();
+  if (next_slash < spec_len) {
+    ParsePathInternal(spec, MakeRange(next_slash, spec_len),
+                      &parsed->path, &parsed->query, &parsed->ref);
+  } else {
+    parsed->path.reset();
+  }
+}
+
+// A subcomponent of DoParseFileURL, the input should be a local file, with the
+// beginning of the path indicated by the index in |path_begin|. This will
+// initialize the host, path, query, and ref, and leave the other output
+// components untouched (DoParseFileURL handles these for us).
+template<typename CHAR>
+void DoParseLocalFile(const CHAR* spec,
+                      int path_begin,
+                      int spec_len,
+                      Parsed* parsed) {
+  parsed->host.reset();
+  ParsePathInternal(spec, MakeRange(path_begin, spec_len),
+                    &parsed->path, &parsed->query, &parsed->ref);
+}
+
+// Backend for the external functions that operates on either char type.
+// Handles cases where there is a scheme, but also when handed the first
+// character following the "file:" at the beginning of the spec. If so,
+// this is usually a slash, but needn't be; we allow paths like "file:c:\foo".
+template<typename CHAR>
+void DoParseFileURL(const CHAR* spec, int spec_len, Parsed* parsed) {
+  DCHECK(spec_len >= 0);
+
+  // Get the parts we never use for file URLs out of the way.
+  parsed->username.reset();
+  parsed->password.reset();
+  parsed->port.reset();
+
+  // Many of the code paths don't set these, so it's convenient to just clear
+  // them. We'll write them in those cases we need them.
+  parsed->query.reset();
+  parsed->ref.reset();
+
+  // Strip leading & trailing spaces and control characters.
+  int begin = 0;
+  TrimURL(spec, &begin, &spec_len);
+
+  // Find the scheme, if any.
+  int num_slashes = CountConsecutiveSlashes(spec, begin, spec_len);
+  int after_scheme;
+  int after_slashes;
+#ifdef WIN32
+  // See how many slashes there are. We want to handle cases like UNC but also
+  // "/c:/foo". This is when there is no scheme, so we can allow pages to do
+  // links like "c:/foo/bar" or "//foo/bar". This is also called by the
+  // relative URL resolver when it determines there is an absolute URL, which
+  // may give us input like "/c:/foo".
+  after_slashes = begin + num_slashes;
+  if (DoesBeginWindowsDriveSpec(spec, after_slashes, spec_len)) {
+    // Windows path, don't try to extract the scheme (for example, "c:\foo").
+    parsed->scheme.reset();
+    after_scheme = after_slashes;
+  } else if (DoesBeginUNCPath(spec, begin, spec_len, false)) {
+    // Windows UNC path: don't try to extract the scheme, but keep the slashes.
+    parsed->scheme.reset();
+    after_scheme = begin;
+  } else
+#endif
+  {
+    // ExtractScheme doesn't understand the possibility of filenames with
+    // colons in them, in which case it returns the entire spec up to the
+    // colon as the scheme. So handle /foo.c:5 as a file but foo.c:5 as
+    // the foo.c: scheme.
+    if (!num_slashes &&
+        ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {
+      // Offset the results since we gave ExtractScheme a substring.
+      parsed->scheme.begin += begin;
+      after_scheme = parsed->scheme.end() + 1;
+    } else {
+      // No scheme found, remember that.
+      parsed->scheme.reset();
+      after_scheme = begin;
+    }
+  }
+
+  // Handle empty specs or ones that contain only whitespace or control chars,
+  // or that are just the scheme (for example "file:").
+  if (after_scheme == spec_len) {
+    parsed->host.reset();
+    parsed->path.reset();
+    return;
+  }
+
+  num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len);
+  after_slashes = after_scheme + num_slashes;
+#ifdef WIN32
+  // Check whether the input is a drive again. We checked above for windows
+  // drive specs, but that's only at the very beginning to see if we have a
+  // scheme at all. This test will be duplicated in that case, but will
+  // additionally handle all cases with a real scheme such as "file:///C:/".
+  if (!DoesBeginWindowsDriveSpec(spec, after_slashes, spec_len) &&
+      num_slashes != 3) {
+    // Anything not beginning with a drive spec ("c:\") on Windows is treated
+    // as UNC, with the exception of three slashes which always means a file.
+    // Even IE7 treats file:///foo/bar as "/foo/bar", which then fails.
+    DoParseUNC(spec, after_slashes, spec_len, parsed);
+    return;
+  }
+#else
+  // file: URL with exactly 2 slashes is considered to have a host component.
+  if (num_slashes == 2) {
+    DoParseUNC(spec, after_slashes, spec_len, parsed);
+    return;
+  }
+#endif  // WIN32
+
+  // Easy and common case, the full path immediately follows the scheme
+  // (modulo slashes), as in "file://c:/foo". Just treat everything from
+  // there to the end as the path. Empty hosts have 0 length instead of -1.
+  // We include the last slash as part of the path if there is one.
+  DoParseLocalFile(spec,
+      num_slashes > 0 ? after_scheme + num_slashes - 1 : after_scheme,
+      spec_len, parsed);
+}
+
+}  // namespace
+
+void ParseFileURL(const char* url, int url_len, Parsed* parsed) {
+  DoParseFileURL(url, url_len, parsed);
+}
+
+void ParseFileURL(const char16_t* url, int url_len, Parsed* parsed) {
+  DoParseFileURL(url, url_len, parsed);
+}
+
+}  // namespace url
diff --git a/url_parse_internal.h b/url_parse_internal.h
new file mode 100644
index 00000000000..a73f13b184c
--- /dev/null
+++ b/url_parse_internal.h
@@ -0,0 +1,96 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_URL_PARSE_INTERNAL_H_
+#define URL_URL_PARSE_INTERNAL_H_
+
+// Contains common inline helper functions used by the URL parsing routines.
+
+#include "url/third_party/mozilla/url_parse.h"
+
+namespace url {
+
+// We treat slashes and backslashes the same for IE compatibility.
+inline bool IsURLSlash(char16_t ch) {
+  return ch == '/' || ch == '\\';
+}
+inline bool IsURLSlash(char ch) {
+  return IsURLSlash(static_cast<char16_t>(ch));
+}
+
+// Returns true if we should trim this character from the URL because it is a
+// space or a control character.
+inline bool ShouldTrimFromURL(char16_t ch) {
+  return ch <= ' ';
+}
+inline bool ShouldTrimFromURL(char ch) {
+  return ShouldTrimFromURL(static_cast<char16_t>(ch));
+}
+
+// Given an already-initialized begin index and length, this shrinks the range
+// to eliminate "should-be-trimmed" characters. Note that the length does *not*
+// indicate the length of untrimmed data from |*begin|, but rather the position
+// in the input string (so the string starts at character |*begin| in the spec,
+// and goes until |*len|).
+template<typename CHAR>
+inline void TrimURL(const CHAR* spec, int* begin, int* len,
+                    bool trim_path_end = true) {
+  // Strip leading whitespace and control characters.
+  while (*begin < *len && ShouldTrimFromURL(spec[*begin]))
+    (*begin)++;
+
+  if (trim_path_end) {
+    // Strip trailing whitespace and control characters. The *len > *begin
+    // test is needed for when the input string is all blanks; we don't want
+    // to back up past the input.
+    while (*len > *begin && ShouldTrimFromURL(spec[*len - 1]))
+      (*len)--;
+  }
+}
+
+// Counts the number of consecutive slashes starting at the given offset
+// in the given string of the given length.
+template<typename CHAR>
+inline int CountConsecutiveSlashes(const CHAR *str,
+                                   int begin_offset, int str_len) {
+  int count = 0;
+  while (begin_offset + count < str_len &&
+         IsURLSlash(str[begin_offset + count]))
+    ++count;
+  return count;
+}
+
+// Internal functions in url_parse.cc that parse the path, that is, everything
+// following the authority section. The input is the range of everything
+// following the authority section, and the output is the identified ranges.
+//
+// This is designed for the file URL parser or other consumers who may do
+// special stuff at the beginning, but want regular path parsing. It just
+// maps to the internal parsing function for paths.
+void ParsePathInternal(const char* spec,
+                       const Component& path,
+                       Component* filepath,
+                       Component* query,
+                       Component* ref);
+void ParsePathInternal(const char16_t* spec,
+                       const Component& path,
+                       Component* filepath,
+                       Component* query,
+                       Component* ref);
+
+// Given a spec and the index of the character after the colon following the
+// scheme, this parses it and fills in the structure. Every item in the parsed
+// structure is filled EXCEPT for the scheme, which is untouched.
+void ParseAfterScheme(const char* spec,
+                      int spec_len,
+                      int after_scheme,
+                      Parsed* parsed);
+void ParseAfterScheme(const char16_t* spec,
+                      int spec_len,
+                      int after_scheme,
+                      Parsed* parsed);
+
+}  // namespace url
+
+#endif  // URL_URL_PARSE_INTERNAL_H_
diff --git a/url_parse_perftest.cc b/url_parse_perftest.cc
new file mode 100644
index 00000000000..7fe1d39b1e9
--- /dev/null
+++ b/url_parse_perftest.cc
@@ -0,0 +1,135 @@
+// Copyright 2006-2008 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/strings/string_piece.h"
+#include "base/test/perf_time_logger.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "url/gurl.h"
+#include "url/third_party/mozilla/url_parse.h"
+#include "url/url_canon.h"
+#include "url/url_canon_stdstring.h"
+
+namespace {
+
+TEST(URLParse, FullURL) {
+  constexpr base::StringPiece kUrl =
+      "http://me:pass@host/foo/bar.html;param?query=yes#ref";
+
+  url::Parsed parsed;
+  base::PerfTimeLogger timer("Full_URL_Parse_AMillion");
+
+  for (int i = 0; i < 1000000; i++)
+    url::ParseStandardURL(kUrl.data(), kUrl.size(), &parsed);
+  timer.Done();
+}
+
+constexpr base::StringPiece kTypicalUrl1 =
+    "http://www.google.com/"
+    "search?q=url+parsing&ie=utf-8&oe=utf-8&aq=t&rls=org.mozilla:en-US:"
+    "official&client=firefox-a";
+
+constexpr base::StringPiece kTypicalUrl2 =
+    "http://www.amazon.com/Stephen-King-Thrillers-Horror-People/dp/0766012336/"
+    "ref=sr_1_2/133-4144931-4505264?ie=UTF8&s=books&qid=2144880915&sr=8-2";
+
+constexpr base::StringPiece kTypicalUrl3 =
+    "http://store.apple.com/1-800-MY-APPLE/WebObjects/AppleStore.woa/wa/"
+    "RSLID?nnmm=browse&mco=578E9744&node=home/desktop/mac_pro";
+
+TEST(URLParse, TypicalURLParse) {
+  url::Parsed parsed1;
+  url::Parsed parsed2;
+  url::Parsed parsed3;
+
+  // Do this 1/3 of a million times since we do 3 different URLs.
+  base::PerfTimeLogger parse_timer("Typical_URL_Parse_AMillion");
+  for (int i = 0; i < 333333; i++) {
+    url::ParseStandardURL(kTypicalUrl1.data(), kTypicalUrl1.size(), &parsed1);
+    url::ParseStandardURL(kTypicalUrl2.data(), kTypicalUrl2.size(), &parsed2);
+    url::ParseStandardURL(kTypicalUrl3.data(), kTypicalUrl3.size(), &parsed3);
+  }
+  parse_timer.Done();
+}
+
+// Includes both parsing and canonicalization with no mallocs.
+TEST(URLParse, TypicalURLParseCanon) {
+  url::Parsed parsed1;
+  url::Parsed parsed2;
+  url::Parsed parsed3;
+
+  base::PerfTimeLogger canon_timer("Typical_Parse_Canon_AMillion");
+  url::Parsed out_parsed;
+  url::RawCanonOutput<1024> output;
+  for (int i = 0; i < 333333; i++) {  // divide by 3 so we get 1M
+    url::ParseStandardURL(kTypicalUrl1.data(), kTypicalUrl1.size(), &parsed1);
+    output.set_length(0);
+    url::CanonicalizeStandardURL(
+        kTypicalUrl1.data(), kTypicalUrl1.size(), parsed1,
+        url::SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, nullptr, &output,
+        &out_parsed);
+
+    url::ParseStandardURL(kTypicalUrl2.data(), kTypicalUrl2.size(), &parsed2);
+    output.set_length(0);
+    url::CanonicalizeStandardURL(
+        kTypicalUrl2.data(), kTypicalUrl2.size(), parsed2,
+        url::SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, nullptr, &output,
+        &out_parsed);
+
+    url::ParseStandardURL(kTypicalUrl3.data(), kTypicalUrl3.size(), &parsed3);
+    output.set_length(0);
+    url::CanonicalizeStandardURL(
+        kTypicalUrl3.data(), kTypicalUrl3.size(), parsed3,
+        url::SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, nullptr, &output,
+        &out_parsed);
+  }
+  canon_timer.Done();
+}
+
+// Includes both parsing and canonicalization, and mallocs for the output.
+TEST(URLParse, TypicalURLParseCanonStdString) {
+  url::Parsed parsed1;
+  url::Parsed parsed2;
+  url::Parsed parsed3;
+
+  base::PerfTimeLogger canon_timer("Typical_Parse_Canon_AMillion");
+  url::Parsed out_parsed;
+  for (int i = 0; i < 333333; i++) {  // divide by 3 so we get 1M
+    url::ParseStandardURL(kTypicalUrl1.data(), kTypicalUrl1.size(), &parsed1);
+    std::string out1;
+    url::StdStringCanonOutput output1(&out1);
+    url::CanonicalizeStandardURL(
+        kTypicalUrl1.data(), kTypicalUrl1.size(), parsed1,
+        url::SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, nullptr, &output1,
+        &out_parsed);
+
+    url::ParseStandardURL(kTypicalUrl2.data(), kTypicalUrl2.size(), &parsed2);
+    std::string out2;
+    url::StdStringCanonOutput output2(&out2);
+    url::CanonicalizeStandardURL(
+        kTypicalUrl2.data(), kTypicalUrl2.size(), parsed2,
+        url::SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, nullptr, &output2,
+        &out_parsed);
+
+    url::ParseStandardURL(kTypicalUrl3.data(), kTypicalUrl3.size(), &parsed3);
+    std::string out3;
+    url::StdStringCanonOutput output3(&out3);
+    url::CanonicalizeStandardURL(
+        kTypicalUrl3.data(), kTypicalUrl3.size(), parsed3,
+        url::SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, nullptr, &output3,
+        &out_parsed);
+  }
+  canon_timer.Done();
+}
+
+TEST(URLParse, GURL) {
+  base::PerfTimeLogger gurl_timer("Typical_GURL_AMillion");
+  for (int i = 0; i < 333333; i++) {  // divide by 3 so we get 1M
+    GURL gurl1(kTypicalUrl1);
+    GURL gurl2(kTypicalUrl2);
+    GURL gurl3(kTypicalUrl3);
+  }
+  gurl_timer.Done();
+}
+
+}  // namespace
diff --git a/url_parse_unittest.cc b/url_parse_unittest.cc
new file mode 100644
index 00000000000..88b6f05efb5
--- /dev/null
+++ b/url_parse_unittest.cc
@@ -0,0 +1,687 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stddef.h>
+
+#include "testing/gtest/include/gtest/gtest.h"
+#include "url/third_party/mozilla/url_parse.h"
+
+// Interesting IE file:isms...
+//
+//  file:/foo/bar              file:///foo/bar
+//      The result here seems totally invalid!?!? This isn't UNC.
+//
+//  file:/
+//  file:// or any other number of slashes
+//      IE6 doesn't do anything at all if you click on this link. No error:
+//      nothing. IE6's history system seems to always color this link, so I'm
+//      guessing that it maps internally to the empty URL.
+//
+//  C:\                        file:///C:/
+//  /                          file:///C:/
+//  /foo                       file:///C:/foo
+//      Interestingly, IE treats "/" as an alias for "c:\", which makes sense,
+//      but is weird to think about on Windows.
+//
+//  file:foo/                  file:foo/  (invalid?!?!?)
+//  file:/foo/                 file:///foo/  (invalid?!?!?)
+//  file://foo/                file://foo/   (UNC to server "foo")
+//  file:///foo/               file:///foo/  (invalid)
+//  file:////foo/              file://foo/   (UNC to server "foo")
+//      Any more than four slashes is also treated as UNC.
+//
+//  file:C:/                   file://C:/
+//  file:/C:/                  file://C:/
+//      The number of slashes after "file:" doesn't matter if the thing
+//      following it looks like an absolute drive path. Also, slashes and
+//      backslashes are equally valid here.
+
+namespace url {
+namespace {
+
+// Used for regular URL parse cases; the file URL table reuses it as well.
+struct URLParseCase {
+  const char* input;  // URL text handed to the parser.
+  // Expected component text below; NULL means the component should not exist.
+  const char* scheme;
+  const char* username;
+  const char* password;
+  const char* host;
+  int port;  // Expected ParsePort() result for the parsed port component.
+  const char* path;
+  const char* query;
+  const char* ref;
+};
+
+// Simpler version of URLParseCase for testing path URLs.
+struct PathURLParseCase {
+  const char* input;  // URL text handed to ParsePathURL().
+  // Expected text, or NULL when the component should be nonexistent.
+  const char* scheme;
+  const char* path;  // Compared against parsed.GetContent(), not parsed.path.
+};
+
+// Simpler version of URLParseCase for testing mailto URLs.
+struct MailtoURLParseCase {
+  const char* input;  // URL text handed to ParseMailtoURL().
+  // Expected text, or NULL when the component should be nonexistent.
+  const char* scheme;
+  const char* path;
+  const char* query;
+};
+
+// More complicated version of URLParseCase for testing filesystem URLs.
+struct FileSystemURLParseCase {
+  const char* input;  // URL text handed to ParseFileSystemURL().
+  // Expectations for the nested URL, i.e. parsed.inner_parsed().
+  const char* inner_scheme;  // NULL here means no inner Parsed is expected.
+  const char* inner_username;
+  const char* inner_password;
+  const char* inner_host;
+  int inner_port;
+  const char* inner_path;
+  const char* path;  // Expectations for the outer Parsed start here.
+  const char* query;
+  const char* ref;
+};
+
+bool ComponentMatches(const char* input,
+                      const char* reference,
+                      const Component& component) {
+  // Sanity checks: -1 is the only negative length a component may carry, and
+  // its begin offset must never be negative.
+  EXPECT_TRUE(component.is_valid() || component.len == -1);
+  EXPECT_LE(0, component.begin);
+
+  // With no reference text, the component itself must be nonexistent.
+  if (reference == nullptr)
+    return component.len == -1;
+
+  // Otherwise the component must exist and have the reference's exact length.
+  const bool same_length =
+      component.is_valid() &&
+      strlen(reference) == static_cast<size_t>(component.len);
+  if (!same_length)
+    return false;
+
+  return strncmp(reference, input + component.begin, component.len) == 0;
+}
+
+void ExpectInvalidComponent(const Component& component) {
+  EXPECT_EQ(0, component.begin);  // An unused component must begin at 0...
+  EXPECT_EQ(-1, component.len);   // ...and carry the -1 "nonexistent" length.
+}
+
+// Parsed ----------------------------------------------------------------------
+
+TEST(URLParser, Length) {
+  const char* length_cases[] = {
+      // One with everything in it.
+      "http://user:pass@host:99/foo?bar#baz",
+      // One with nothing in it.
+      "",
+      // Working backwards, let's start taking off stuff from the full one.
+      "http://user:pass@host:99/foo?bar#",
+      "http://user:pass@host:99/foo?bar",
+      "http://user:pass@host:99/foo?",
+      "http://user:pass@host:99/foo",
+      "http://user:pass@host:99/",
+      "http://user:pass@host:99",
+      "http://user:pass@host:",
+      "http://user:pass@host",
+      "http://host",
+      "http://user@",
+      "http:",
+  };
+  for (const char* spec : length_cases) {
+    const int spec_length = static_cast<int>(strlen(spec));
+
+    // Parsed::Length() is expected to equal the length of the whole input.
+    Parsed parsed;
+    ParseStandardURL(spec, spec_length, &parsed);
+    EXPECT_EQ(spec_length, parsed.Length());
+  }
+}
+
+TEST(URLParser, CountCharactersBefore) {
+  struct CountCase {
+    const char* url;
+    Parsed::ComponentType component;
+    bool include_delimiter;
+    int expected_count;
+  } count_cases[] = {
+    // Test each possibility in the case where all components are present.
+    //0         1         2
+    //0123456789012345678901
+    {"http://u:p@h:8/p?q#r", Parsed::SCHEME, true, 0},
+    {"http://u:p@h:8/p?q#r", Parsed::SCHEME, false, 0},
+    {"http://u:p@h:8/p?q#r", Parsed::USERNAME, true, 7},
+    {"http://u:p@h:8/p?q#r", Parsed::USERNAME, false, 7},
+    {"http://u:p@h:8/p?q#r", Parsed::PASSWORD, true, 9},
+    {"http://u:p@h:8/p?q#r", Parsed::PASSWORD, false, 9},
+    {"http://u:p@h:8/p?q#r", Parsed::HOST, true, 11},
+    {"http://u:p@h:8/p?q#r", Parsed::HOST, false, 11},
+    {"http://u:p@h:8/p?q#r", Parsed::PORT, true, 12},
+    {"http://u:p@h:8/p?q#r", Parsed::PORT, false, 13},
+    {"http://u:p@h:8/p?q#r", Parsed::PATH, false, 14},
+    {"http://u:p@h:8/p?q#r", Parsed::PATH, true, 14},
+    {"http://u:p@h:8/p?q#r", Parsed::QUERY, true, 16},
+    {"http://u:p@h:8/p?q#r", Parsed::QUERY, false, 17},
+    {"http://u:p@h:8/p?q#r", Parsed::REF, true, 18},
+    {"http://u:p@h:8/p?q#r", Parsed::REF, false, 19},
+    // Now test when the requested component is missing.
+    {"http://u:p@h:8/p?", Parsed::REF, true, 17},
+    {"http://u:p@h:8/p?q", Parsed::REF, true, 18},
+    {"http://u:p@h:8/p#r", Parsed::QUERY, true, 16},
+    {"http://u:p@h:8#r", Parsed::PATH, true, 14},
+    {"http://u:p@h/", Parsed::PORT, true, 12},
+    {"http://u:p@/", Parsed::HOST, true, 11},
+    // This case is a little weird. It will report that the password would
+    // start where the host begins. This is arguably correct, although you
+    // could also argue that it should start at the '@' sign. Doing it
+    // starting with the '@' sign is actually harder, so we don't bother.
+    {"http://u@h/", Parsed::PASSWORD, true, 9},
+    {"http://h/", Parsed::USERNAME, true, 7},
+    {"http:", Parsed::USERNAME, true, 5},
+    {"", Parsed::SCHEME, true, 0},
+    // Make sure a random component still works when there's nothing there.
+    {"", Parsed::REF, true, 0},
+    // File URLs are special with no host, so we test those.
+    {"file:///c:/foo", Parsed::USERNAME, true, 7},
+    {"file:///c:/foo", Parsed::PASSWORD, true, 7},
+    {"file:///c:/foo", Parsed::HOST, true, 7},
+    {"file:///c:/foo", Parsed::PATH, true, 7},
+  };
+  for (const CountCase& test : count_cases) {
+    const int length = static_cast<int>(strlen(test.url));
+
+    // A leading 'f' is enough here to tell file URLs from standard ones.
+    Parsed parsed;
+    const bool is_file_url = length > 0 && test.url[0] == 'f';
+    if (is_file_url)
+      ParseFileURL(test.url, length, &parsed);
+    else
+      ParseStandardURL(test.url, length, &parsed);
+
+    EXPECT_EQ(test.expected_count, parsed.CountCharactersBefore(
+                                       test.component, test.include_delimiter));
+  }
+}
+
+// Standard --------------------------------------------------------------------
+
+// Input                               Scheme  Usrname Passwd     Host         Port Path       Query        Ref
+// ------------------------------------ ------- ------- ---------- ------------ --- ---------- ------------ -----
+static URLParseCase cases[] = {
+  // Regular URL with all the parts
+{"http://user:pass@foo:21/bar;par?b#c", "http", "user", "pass",    "foo",       21, "/bar;par","b",          "c"},
+
+  // Known schemes should lean towards authority identification
+{"http:foo.com",                        "http", NULL,  NULL,      "foo.com",    -1, NULL,      NULL,        NULL},
+
+  // Spaces!
+{"\t   :foo.com   \n",                  "",     NULL,  NULL,      "foo.com",    -1, NULL,      NULL,        NULL},
+{" foo.com  ",                          NULL,   NULL,  NULL,      "foo.com",    -1, NULL,      NULL,        NULL},
+{"a:\t foo.com",                        "a",    NULL,  NULL,      "\t foo.com", -1, NULL,      NULL,        NULL},
+{"http://f:21/ b ? d # e ",             "http", NULL,  NULL,      "f",          21, "/ b ",    " d ",       " e"},
+
+  // Invalid port numbers should be identified and turned into -2, empty port
+  // numbers should be -1. Spaces aren't allowed in port numbers.
+{"http://f:/c",                         "http", NULL,  NULL,      "f",          -1, "/c",      NULL,        NULL},
+{"http://f:0/c",                        "http", NULL,  NULL,      "f",           0, "/c",      NULL,        NULL},
+{"http://f:00000000000000/c",           "http", NULL,  NULL,      "f",           0, "/c",      NULL,        NULL},
+{"http://f:00000000000000000000080/c",  "http", NULL,  NULL,      "f",          80, "/c",      NULL,        NULL},
+{"http://f:b/c",                        "http", NULL,  NULL,      "f",          -2, "/c",      NULL,        NULL},
+{"http://f: /c",                        "http", NULL,  NULL,      "f",          -2, "/c",      NULL,        NULL},
+{"http://f:\n/c",                       "http", NULL,  NULL,      "f",          -2, "/c",      NULL,        NULL},
+{"http://f:fifty-two/c",                "http", NULL,  NULL,      "f",          -2, "/c",      NULL,        NULL},
+{"http://f:999999/c",                   "http", NULL,  NULL,      "f",          -2, "/c",      NULL,        NULL},
+{"http://f: 21 / b ? d # e ",           "http", NULL,  NULL,      "f",          -2, "/ b ",    " d ",       " e"},
+
+  // Creative URLs missing key elements
+{"",                                    NULL,   NULL,  NULL,      NULL,         -1, NULL,      NULL,        NULL},
+{"  \t",                                NULL,   NULL,  NULL,      NULL,         -1, NULL,      NULL,        NULL},
+{":foo.com/",                           "",     NULL,  NULL,      "foo.com",    -1, "/",       NULL,        NULL},
+{":foo.com\\",                          "",     NULL,  NULL,      "foo.com",    -1, "\\",      NULL,        NULL},
+{":",                                   "",     NULL,  NULL,      NULL,         -1, NULL,      NULL,        NULL},
+{":a",                                  "",     NULL,  NULL,      "a",          -1, NULL,      NULL,        NULL},
+{":/",                                  "",     NULL,  NULL,      NULL,         -1, NULL,      NULL,        NULL},
+{":\\",                                 "",     NULL,  NULL,      NULL,         -1, NULL,      NULL,        NULL},
+{":#",                                  "",     NULL,  NULL,      NULL,         -1, NULL,      NULL,        ""},
+{"#",                                   NULL,   NULL,  NULL,      NULL,         -1, NULL,      NULL,        ""},
+{"#/",                                  NULL,   NULL,  NULL,      NULL,         -1, NULL,      NULL,        "/"},
+{"#\\",                                 NULL,   NULL,  NULL,      NULL,         -1, NULL,      NULL,        "\\"},
+{"#;?",                                 NULL,   NULL,  NULL,      NULL,         -1, NULL,      NULL,        ";?"},
+{"?",                                   NULL,   NULL,  NULL,      NULL,         -1, NULL,      "",          NULL},
+{"/",                                   NULL,   NULL,  NULL,      NULL,         -1, NULL,      NULL,        NULL},
+{":23",                                 "",     NULL,  NULL,      "23",         -1, NULL,      NULL,        NULL},
+{"/:23",                                "/",    NULL,  NULL,      "23",         -1, NULL,      NULL,        NULL},
+{"//",                                  NULL,   NULL,  NULL,      NULL,         -1, NULL,      NULL,        NULL},
+{"::",                                  "",     NULL,  NULL,      NULL,         -1, NULL,      NULL,        NULL},
+{"::23",                                "",     NULL,  NULL,      NULL,         23, NULL,      NULL,        NULL},
+{"foo://",                              "foo",  NULL,  NULL,      NULL,         -1, NULL,      NULL,        NULL},
+
+  // Username/passwords and things that look like them
+{"http://a:b@c:29/d",                   "http", "a",   "b",       "c",          29, "/d",      NULL,        NULL},
+{"http::@c:29",                         "http", "",    "",        "c",          29, NULL,      NULL,        NULL},
+  // ... "]" in the password field isn't allowed, but we tolerate it here...
+{"http://&a:foo(b]c@d:2/",              "http", "&a",  "foo(b]c", "d",           2, "/",       NULL,        NULL},
+{"http://::@c@d:2",                     "http", "",    ":@c",     "d",           2, NULL,      NULL,        NULL},
+{"http://foo.com:b@d/",                 "http", "foo.com", "b",   "d",          -1, "/",       NULL,        NULL},
+
+{"http://foo.com/\\@",                  "http", NULL,  NULL,      "foo.com",    -1, "/\\@",    NULL,        NULL},
+{"http:\\\\foo.com\\",                  "http", NULL,  NULL,      "foo.com",    -1, "\\",      NULL,        NULL},
+{"http:\\\\a\\b:c\\d@foo.com\\",        "http", NULL,  NULL,      "a",          -1, "\\b:c\\d@foo.com\\", NULL,   NULL},
+
+  // Tolerate different numbers of slashes.
+{"foo:/",                               "foo",  NULL,  NULL,      NULL,         -1, NULL,      NULL,        NULL},
+{"foo:/bar.com/",                       "foo",  NULL,  NULL,      "bar.com",    -1, "/",       NULL,        NULL},
+{"foo://///////",                       "foo",  NULL,  NULL,      NULL,         -1, NULL,      NULL,        NULL},
+{"foo://///////bar.com/",               "foo",  NULL,  NULL,      "bar.com",    -1, "/",       NULL,        NULL},
+{"foo:////://///",                      "foo",  NULL,  NULL,      NULL,         -1, "/////",   NULL,        NULL},
+
+  // Raw file paths on Windows aren't handled by the parser.
+{"c:/foo",                              "c",    NULL,  NULL,      "foo",        -1, NULL,      NULL,        NULL},
+{"//foo/bar",                           NULL,   NULL,  NULL,      "foo",        -1, "/bar",    NULL,        NULL},
+
+  // Use the first question mark for the query and the first hash for the ref.
+{"http://foo/path;a??e#f#g",            "http", NULL,  NULL,      "foo",        -1, "/path;a", "?e",      "f#g"},
+{"http://foo/abcd?efgh?ijkl",           "http", NULL,  NULL,      "foo",        -1, "/abcd",   "efgh?ijkl", NULL},
+{"http://foo/abcd#foo?bar",             "http", NULL,  NULL,      "foo",        -1, "/abcd",   NULL,        "foo?bar"},
+
+  // IPv6, check also interesting uses of colons.
+{"[61:24:74]:98",                       "[61",  NULL,  NULL,      "24:74]",     98, NULL,      NULL,        NULL},
+{"http://[61:27]:98",                   "http", NULL,  NULL,      "[61:27]",    98, NULL,      NULL,        NULL},
+{"http:[61:27]/:foo",                   "http", NULL,  NULL,      "[61:27]",    -1, "/:foo",   NULL,        NULL},
+{"http://[1::2]:3:4",                   "http", NULL,  NULL,      "[1::2]:3",    4, NULL,      NULL,        NULL},
+
+  // Partially-complete IPv6 literals, and related cases.
+{"http://2001::1",                      "http", NULL,  NULL,      "2001:",       1, NULL,      NULL,        NULL},
+{"http://[2001::1",                     "http", NULL,  NULL,      "[2001::1",   -1, NULL,      NULL,        NULL},
+{"http://2001::1]",                     "http", NULL,  NULL,      "2001::1]",   -1, NULL,      NULL,        NULL},
+{"http://2001::1]:80",                  "http", NULL,  NULL,      "2001::1]",   80, NULL,      NULL,        NULL},
+{"http://[2001::1]",                    "http", NULL,  NULL,      "[2001::1]",  -1, NULL,      NULL,        NULL},
+{"http://[2001::1]:80",                 "http", NULL,  NULL,      "[2001::1]",  80, NULL,      NULL,        NULL},
+{"http://[[::]]",                       "http", NULL,  NULL,      "[[::]]",     -1, NULL,      NULL,        NULL},
+
+};
+
+TEST(URLParser, Standard) {
+  // One Parsed is shared by every case on purpose: if the parser forgets to
+  // reset a field that only the constructor clears, a later case can catch it.
+  Parsed parsed;
+  for (const URLParseCase& expected : cases) {
+    const char* url = expected.input;
+    ParseStandardURL(url, static_cast<int>(strlen(url)), &parsed);
+    const int port = ParsePort(url, parsed.port);
+
+    EXPECT_TRUE(ComponentMatches(url, expected.scheme, parsed.scheme));
+    EXPECT_TRUE(ComponentMatches(url, expected.username, parsed.username));
+    EXPECT_TRUE(ComponentMatches(url, expected.password, parsed.password));
+    EXPECT_TRUE(ComponentMatches(url, expected.host, parsed.host));
+    EXPECT_EQ(expected.port, port);
+    EXPECT_TRUE(ComponentMatches(url, expected.path, parsed.path));
+    EXPECT_TRUE(ComponentMatches(url, expected.query, parsed.query));
+    EXPECT_TRUE(ComponentMatches(url, expected.ref, parsed.ref));
+  }
+}
+
+// PathURL --------------------------------------------------------------------
+
+// Path URL inputs with the expected scheme and content (NULL = nonexistent).
+static PathURLParseCase path_cases[] = {
+{"",                                        NULL,          NULL},
+{":",                                       "",            NULL},
+{":/",                                      "",            "/"},
+{"/",                                       NULL,          "/"},
+{" This is \\interesting// \t",             NULL,          "This is \\interesting// \t"},
+{"about:",                                  "about",       NULL},
+{"about:blank",                             "about",       "blank"},
+{"  about: blank ",                         "about",       " blank "},
+{"javascript :alert(\"He:/l\\l#o?foo\"); ", "javascript ", "alert(\"He:/l\\l#o?foo\"); "},
+};
+
+TEST(URLParser, PathURL) {
+  // Reusing one Parsed across cases helps expose fields that the parser fails
+  // to reset itself and that only the constructor would have cleared.
+  Parsed parsed;
+  for (size_t index = 0; index < std::size(path_cases); ++index) {
+    const PathURLParseCase& expected = path_cases[index];
+    const char* url = expected.input;
+    ParsePathURL(url, static_cast<int>(strlen(url)), false, &parsed);
+
+    EXPECT_TRUE(ComponentMatches(url, expected.scheme, parsed.scheme)) << index;
+    EXPECT_TRUE(ComponentMatches(url, expected.path, parsed.GetContent()))
+        << index;
+
+    // Path URLs never populate these components.
+    ExpectInvalidComponent(parsed.username);
+    ExpectInvalidComponent(parsed.password);
+    ExpectInvalidComponent(parsed.host);
+    ExpectInvalidComponent(parsed.port);
+  }
+}
+
+// Various incarnations of file URLs.
+static URLParseCase file_cases[] = {
+#ifdef WIN32
+{"file:server",              "file", NULL, NULL, "server", -1, NULL,          NULL, NULL},
+{"  file: server  \t",       "file", NULL, NULL, " server",-1, NULL,          NULL, NULL},
+{"FiLe:c|",                  "FiLe", NULL, NULL, NULL,     -1, "c|",          NULL, NULL},
+{"FILE:/\\\\/server/file",   "FILE", NULL, NULL, "server", -1, "/file",       NULL, NULL},
+{"file://server/",           "file", NULL, NULL, "server", -1, "/",           NULL, NULL},
+{"file://localhost/c:/",     "file", NULL, NULL, "localhost", -1, "/c:/",     NULL, NULL},
+{"file://127.0.0.1/c|\\",    "file", NULL, NULL, "127.0.0.1", -1, "/c|\\",    NULL, NULL},
+{"file:/",                   "file", NULL, NULL, NULL,     -1, NULL,          NULL, NULL},
+{"file:",                    "file", NULL, NULL, NULL,     -1, NULL,          NULL, NULL},
+  // If there is a Windows drive letter, treat any number of slashes as the
+  // path part.
+{"file:c:\\fo\\b",           "file", NULL, NULL, NULL,     -1, "c:\\fo\\b",   NULL, NULL},
+{"file:/c:\\foo/bar",        "file", NULL, NULL, NULL,     -1, "/c:\\foo/bar",NULL, NULL},
+{"file://c:/f\\b",           "file", NULL, NULL, NULL,     -1, "/c:/f\\b",    NULL, NULL},
+{"file:///C:/foo",           "file", NULL, NULL, NULL,     -1, "/C:/foo",     NULL, NULL},
+{"file://///\\/\\/c:\\f\\b", "file", NULL, NULL, NULL,     -1, "/c:\\f\\b",   NULL, NULL},
+  // If there is not a drive letter, we should treat it as UNC EXCEPT for
+  // three slashes, which we treat as a Unix style path.
+{"file:server/file",         "file", NULL, NULL, "server", -1, "/file",       NULL, NULL},
+{"file:/server/file",        "file", NULL, NULL, "server", -1, "/file",       NULL, NULL},
+{"file://server/file",       "file", NULL, NULL, "server", -1, "/file",       NULL, NULL},
+{"file:///server/file",      "file", NULL, NULL, NULL,     -1, "/server/file",NULL, NULL},
+{"file://\\server/file",     "file", NULL, NULL, NULL,     -1, "\\server/file",NULL, NULL},
+{"file:////server/file",     "file", NULL, NULL, "server", -1, "/file",       NULL, NULL},
+  // Queries and refs are valid for file URLs as well.
+{"file:///C:/foo.html?#",   "file", NULL, NULL,  NULL,     -1, "/C:/foo.html",  "",   ""},
+{"file:///C:/foo.html?query=yes#ref", "file", NULL, NULL, NULL, -1, "/C:/foo.html", "query=yes", "ref"},
+#else  // WIN32
+  // No slashes.
+  {"file:",                    "file", NULL, NULL, NULL,      -1, NULL,             NULL, NULL},
+  {"file:path",                "file", NULL, NULL, NULL,      -1, "path",           NULL, NULL},
+  {"file:path/",               "file", NULL, NULL, NULL,      -1, "path/",          NULL, NULL},
+  {"file:path/f.txt",          "file", NULL, NULL, NULL,      -1, "path/f.txt",     NULL, NULL},
+  // One slash.
+  {"file:/",                   "file", NULL, NULL, NULL,      -1, "/",              NULL, NULL},
+  {"file:/path",               "file", NULL, NULL, NULL,      -1, "/path",          NULL, NULL},
+  {"file:/path/",              "file", NULL, NULL, NULL,      -1, "/path/",         NULL, NULL},
+  {"file:/path/f.txt",         "file", NULL, NULL, NULL,      -1, "/path/f.txt",    NULL, NULL},
+  // Two slashes.
+  {"file://",                  "file", NULL, NULL, NULL,      -1, NULL,             NULL, NULL},
+  {"file://server",            "file", NULL, NULL, "server",  -1, NULL,             NULL, NULL},
+  {"file://server/",           "file", NULL, NULL, "server",  -1, "/",              NULL, NULL},
+  {"file://server/f.txt",      "file", NULL, NULL, "server",  -1, "/f.txt",         NULL, NULL},
+  // Three slashes.
+  {"file:///",                 "file", NULL, NULL, NULL,      -1, "/",              NULL, NULL},
+  {"file:///path",             "file", NULL, NULL, NULL,      -1, "/path",          NULL, NULL},
+  {"file:///path/",            "file", NULL, NULL, NULL,      -1, "/path/",         NULL, NULL},
+  {"file:///path/f.txt",       "file", NULL, NULL, NULL,      -1, "/path/f.txt",    NULL, NULL},
+  // More than three slashes.
+  {"file:////",                "file", NULL, NULL, NULL,      -1, "/",              NULL, NULL},
+  {"file:////path",            "file", NULL, NULL, NULL,      -1, "/path",          NULL, NULL},
+  {"file:////path/",           "file", NULL, NULL, NULL,      -1, "/path/",         NULL, NULL},
+  {"file:////path/f.txt",      "file", NULL, NULL, NULL,      -1, "/path/f.txt",    NULL, NULL},
+  // Schemeless URLs
+  {"path/f.txt",               NULL,   NULL, NULL, NULL,       -1, "path/f.txt",    NULL, NULL},
+  {"path:80/f.txt",            "path", NULL, NULL, NULL,       -1, "80/f.txt",      NULL, NULL},
+  {"path/f.txt:80",            "path/f.txt",NULL, NULL, NULL,  -1, "80",            NULL, NULL}, // Wrong.
+  {"/path/f.txt",              NULL,   NULL, NULL, NULL,       -1, "/path/f.txt",   NULL, NULL},
+  {"/path:80/f.txt",           NULL,   NULL, NULL, NULL,       -1, "/path:80/f.txt",NULL, NULL},
+  {"/path/f.txt:80",           NULL,   NULL, NULL, NULL,       -1, "/path/f.txt:80",NULL, NULL},
+  {"//server/f.txt",           NULL,   NULL, NULL, "server",   -1, "/f.txt",        NULL, NULL},
+  {"//server:80/f.txt",        NULL,   NULL, NULL, "server:80",-1, "/f.txt",        NULL, NULL},
+  {"//server/f.txt:80",        NULL,   NULL, NULL, "server",   -1, "/f.txt:80",     NULL, NULL},
+  {"///path/f.txt",            NULL,   NULL, NULL, NULL,       -1, "/path/f.txt",   NULL, NULL},
+  {"///path:80/f.txt",         NULL,   NULL, NULL, NULL,       -1, "/path:80/f.txt",NULL, NULL},
+  {"///path/f.txt:80",         NULL,   NULL, NULL, NULL,       -1, "/path/f.txt:80",NULL, NULL},
+  {"////path/f.txt",           NULL,   NULL, NULL, NULL,       -1, "/path/f.txt",   NULL, NULL},
+  {"////path:80/f.txt",        NULL,   NULL, NULL, NULL,       -1, "/path:80/f.txt",NULL, NULL},
+  {"////path/f.txt:80",        NULL,   NULL, NULL, NULL,       -1, "/path/f.txt:80",NULL, NULL},
+  // Queries and refs are valid for file URLs as well.
+  {"file:///foo.html?#",       "file", NULL, NULL, NULL,       -1, "/foo.html",     "",   ""},
+  {"file:///foo.html?q=y#ref", "file", NULL, NULL, NULL,       -1, "/foo.html",    "q=y", "ref"},
+#endif  // WIN32
+};
+
+TEST(URLParser, ParseFileURL) {
+  // Declared outside for loop to try to catch cases in init() where we forget
+  // to reset something that is reset by the constructor.
+  Parsed parsed;
+  for (size_t i = 0; i < std::size(file_cases); i++) {
+    const char* url = file_cases[i].input;
+    ParseFileURL(url, static_cast<int>(strlen(url)), &parsed);
+    int port = ParsePort(url, parsed.port);
+    // Failure messages name the case, the input and the component's begin/len.
+    EXPECT_TRUE(ComponentMatches(url, file_cases[i].scheme, parsed.scheme))
+        << " for case #" << i << " [" << url << "] "
+        << parsed.scheme.begin << ", " << parsed.scheme.len;
+
+    EXPECT_TRUE(ComponentMatches(url, file_cases[i].username, parsed.username))
+        << " for case #" << i << " [" << url << "] "
+        << parsed.username.begin << ", " << parsed.username.len;
+
+    EXPECT_TRUE(ComponentMatches(url, file_cases[i].password, parsed.password))
+        << " for case #" << i << " [" << url << "] "
+        << parsed.password.begin << ", " << parsed.password.len;
+
+    EXPECT_TRUE(ComponentMatches(url, file_cases[i].host, parsed.host))
+        << " for case #" << i << " [" << url << "] "
+        << parsed.host.begin << ", " << parsed.host.len;
+
+    EXPECT_EQ(file_cases[i].port, port)
+        << " for case #" << i << " [ " << url << "] " << port;
+
+    EXPECT_TRUE(ComponentMatches(url, file_cases[i].path, parsed.path))
+        << " for case #" << i << " [" << url << "] "
+        << parsed.path.begin << ", " << parsed.path.len;
+
+    EXPECT_TRUE(ComponentMatches(url, file_cases[i].query, parsed.query))
+        << " for case #" << i << " [" << url << "] "
+        << parsed.query.begin << ", " << parsed.query.len;
+
+    EXPECT_TRUE(ComponentMatches(url, file_cases[i].ref, parsed.ref))
+        << " for case #" << i << " [ " << url << "] "
+        << parsed.ref.begin << ", " << parsed.ref.len;
+  }
+}
+
+
+TEST(URLParser, ExtractFileName) {
+  // |expected| is the file name to extract; nullptr means none should exist.
+  struct FileCase {
+    const char* input;
+    const char* expected;
+  } extract_cases[] = {
+      {"http://www.google.com", nullptr},
+      {"http://www.google.com/", ""},
+      {"http://www.google.com/search", "search"},
+      {"http://www.google.com/search/", ""},
+      {"http://www.google.com/foo/bar.html?baz=22", "bar.html"},
+      {"http://www.google.com/foo/bar.html#ref", "bar.html"},
+      {"http://www.google.com/search/;param", ""},
+      {"http://www.google.com/foo/bar.html;param#ref", "bar.html"},
+      {"http://www.google.com/foo/bar.html;foo;param#ref", "bar.html"},
+      {"http://www.google.com/foo/bar.html?query#ref", "bar.html"},
+      {"http://www.google.com/foo;/bar.html", "bar.html"},
+      {"http://www.google.com/foo;/", ""},
+      {"http://www.google.com/foo;", "foo"},
+      {"http://www.google.com/;", ""},
+      {"http://www.google.com/foo;bar;html", "foo"},
+  };
+
+  for (const FileCase& test : extract_cases) {
+    const char* url = test.input;
+
+    Parsed parsed;
+    ParseStandardURL(url, static_cast<int>(strlen(url)), &parsed);
+
+    // Pull the file name out of the parsed path and compare with the table.
+    Component file_name;
+    ExtractFileName(url, parsed.path, &file_name);
+    EXPECT_TRUE(ComponentMatches(url, test.expected, file_name));
+  }
+}
+
+// Returns true if the |parameter|-th key/value pair (1-based) of the query
+// string in |url| has exactly the key |expected_key| and the value
+// |expected_value|. A NULL |expected_key| instead asserts that the query has
+// no such pair, in which case |expected_value| is ignored. When
+// |expected_key| is non-NULL, |expected_value| must be non-NULL as well.
+static bool NthParameterIs(const char* url,
+                           int parameter,
+                           const char* expected_key,
+                           const char* expected_value) {
+  Parsed parsed;
+  ParseStandardURL(url, static_cast<int>(strlen(url)), &parsed);
+
+  // ExtractQueryKeyValue() receives |query| by pointer on every call, which
+  // lets repeated calls walk the pairs; |key|/|value| end up as pair N.
+  Component query = parsed.query;
+  Component key, value;
+  for (int i = 1; i <= parameter; i++) {
+    // Running out of pairs is a match only when no pair was expected.
+    if (!ExtractQueryKeyValue(url, &query, &key, &value))
+      return expected_key == NULL;
+  }
+
+  if (parameter < 1)
+    return expected_key == NULL;  // Nothing was extracted at all.
+  if (!expected_key)
+    return false;  // The pair exists although none was expected.
+
+  // Compare lengths as well as characters so that a mere prefix cannot match.
+  return strlen(expected_key) == static_cast<size_t>(key.len) &&
+         strncmp(&url[key.begin], expected_key, key.len) == 0 &&
+         strlen(expected_value) == static_cast<size_t>(value.len) &&
+         strncmp(&url[value.begin], expected_value, value.len) == 0;
+}
+
+TEST(URLParser, ExtractQueryKeyValue) {
+  EXPECT_TRUE(NthParameterIs("http://www.google.com", 1, nullptr, nullptr));
+
+  // Several ordinary pairs, the last one without '='.
+  char basic[] = "http://www.google.com?arg1=1&arg2=2&bar";
+  EXPECT_TRUE(NthParameterIs(basic, 1, "arg1", "1"));
+  EXPECT_TRUE(NthParameterIs(basic, 2, "arg2", "2"));
+  EXPECT_TRUE(NthParameterIs(basic, 3, "bar", ""));
+  EXPECT_TRUE(NthParameterIs(basic, 4, nullptr, nullptr));
+
+  // A trailing '&' does not produce an extra pair.
+  char trailing_amp[] = "http://www.google.com?foo=bar&";
+  EXPECT_TRUE(NthParameterIs(trailing_amp, 1, "foo", "bar"));
+  EXPECT_TRUE(NthParameterIs(trailing_amp, 2, nullptr, nullptr));
+
+  // A leading '&' produces an empty first pair.
+  char leading_amp[] = "http://www.google.com?&foo=bar";
+  EXPECT_TRUE(NthParameterIs(leading_amp, 1, "", ""));
+  EXPECT_TRUE(NthParameterIs(leading_amp, 2, "foo", "bar"));
+  EXPECT_TRUE(NthParameterIs(leading_amp, 3, nullptr, nullptr));
+
+  // Value present, key empty.
+  char no_key[] = "http://www.google.com?=foo";
+  EXPECT_TRUE(NthParameterIs(no_key, 1, "", "foo"));
+  EXPECT_TRUE(NthParameterIs(no_key, 2, nullptr, nullptr));
+
+  // Key present, value empty.
+  char no_value[] = "http://www.google.com?foo=";
+  EXPECT_TRUE(NthParameterIs(no_value, 1, "foo", ""));
+  EXPECT_TRUE(NthParameterIs(no_value, 2, nullptr, nullptr));
+
+  // Nothing but separators.
+  char separators[] = "http://www.google.com?&&==&=";
+  EXPECT_TRUE(NthParameterIs(separators, 1, "", ""));
+  EXPECT_TRUE(NthParameterIs(separators, 2, "", ""));
+  EXPECT_TRUE(NthParameterIs(separators, 3, "", "="));
+  EXPECT_TRUE(NthParameterIs(separators, 4, "", ""));
+  EXPECT_TRUE(NthParameterIs(separators, 5, nullptr, nullptr));
+}
+
+// MailtoURL --------------------------------------------------------------------
+
+static MailtoURLParseCase mailto_cases[] = {
+//|input                       |scheme   |path (NULL=none)   |query (NULL=none)
+{"mailto:foo@gmail.com",        "mailto", "foo@gmail.com",    NULL},
+{"  mailto: to  \t",            "mailto", " to",              NULL},
+{"mailto:addr1%2C%20addr2 ",    "mailto", "addr1%2C%20addr2", NULL},
+{"Mailto:addr1, addr2 ",        "Mailto", "addr1, addr2",     NULL},
+{"mailto:addr1:addr2 ",         "mailto", "addr1:addr2",      NULL},
+{"mailto:?to=addr1,addr2",      "mailto", NULL,               "to=addr1,addr2"},
+{"mailto:?to=addr1%2C%20addr2", "mailto", NULL,               "to=addr1%2C%20addr2"},
+{"mailto:addr1?to=addr2",       "mailto", "addr1",            "to=addr2"},
+{"mailto:?body=#foobar#",       "mailto", NULL,               "body=#foobar#",},
+{"mailto:#?body=#foobar#",      "mailto", "#",                "body=#foobar#"},
+};
+
+TEST(URLParser, MailtoUrl) {
+  // A single Parsed is shared by all cases so that a field the parser fails to
+  // reset (but the constructor would have) has a chance of being noticed.
+  Parsed parsed;
+  for (const MailtoURLParseCase& expected : mailto_cases) {
+    const char* url = expected.input;
+    ParseMailtoURL(url, static_cast<int>(strlen(url)), &parsed);
+    const int port = ParsePort(url, parsed.port);
+
+    EXPECT_TRUE(ComponentMatches(url, expected.scheme, parsed.scheme));
+    EXPECT_TRUE(ComponentMatches(url, expected.path, parsed.path));
+    EXPECT_TRUE(ComponentMatches(url, expected.query, parsed.query));
+    EXPECT_EQ(PORT_UNSPECIFIED, port);
+
+    // Mailto URLs never populate these components.
+    ExpectInvalidComponent(parsed.username);
+    ExpectInvalidComponent(parsed.password);
+    ExpectInvalidComponent(parsed.port);
+    ExpectInvalidComponent(parsed.ref);
+  }
+}
+
+// Filesystem URL cases: input, six inner_* expectations, then path/query/ref.
+static FileSystemURLParseCase filesystem_cases[] = {
+  // Regular URL with all the parts
+{"filesystem:http://user:pass@foo:21/temporary/bar;par?b#c", "http",  "user", "pass", "foo", 21, "/temporary",  "/bar;par",  "b",  "c"},
+{"filesystem:https://foo/persistent/bar;par/",               "https", NULL,   NULL,   "foo", -1, "/persistent", "/bar;par/", NULL, NULL},
+{"filesystem:file:///persistent/bar;par/",                   "file", NULL,    NULL,   NULL,  -1, "/persistent", "/bar;par/", NULL, NULL},
+{"filesystem:file:///persistent/bar;par/?query#ref",                   "file", NULL,    NULL,   NULL,  -1, "/persistent", "/bar;par/", "query", "ref"},
+{"filesystem:file:///persistent",                            "file", NULL,    NULL,   NULL,  -1, "/persistent", "",        NULL, NULL},
+};
+
+TEST(URLParser, FileSystemURL) {
+  // Declared outside for loop to try to catch cases in init() where we forget
+  // to reset something that is reset by the constructor.
+  Parsed parsed;
+  for (size_t i = 0; i < std::size(filesystem_cases); i++) {
+    const FileSystemURLParseCase& tc = filesystem_cases[i];
+    const char* url = tc.input;
+    ParseFileSystemURL(url, static_cast<int>(strlen(url)), &parsed);
+
+    EXPECT_TRUE(ComponentMatches(url, "filesystem", parsed.scheme));
+    // A NULL inner_scheme expectation means no inner Parsed should exist.
+    EXPECT_EQ(!tc.inner_scheme, !parsed.inner_parsed());
+    // Only check the inner_parsed if there is one.
+    if (const Parsed* inner = parsed.inner_parsed()) {
+      EXPECT_TRUE(ComponentMatches(url, tc.inner_scheme, inner->scheme));
+      EXPECT_TRUE(ComponentMatches(url, tc.inner_username, inner->username));
+      EXPECT_TRUE(ComponentMatches(url, tc.inner_password, inner->password));
+      EXPECT_TRUE(ComponentMatches(url, tc.inner_host, inner->host));
+      int port = ParsePort(url, inner->port);
+      EXPECT_EQ(tc.inner_port, port);
+      // The table also carries an expected inner path; check it too so that
+      // column is not dead data.
+      EXPECT_TRUE(ComponentMatches(url, tc.inner_path, inner->path));
+
+      // The remaining components are never used for filesystem URLs.
+      ExpectInvalidComponent(inner->query);
+      ExpectInvalidComponent(inner->ref);
+    }
+
+    EXPECT_TRUE(ComponentMatches(url, tc.path, parsed.path));
+    EXPECT_TRUE(ComponentMatches(url, tc.query, parsed.query));
+    EXPECT_TRUE(ComponentMatches(url, tc.ref, parsed.ref));
+
+    // The remaining components are never used for filesystem URLs.
+    ExpectInvalidComponent(parsed.username);
+    ExpectInvalidComponent(parsed.password);
+    ExpectInvalidComponent(parsed.host);
+    ExpectInvalidComponent(parsed.port);
+  }
+}
+
+}  // namespace
+}  // namespace url
diff --git a/url_test_utils.h b/url_test_utils.h
new file mode 100644
index 00000000000..e1be7fc5087
--- /dev/null
+++ b/url_test_utils.h
@@ -0,0 +1,39 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_URL_TEST_UTILS_H_
+#define URL_URL_TEST_UTILS_H_
+
+// Convenience functions for string conversions.
+// These are mostly intended for use in unit tests.
+
+#include <string>
+
+#include "base/strings/utf_string_conversions.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "url/url_canon_internal.h"
+
+namespace url {
+
+namespace test_utils {
+
+// Converts a NUL-terminated wchar_t string to char16_t by keeping only the
+// low 16 bits of each element. Unlike the conversion functions in base, this
+// passes invalid UTF-16 sequences through unchanged, which is important for
+// test purposes. As a result, this is not meant to handle true UTF-32 encoded
+// strings. |src| must not be null.
+inline std::u16string TruncateWStringToUTF16(const wchar_t* src) {
+  const size_t length = wcslen(src);  // Keep size_t: no narrowing to int.
+  std::u16string str;
+  str.reserve(length);
+  for (size_t i = 0; i < length; ++i)
+    str.push_back(static_cast<char16_t>(src[i]));
+  return str;
+}
+
+}  // namespace test_utils
+
+}  // namespace url
+
+#endif  // URL_URL_TEST_UTILS_H_
diff --git a/url_util.cc b/url_util.cc
new file mode 100644
index 00000000000..67913eb72f7
--- /dev/null
+++ b/url_util.cc
@@ -0,0 +1,933 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/url_util.h"
+
+#include <stddef.h>
+#include <string.h>
+
+#include <atomic>
+#include <ostream>
+
+#include "base/check_op.h"
+#include "base/compiler_specific.h"
+#include "base/containers/contains.h"
+#include "base/no_destructor.h"
+#include "base/strings/string_util.h"
+#include "url/url_canon_internal.h"
+#include "url/url_constants.h"
+#include "url/url_file.h"
+#include "url/url_util_internal.h"
+
+namespace url {
+
+namespace {
+
+// A pair for representing a standard scheme name and the SchemeType for it.
+struct SchemeWithType {
+  std::string scheme;  // Scheme name; DoAddSchemeWithType DCHECKs lowercase.
+  SchemeType type;     // Which authority parts the scheme's URLs carry.
+};
+
+// A pair for representing a scheme and a custom protocol handler for it.
+//
+// This pair of strings must be normalized protocol handler parameters as
+// described in the Custom Handler specification.
+// https://html.spec.whatwg.org/multipage/system-state.html#normalize-protocol-handler-parameters
+struct SchemeWithHandler {
+  std::string scheme;   // Non-empty, lowercase (see DoAddSchemeWithHandler).
+  std::string handler;  // Non-empty (see DoAddSchemeWithHandler).
+};
+
+// Registered schemes and their properties; initializers are the defaults.
+struct SchemeRegistry {
+  // Standard format schemes (see header for details).
+  std::vector<SchemeWithType> standard_schemes = {
+      {kHttpsScheme, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION},
+      {kHttpScheme, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION},
+      // Yes, file URLs can have a hostname, so file URLs should be handled as
+      // "standard". File URLs never have a port as specified by the SchemeType
+      // field.  Unlike other SCHEME_WITH_HOST schemes, the 'host' in a file
+      // URL may be empty, a behavior which is special-cased during
+      // canonicalization.
+      {kFileScheme, SCHEME_WITH_HOST},
+      {kFtpScheme, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION},
+      {kWssScheme,
+       SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION},  // WebSocket secure.
+      {kWsScheme, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION},  // WebSocket.
+      {kFileSystemScheme, SCHEME_WITHOUT_AUTHORITY},
+  };
+
+  // Schemes that are allowed for referrers.
+  //
+  // WARNING: Adding (1) a non-"standard" scheme or (2) a scheme whose URLs have
+  // opaque origins could lead to surprising behavior in some of the referrer
+  // generation logic. In order to avoid surprises, be sure to have adequate
+  // test coverage in each of the multiple code locations that compute
+  // referrers.
+  std::vector<SchemeWithType> referrer_schemes = {
+      {kHttpsScheme, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION},
+      {kHttpScheme, SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION},
+  };
+
+  // Schemes that do not trigger mixed content warnings.
+  std::vector<std::string> secure_schemes = {
+      kHttpsScheme,
+      kWssScheme,
+      kDataScheme,
+      kAboutScheme,
+  };
+
+  // Schemes that normal pages cannot link to or access (i.e., with the same
+  // security rules as those applied to "file" URLs).
+  std::vector<std::string> local_schemes = {
+      kFileScheme,
+  };
+
+  // Schemes that cause pages loaded with them to not have access to pages
+  // loaded with any other URL scheme.
+  std::vector<std::string> no_access_schemes = {
+      kAboutScheme,
+      kJavaScriptScheme,
+      kDataScheme,
+  };
+
+  // Schemes that can be sent CORS requests.
+  std::vector<std::string> cors_enabled_schemes = {
+      kHttpsScheme,
+      kHttpScheme,
+      kDataScheme,
+  };
+
+  // Schemes that can be used by web to store data (local storage, etc).
+  std::vector<std::string> web_storage_schemes = {
+      kHttpsScheme, kHttpScheme, kFileScheme, kFtpScheme, kWssScheme, kWsScheme,
+  };
+
+  // Schemes that can bypass the Content-Security-Policy (CSP) checks.
+  std::vector<std::string> csp_bypassing_schemes = {};
+
+  // Schemes that are strictly empty documents, allowing them to commit
+  // synchronously.
+  std::vector<std::string> empty_document_schemes = {
+      kAboutScheme,
+  };
+
+  // Schemes with a predefined default custom handler. Empty by default.
+  std::vector<SchemeWithHandler> predefined_handler_schemes;
+
+  bool allow_non_standard_schemes = false;  // Android WebView opt-in.
+};
+
+// Set by LockSchemeRegistries() (see header); Add*Scheme DCHECKs it is false.
+bool scheme_registries_locked = false;
+
+// Set by GetSchemeRegistry() in DCHECK builds to catch modification after use.
+static std::atomic<bool> g_scheme_registries_used{false};
+
+// Returns the mutable registry instance without marking it as used. Only code
+// that modifies the registry (e.g. Add*Scheme) should go through this.
+SchemeRegistry* GetSchemeRegistryWithoutLocking() {
+  static base::NoDestructor<SchemeRegistry> instance;
+  return instance.get();
+}
+
+const SchemeRegistry& GetSchemeRegistry() {
+#if DCHECK_IS_ON()
+  g_scheme_registries_used = true;  // Later Add*Scheme calls will DCHECK.
+#endif  // DCHECK_IS_ON()
+  return *GetSchemeRegistryWithoutLocking();
+}
+
+// Pass this enum through for methods which would like to know if whitespace
+// removal is necessary.
+enum WhitespaceRemovalPolicy {
+  REMOVE_WHITESPACE,         // Run RemoveURLWhitespace() before parsing.
+  DO_NOT_REMOVE_WHITESPACE,  // Parse the input as given.
+};
+
+// Maps a character type to the StringPiece type that views such characters.
+template <typename CHAR>
+struct CharToStringPiece {};
+template <>
+struct CharToStringPiece<char> {
+  using Piece = base::StringPiece;
+};
+template <>
+struct CharToStringPiece<char16_t> {
+  using Piece = base::StringPiece16;
+};
+
+// Case-insensitively (ASCII) compares the |component| range of |spec| with
+// |compare_to|. An empty component only matches an empty |compare_to|.
+template <typename CHAR>
+inline bool DoCompareSchemeComponent(const CHAR* spec,
+                                     const Component& component,
+                                     const char* compare_to) {
+  using Piece = typename CharToStringPiece<CHAR>::Piece;
+  if (!component.is_empty()) {
+    const Piece candidate(spec + component.begin, component.len);
+    return base::EqualsCaseInsensitiveASCII(candidate, compare_to);
+  }
+  return *compare_to == '\0';
+}
+
+// Looks up the scheme occupying the |scheme| range of |spec| in |schemes|
+// (ASCII case-insensitive). On a match stores the entry's SchemeType in
+// |type| and returns true; otherwise returns false and leaves |type| alone.
+template <typename CHAR>
+bool DoIsInSchemes(const CHAR* spec,
+                   const Component& scheme,
+                   SchemeType* type,
+                   const std::vector<SchemeWithType>& schemes) {
+  // Empty or invalid schemes are non-standard.
+  if (scheme.is_empty())
+    return false;
+  using Piece = typename CharToStringPiece<CHAR>::Piece;
+  const Piece candidate(spec + scheme.begin, scheme.len);
+  for (const SchemeWithType& entry : schemes) {
+    if (!base::EqualsCaseInsensitiveASCII(candidate, entry.scheme))
+      continue;
+    *type = entry.type;
+    return true;
+  }
+  return false;
+}
+
+// DoIsInSchemes() specialised to the registry's list of standard schemes.
+template <typename CHAR>
+bool DoIsStandard(const CHAR* spec, const Component& scheme, SchemeType* type) {
+  const SchemeRegistry& registry = GetSchemeRegistry();
+  return DoIsInSchemes(spec, scheme, type, registry.standard_schemes);
+}
+
+// Extracts the scheme of |str| and compares it with |compare|.
+template <typename CHAR>
+bool DoFindAndCompareScheme(const CHAR* str,
+                            int str_len,
+                            const char* compare,
+                            Component* found_scheme) {
+  // Strip whitespace first, as DoCanonicalize does, so that the scheme is
+  // extracted from the same text the canonicalizer would see.
+  STACK_UNINITIALIZED RawCanonOutputT<CHAR> stripped_storage;
+  int stripped_len;
+  const CHAR* stripped = RemoveURLWhitespace(str, str_len, &stripped_storage,
+                                             &stripped_len, nullptr);
+
+  Component extracted;
+  const bool has_scheme = ExtractScheme(stripped, stripped_len, &extracted);
+
+  // Report the located range, or a default Component when there is none.
+  if (found_scheme)
+    *found_scheme = has_scheme ? extracted : Component();
+  if (!has_scheme)
+    return false;
+  return DoCompareSchemeComponent(stripped, extracted, compare);
+}
+
+template <typename CHAR>
+bool DoCanonicalize(const CHAR* spec,
+                    int spec_len,
+                    bool trim_path_end,
+                    WhitespaceRemovalPolicy whitespace_policy,
+                    CharsetConverter* charset_converter,
+                    CanonOutput* output,
+                    Parsed* output_parsed) {
+  // Trim leading C0 control characters and spaces.
+  int begin = 0;
+  TrimURL(spec, &begin, &spec_len, trim_path_end);
+  DCHECK(0 <= begin && begin <= spec_len);
+  spec += begin;  // Skip the trimmed prefix.
+  spec_len -= begin;
+
+  output->ReserveSizeIfNeeded(spec_len);
+
+  // Remove any whitespace from the middle of the URL when the policy asks for
+  // it. Possibly this will result in copying to the new buffer.
+  STACK_UNINITIALIZED RawCanonOutputT<CHAR> whitespace_buffer;
+  if (whitespace_policy == REMOVE_WHITESPACE) {
+    spec = RemoveURLWhitespace(spec, spec_len, &whitespace_buffer, &spec_len,
+                               &output_parsed->potentially_dangling_markup);
+  }
+
+  Parsed parsed_input;
+#ifdef WIN32
+  // For Windows, we allow things that look like absolute Windows paths to be
+  // fixed up magically to file URLs. This is done for IE compatibility. For
+  // example, this will change "c:/foo" into a file URL rather than treating
+  // it as a URL with the protocol "c". It also works for UNC ("\\foo\bar.txt").
+  // There is similar logic in url_canon_relative.cc.
+  //
+  // For Mac & Unix, we don't do this (the equivalent would be "/foo/bar" which
+  // has no meaning as an absolute path name). This is because browsers on Mac
+  // & Unix don't generally do this, so there is no compatibility reason for
+  // doing so.
+  if (DoesBeginUNCPath(spec, 0, spec_len, false) ||
+      DoesBeginWindowsDriveSpec(spec, 0, spec_len)) {
+    ParseFileURL(spec, spec_len, &parsed_input);
+    return CanonicalizeFileURL(spec, spec_len, parsed_input, charset_converter,
+                               output, output_parsed);
+  }
+#endif  // WIN32
+
+  Component scheme;
+  if (!ExtractScheme(spec, spec_len, &scheme))
+    return false;  // No scheme could be extracted.
+
+  // This is the parsed version of the input URL, we have to canonicalize it
+  // before storing it in our object.
+  bool success;
+  SchemeType scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
+  if (DoCompareSchemeComponent(spec, scheme, url::kFileScheme)) {
+    // File URLs are special.
+    ParseFileURL(spec, spec_len, &parsed_input);
+    success = CanonicalizeFileURL(spec, spec_len, parsed_input,
+                                  charset_converter, output, output_parsed);
+  } else if (DoCompareSchemeComponent(spec, scheme, url::kFileSystemScheme)) {
+    // Filesystem URLs are special.
+    ParseFileSystemURL(spec, spec_len, &parsed_input);
+    success = CanonicalizeFileSystemURL(spec, spec_len, parsed_input,
+                                        charset_converter, output,
+                                        output_parsed);
+
+  } else if (DoIsStandard(spec, scheme, &scheme_type)) {
+    // All "normal" URLs.
+    ParseStandardURL(spec, spec_len, &parsed_input);
+    success = CanonicalizeStandardURL(spec, spec_len, parsed_input, scheme_type,
+                                      charset_converter, output, output_parsed);
+
+  } else if (DoCompareSchemeComponent(spec, scheme, url::kMailToScheme)) {
+    // Mailto URLs are treated like standard URLs, with only a scheme, path,
+    // and query.
+    ParseMailtoURL(spec, spec_len, &parsed_input);
+    success = CanonicalizeMailtoURL(spec, spec_len, parsed_input, output,
+                                    output_parsed);
+
+  } else {
+    // "Weird" URLs like data: and javascript:.
+    ParsePathURL(spec, spec_len, trim_path_end, &parsed_input);
+    success = CanonicalizePathURL(spec, spec_len, parsed_input, output,
+                                  output_parsed);
+  }
+  return success;
+}
+
+template<typename CHAR>
+bool DoResolveRelative(const char* base_spec,
+                       int base_spec_len,
+                       const Parsed& base_parsed,
+                       const CHAR* in_relative,
+                       int in_relative_length,
+                       CharsetConverter* charset_converter,
+                       CanonOutput* output,
+                       Parsed* output_parsed) {
+  // Remove any whitespace from the middle of the relative URL, possibly
+  // copying to the new buffer.
+  STACK_UNINITIALIZED RawCanonOutputT<CHAR> whitespace_buffer;
+  int relative_length;
+  const CHAR* relative = RemoveURLWhitespace(
+      in_relative, in_relative_length, &whitespace_buffer, &relative_length,
+      &output_parsed->potentially_dangling_markup);
+
+  bool base_is_authority_based = false;
+  bool base_is_hierarchical = false;
+  if (base_spec &&
+      base_parsed.scheme.is_nonempty()) {
+    int after_scheme = base_parsed.scheme.end() + 1;  // Skip past the colon.
+    int num_slashes = CountConsecutiveSlashes(base_spec, after_scheme,
+                                              base_spec_len);
+    base_is_authority_based = num_slashes > 1;  // e.g. "scheme://..."
+    base_is_hierarchical = num_slashes > 0;     // e.g. "scheme:/..."
+  }
+
+  SchemeType unused_scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
+  bool standard_base_scheme =
+      base_parsed.scheme.is_nonempty() &&
+      DoIsStandard(base_spec, base_parsed.scheme, &unused_scheme_type);
+
+  bool is_relative;
+  Component relative_component;
+  if (!IsRelativeURL(base_spec, base_parsed, relative, relative_length,
+                     (base_is_hierarchical || standard_base_scheme),
+                     &is_relative, &relative_component)) {
+    // Error resolving.
+    return false;
+  }
+
+  // Don't reserve buffer space here. Instead, reserve in DoCanonicalize and
+  // ReserveRelativeURL, to enable more accurate buffer sizes.
+
+  // Pretend for a moment that |base_spec| is a standard URL. Normally
+  // non-standard URLs are treated as PathURLs, but if the base has an
+  // authority we would like to preserve it.
+  if (is_relative && base_is_authority_based && !standard_base_scheme) {
+    Parsed base_parsed_authority;
+    ParseStandardURL(base_spec, base_spec_len, &base_parsed_authority);
+    if (base_parsed_authority.host.is_nonempty()) {
+      STACK_UNINITIALIZED RawCanonOutputT<char> temporary_output;
+      bool did_resolve_succeed =
+          ResolveRelativeURL(base_spec, base_parsed_authority, false, relative,
+                             relative_component, charset_converter,
+                             &temporary_output, output_parsed);
+      // |output_parsed| was built from base_parsed_authority rather than
+      // base_parsed, so re-create it by canonicalizing the resolved text. Only
+      // the resolve status is returned; DoCanonicalize's result is not checked.
+      DoCanonicalize(temporary_output.data(), temporary_output.length(), true,
+                     REMOVE_WHITESPACE, charset_converter, output,
+                     output_parsed);
+      return did_resolve_succeed;
+    }
+  } else if (is_relative) {
+    // Relative, resolve and canonicalize.
+    bool file_base_scheme = base_parsed.scheme.is_nonempty() &&
+        DoCompareSchemeComponent(base_spec, base_parsed.scheme, kFileScheme);
+    return ResolveRelativeURL(base_spec, base_parsed, file_base_scheme, relative,
+                              relative_component, charset_converter, output,
+                              output_parsed);
+  }
+
+  // Not relative, canonicalize the input.
+  return DoCanonicalize(relative, relative_length, true,
+                        DO_NOT_REMOVE_WHITESPACE, charset_converter, output,
+                        output_parsed);
+}
+
+template<typename CHAR>
+bool DoReplaceComponents(const char* spec,
+                         int spec_len,
+                         const Parsed& parsed,
+                         const Replacements<CHAR>& replacements,
+                         CharsetConverter* charset_converter,
+                         CanonOutput* output,
+                         Parsed* out_parsed) {
+  // If the scheme is overridden, just do a simple string substitution and
+  // re-parse the whole thing. There are lots of edge cases that we really don't
+  // want to deal with. Like what happens if I replace "http://e:8080/foo"
+  // with a file. Does it become "file:///E:/8080/foo" where the port number
+  // becomes part of the path? Parsing that string as a file URL says "yes"
+  // but almost no sane rule for dealing with the components individually would
+  // come up with that.
+  //
+  // Why allow these crazy cases at all? Programmatically, there is almost no
+  // case for replacing the scheme. The most common case for hitting this is
+  // in JS when building up a URL using the location object. In this case, the
+  // JS code expects the string substitution behavior:
+  //   http://www.w3.org/TR/2008/WD-html5-20080610/structured.html#common3
+  if (replacements.IsSchemeOverridden()) {
+    // Canonicalize the new scheme so it is 8-bit and can be concatenated with
+    // the existing spec.
+    STACK_UNINITIALIZED RawCanonOutput<128> scheme_replaced;
+    Component scheme_replaced_parsed;
+    CanonicalizeScheme(replacements.sources().scheme,
+                       replacements.components().scheme,
+                       &scheme_replaced, &scheme_replaced_parsed);
+
+    // We can assume that the input is canonicalized, which means it always has
+    // a colon after the scheme (or where the scheme would be).
+    int spec_after_colon = parsed.scheme.is_valid() ? parsed.scheme.end() + 1
+                                                    : 1;
+    if (spec_len - spec_after_colon > 0) {
+      scheme_replaced.Append(&spec[spec_after_colon],
+                             spec_len - spec_after_colon);
+    }
+
+    // We now need to completely re-parse the resulting string since its meaning
+    // may have changed with the different scheme.
+    STACK_UNINITIALIZED RawCanonOutput<128> recanonicalized;
+    Parsed recanonicalized_parsed;
+    DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), true,
+                   REMOVE_WHITESPACE, charset_converter, &recanonicalized,
+                   &recanonicalized_parsed);
+
+    // Recurse using the version with the scheme already replaced. This will now
+    // use the replacement rules for the new scheme.
+    //
+    // Warning: this code assumes that ReplaceComponents will re-check all
+    // components for validity. This is because we can't fail if DoCanonicalize
+    // failed above since theoretically the thing making it fail could be
+    // getting replaced here. If ReplaceComponents didn't re-check everything,
+    // we wouldn't know if something *not* getting replaced is a problem.
+    // If the scheme-specific replacers are made more intelligent so they don't
+    // re-check everything, we should instead re-canonicalize the whole thing
+    // after this call to check validity (this assumes replacing the scheme is
+    // much much less common than other types of replacements, like clearing the
+    // ref).
+    Replacements<CHAR> replacements_no_scheme = replacements;
+    replacements_no_scheme.SetScheme(nullptr, Component());
+    // If the input URL has potentially dangling markup, set the flag on the
+    // output too. Note that in some cases the replacement gets rid of the
+    // potentially dangling markup, but this is ok since the check will fail
+    // closed.
+    if (parsed.potentially_dangling_markup) {
+      out_parsed->potentially_dangling_markup = true;
+    }
+    return DoReplaceComponents(recanonicalized.data(), recanonicalized.length(),
+                               recanonicalized_parsed, replacements_no_scheme,
+                               charset_converter, output, out_parsed);
+  }
+
+  // TODO(csharrison): We could be smarter about size to reserve if this is done
+  // in callers below, and the code checks to see which components are being
+  // replaced, and with what length. If this ends up being a hot spot it should
+  // be changed.
+  output->ReserveSizeIfNeeded(spec_len);
+
+  // If we get here, then we know the scheme doesn't need to be replaced, so can
+  // just key off the scheme in the spec to know how to do the replacements.
+  if (DoCompareSchemeComponent(spec, parsed.scheme, url::kFileScheme)) {
+    return ReplaceFileURL(spec, parsed, replacements, charset_converter, output,
+                          out_parsed);
+  }
+  if (DoCompareSchemeComponent(spec, parsed.scheme, url::kFileSystemScheme)) {
+    return ReplaceFileSystemURL(spec, parsed, replacements, charset_converter,
+                                output, out_parsed);
+  }
+  SchemeType scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
+  if (DoIsStandard(spec, parsed.scheme, &scheme_type)) {
+    return ReplaceStandardURL(spec, parsed, replacements, scheme_type,
+                              charset_converter, output, out_parsed);
+  }
+  if (DoCompareSchemeComponent(spec, parsed.scheme, url::kMailToScheme)) {
+    return ReplaceMailtoURL(spec, parsed, replacements, output, out_parsed);
+  }
+
+  // Default is a path URL.
+  return ReplacePathURL(spec, parsed, replacements, output, out_parsed);
+}
+
+void DoSchemeModificationPreamble() {
+  // First guard: the registry must not have been read yet, i.e. no Add*Scheme
+  // call may come after the SchemeRegistry has been used.
+  //
+  // Tripping it normally means a new scheme is being set up too late, or that
+  // the SchemeRegistry is consulted too early during application init.
+  DCHECK(!g_scheme_registries_used.load())
+      << "Trying to add a scheme after the lists have been used. Make sure "
+         "that you haven't added any static GURL initializers in tests.";
+
+  // Second guard: the registry must not have been locked yet, i.e. no
+  // Add*Scheme call may come after LockSchemeRegistries (see the header file
+  // for more on LockSchemeRegistries).
+  //
+  // Tripping it normally means a new scheme is being set up too late during
+  // application init. Find where the app does this initialization and calls
+  // LockSchemeRegistries, and register the new scheme there.
+  DCHECK(!scheme_registries_locked)
+      << "Trying to add a scheme after the lists have been locked.";
+}
+
+void DoAddSchemeWithHandler(const char* new_scheme,
+                            const char* handler,
+                            std::vector<SchemeWithHandler>* schemes) {
+  DoSchemeModificationPreamble();  // DCHECKs registry is unused and unlocked.
+  DCHECK(schemes);
+  DCHECK(strlen(new_scheme) > 0);  // Scheme names must be non-empty...
+  DCHECK(strlen(handler) > 0);     // ...as must the handler.
+  DCHECK_EQ(base::ToLowerASCII(new_scheme), new_scheme);  // Lowercase only.
+  DCHECK(!base::Contains(*schemes, new_scheme, &SchemeWithHandler::scheme));
+  schemes->push_back({new_scheme, handler});  // Duplicates DCHECKed above.
+}
+
+void DoAddScheme(const char* new_scheme, std::vector<std::string>* schemes) {
+  DoSchemeModificationPreamble();  // DCHECKs registry is unused and unlocked.
+  DCHECK(schemes);
+  DCHECK(strlen(new_scheme) > 0);  // Scheme names must be non-empty...
+  DCHECK_EQ(base::ToLowerASCII(new_scheme), new_scheme);  // ...and lowercase.
+  DCHECK(!base::Contains(*schemes, new_scheme));  // No duplicate entries.
+  schemes->push_back(new_scheme);
+}
+
+void DoAddSchemeWithType(const char* new_scheme,
+                         SchemeType type,
+                         std::vector<SchemeWithType>* schemes) {
+  DoSchemeModificationPreamble();  // DCHECKs registry is unused and unlocked.
+  DCHECK(schemes);
+  DCHECK(strlen(new_scheme) > 0);  // Scheme names must be non-empty...
+  DCHECK_EQ(base::ToLowerASCII(new_scheme), new_scheme);  // ...and lowercase.
+  DCHECK(!base::Contains(*schemes, new_scheme, &SchemeWithType::scheme));
+  schemes->push_back({new_scheme, type});  // Duplicates DCHECKed above.
+}
+
+}  // namespace
+
+void ClearSchemesForTests() {
+  DCHECK(!g_scheme_registries_used.load())
+      << "Schemes already used "
+      << "(use ScopedSchemeRegistryForTests to relax for tests).";
+  DCHECK(!scheme_registries_locked)
+      << "Schemes already locked "
+      << "(use ScopedSchemeRegistryForTests to relax for tests).";
+  *GetSchemeRegistryWithoutLocking() = SchemeRegistry();  // Back to defaults.
+}
+
+class ScopedSchemeRegistryInternal {
+ public:
+  ScopedSchemeRegistryInternal()
+      : registry_(std::make_unique<SchemeRegistry>(
+            *GetSchemeRegistryWithoutLocking())) {
+    g_scheme_registries_used.store(false);  // Permit Add*Scheme calls again.
+    scheme_registries_locked = false;
+  }
+  ~ScopedSchemeRegistryInternal() {
+    *GetSchemeRegistryWithoutLocking() = *registry_;  // Put the snapshot back.
+    g_scheme_registries_used.store(true);  // NOTE: prior flag values are not
+    scheme_registries_locked = true;       // remembered; both end up true.
+  }
+
+ private:
+  std::unique_ptr<SchemeRegistry> registry_;  // Copy taken at construction.
+};
+
+ScopedSchemeRegistryForTests::ScopedSchemeRegistryForTests()
+    : internal_(std::make_unique<ScopedSchemeRegistryInternal>()) {}
+
+ScopedSchemeRegistryForTests::~ScopedSchemeRegistryForTests() = default;
+
+void EnableNonStandardSchemesForAndroidWebView() {
+  DoSchemeModificationPreamble();
+  GetSchemeRegistryWithoutLocking()->allow_non_standard_schemes = true;
+}
+
+bool AllowNonStandardSchemesForAndroidWebView() {
+  return GetSchemeRegistry().allow_non_standard_schemes;
+}
+
+void AddStandardScheme(const char* new_scheme, SchemeType type) {
+  DoAddSchemeWithType(new_scheme, type,
+                      &GetSchemeRegistryWithoutLocking()->standard_schemes);
+}
+
+std::vector<std::string> GetStandardSchemes() {
+  const auto& entries = GetSchemeRegistry().standard_schemes;
+  std::vector<std::string> names;
+  names.reserve(entries.size());
+  for (const SchemeWithType& entry : entries)
+    names.push_back(entry.scheme);  // Only the name; the type is dropped.
+  return names;
+}
+
+void AddReferrerScheme(const char* new_scheme, SchemeType type) {
+  DoAddSchemeWithType(new_scheme, type,
+                      &GetSchemeRegistryWithoutLocking()->referrer_schemes);
+}
+
+void AddSecureScheme(const char* new_scheme) {
+  DoAddScheme(new_scheme, &GetSchemeRegistryWithoutLocking()->secure_schemes);
+}
+
+const std::vector<std::string>& GetSecureSchemes() {
+  return GetSchemeRegistry().secure_schemes;
+}
+
+void AddLocalScheme(const char* new_scheme) {
+  DoAddScheme(new_scheme, &GetSchemeRegistryWithoutLocking()->local_schemes);
+}
+
+const std::vector<std::string>& GetLocalSchemes() {
+  return GetSchemeRegistry().local_schemes;
+}
+
+void AddNoAccessScheme(const char* new_scheme) {
+  DoAddScheme(new_scheme,
+              &GetSchemeRegistryWithoutLocking()->no_access_schemes);
+}
+
+const std::vector<std::string>& GetNoAccessSchemes() {
+  return GetSchemeRegistry().no_access_schemes;
+}
+
+void AddCorsEnabledScheme(const char* new_scheme) {
+  DoAddScheme(new_scheme,
+              &GetSchemeRegistryWithoutLocking()->cors_enabled_schemes);
+}
+
+const std::vector<std::string>& GetCorsEnabledSchemes() {
+  return GetSchemeRegistry().cors_enabled_schemes;
+}
+
+void AddWebStorageScheme(const char* new_scheme) {
+  DoAddScheme(new_scheme,
+              &GetSchemeRegistryWithoutLocking()->web_storage_schemes);
+}
+
+const std::vector<std::string>& GetWebStorageSchemes() {
+  return GetSchemeRegistry().web_storage_schemes;
+}
+
+void AddCSPBypassingScheme(const char* new_scheme) {
+  DoAddScheme(new_scheme,
+              &GetSchemeRegistryWithoutLocking()->csp_bypassing_schemes);
+}
+
+const std::vector<std::string>& GetCSPBypassingSchemes() {
+  return GetSchemeRegistry().csp_bypassing_schemes;
+}
+
+void AddEmptyDocumentScheme(const char* new_scheme) {
+  DoAddScheme(new_scheme,
+              &GetSchemeRegistryWithoutLocking()->empty_document_schemes);
+}
+
+const std::vector<std::string>& GetEmptyDocumentSchemes() {
+  return GetSchemeRegistry().empty_document_schemes;
+}
+
+void AddPredefinedHandlerScheme(const char* new_scheme, const char* handler) {
+  DoAddSchemeWithHandler(
+      new_scheme, handler,
+      &GetSchemeRegistryWithoutLocking()->predefined_handler_schemes);
+}
+
+std::vector<std::pair<std::string, std::string>> GetPredefinedHandlerSchemes() {
+  const auto& entries = GetSchemeRegistry().predefined_handler_schemes;
+  std::vector<std::pair<std::string, std::string>> pairs;
+  pairs.reserve(entries.size());
+  // Re-pack as (scheme, handler); SchemeWithHandler is local to this file.
+  for (const SchemeWithHandler& entry : entries)
+    pairs.emplace_back(entry.scheme, entry.handler);
+  return pairs;
+}
+
+void LockSchemeRegistries() {
+  scheme_registries_locked = true;
+}
+
+bool IsStandard(const char* spec, const Component& scheme) {
+  SchemeType unused_scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
+  return DoIsStandard(spec, scheme, &unused_scheme_type);  // Type discarded.
+}
+
+bool GetStandardSchemeType(const char* spec,
+                           const Component& scheme,
+                           SchemeType* type) {
+  return DoIsStandard(spec, scheme, type);
+}
+
+bool GetStandardSchemeType(const char16_t* spec,
+                           const Component& scheme,
+                           SchemeType* type) {
+  return DoIsStandard(spec, scheme, type);
+}
+
+bool IsStandard(const char16_t* spec, const Component& scheme) {
+  SchemeType unused_scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
+  return DoIsStandard(spec, scheme, &unused_scheme_type);  // Type discarded.
+}
+
+bool IsReferrerScheme(const char* spec, const Component& scheme) {
+  SchemeType unused_scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
+  return DoIsInSchemes(spec, scheme, &unused_scheme_type,  // Type discarded.
+                       GetSchemeRegistry().referrer_schemes);
+}
+
+bool FindAndCompareScheme(const char* str,
+                          int str_len,
+                          const char* compare,
+                          Component* found_scheme) {
+  return DoFindAndCompareScheme(str, str_len, compare, found_scheme);
+}
+
+bool FindAndCompareScheme(const char16_t* str,
+                          int str_len,
+                          const char* compare,
+                          Component* found_scheme) {
+  return DoFindAndCompareScheme(str, str_len, compare, found_scheme);
+}
+
+// Returns true when |canonical_host| equals |canonical_domain| or ends with
+// it on a label boundary (i.e. is a subdomain of it). The comparison is
+// byte-for-byte, so both arguments are expected to be canonicalized already.
+// Empty arguments never match.
+bool DomainIs(base::StringPiece canonical_host,
+              base::StringPiece canonical_domain) {
+  if (canonical_host.empty() || canonical_domain.empty())
+    return false;
+
+  // If the host name ends with a dot but the input domain doesn't, then the
+  // host's trailing dot is left out of the comparison.
+  const bool drop_host_dot =
+      canonical_host.back() == '.' && canonical_domain.back() != '.';
+  const size_t host_len =
+      drop_host_dot ? canonical_host.length() - 1 : canonical_host.length();
+  const size_t domain_len = canonical_domain.length();
+  if (host_len < domain_len)
+    return false;
+
+  // |suffix_start| indexes the first host character that takes part in the
+  // comparison, which is not necessarily the start of the whole host name.
+  const size_t suffix_start = host_len - domain_len;
+  if (canonical_host.substr(suffix_start, domain_len) != canonical_domain)
+    return false;
+
+  // Make sure there aren't extra characters in the host before the compared
+  // part: the suffix has to begin on a label boundary. That holds when the
+  // domain itself starts with a dot, when the suffix is the entire host, or
+  // when the character immediately before the suffix is a dot. For example,
+  // www.google.com has domain "google.com", but www.iamnotgoogle.com does not.
+  const bool domain_has_leading_dot = canonical_domain[0] == '.';
+  const bool suffix_is_whole_host = suffix_start == 0;
+  return domain_has_leading_dot || suffix_is_whole_host ||
+         canonical_host[suffix_start - 1] == '.';
+}
+
+bool HostIsIPAddress(base::StringPiece host) {
+  const Component whole_host(0, host.length());
+  url::CanonHostInfo info;  // Filled in by CanonicalizeIPAddress() below.
+  STACK_UNINITIALIZED url::RawCanonOutputT<char, 128> scratch;  // Discarded.
+  url::CanonicalizeIPAddress(host.data(), whole_host, &scratch, &info);
+  return info.IsIPAddress();
+}
+
+// 8-bit overload. Delegates to DoCanonicalize() and returns its result
+// unchanged, always passing the REMOVE_WHITESPACE policy on behalf of the
+// caller.
+bool Canonicalize(const char* spec, int spec_len, bool trim_path_end,
+                  CharsetConverter* charset_converter, CanonOutput* output,
+                  Parsed* output_parsed) {
+  return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE,
+                        charset_converter, output, output_parsed);
+}
+
+// 16-bit overload. Delegates to DoCanonicalize() and returns its result
+// unchanged, always passing the REMOVE_WHITESPACE policy on behalf of the
+// caller.
+bool Canonicalize(const char16_t* spec, int spec_len, bool trim_path_end,
+                  CharsetConverter* charset_converter, CanonOutput* output,
+                  Parsed* output_parsed) {
+  return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE,
+                        charset_converter, output, output_parsed);
+}
+
+// Overload taking an 8-bit |relative|. The base spec is 8-bit in both
+// overloads; only the character type of |relative| differs between them.
+// All of the work is delegated to DoResolveRelative(), whose result is
+// returned unchanged.
+bool ResolveRelative(const char* base_spec, int base_spec_len,
+                     const Parsed& base_parsed, const char* relative,
+                     int relative_length, CharsetConverter* charset_converter,
+                     CanonOutput* output, Parsed* output_parsed) {
+  return DoResolveRelative(base_spec, base_spec_len, base_parsed, relative,
+                           relative_length, charset_converter, output,
+                           output_parsed);
+}
+
+// Overload taking a 16-bit |relative|. The base spec is 8-bit in both
+// overloads; only the character type of |relative| differs between them.
+// All of the work is delegated to DoResolveRelative(), whose result is
+// returned unchanged.
+bool ResolveRelative(const char* base_spec, int base_spec_len,
+                     const Parsed& base_parsed, const char16_t* relative,
+                     int relative_length, CharsetConverter* charset_converter,
+                     CanonOutput* output, Parsed* output_parsed) {
+  return DoResolveRelative(base_spec, base_spec_len, base_parsed, relative,
+                           relative_length, charset_converter, output,
+                           output_parsed);
+}
+
+// Overload for 8-bit replacement strings. The input spec is 8-bit in both
+// overloads. Delegates to DoReplaceComponents() and returns its result
+// unchanged.
+bool ReplaceComponents(const char* spec, int spec_len, const Parsed& parsed,
+                       const Replacements<char>& replacements,
+                       CharsetConverter* charset_converter,
+                       CanonOutput* output, Parsed* out_parsed) {
+  return DoReplaceComponents(spec, spec_len, parsed, replacements,
+                             charset_converter, output, out_parsed);
+}
+
+// Overload for 16-bit replacement strings. The input spec is 8-bit in both
+// overloads. Delegates to DoReplaceComponents() and returns its result
+// unchanged.
+bool ReplaceComponents(const char* spec, int spec_len, const Parsed& parsed,
+                       const Replacements<char16_t>& replacements,
+                       CharsetConverter* charset_converter,
+                       CanonOutput* output, Parsed* out_parsed) {
+  return DoReplaceComponents(spec, spec_len, parsed, replacements,
+                             charset_converter, output, out_parsed);
+}
+
+// Percent-decodes |input| into a temporary byte buffer, then appends those
+// bytes to |output| as UTF-16, treating invalid UTF-8 as directed by |mode|.
+void DecodeURLEscapeSequences(const char* input, int length,
+                              DecodeURLMode mode, CanonOutputW* output) {
+  if (length <= 0)
+    return;
+
+  STACK_UNINITIALIZED RawCanonOutputT<char> unescaped_chars;
+  size_t length_size_t = static_cast<size_t>(length);
+  for (size_t i = 0; i < length_size_t; i++) {
+    if (input[i] == '%') {
+      unsigned char ch;
+      if (DecodeEscaped(input, &i, length_size_t, &ch)) {
+        unescaped_chars.push_back(ch);
+      } else {
+        // Invalid escape sequence, copy the percent literal.
+        unescaped_chars.push_back('%');
+      }
+    } else {
+      // Regular non-escaped 8-bit character.
+      unescaped_chars.push_back(input[i]);
+    }
+  }
+
+  // Kept as size_t so rolling |output| back below involves no int round trip.
+  const size_t output_initial_length = output->length();
+  // Convert that 8-bit to UTF-16. It's not clear IE does this at all to
+  // JavaScript URLs, but Firefox and Safari do.
+  const size_t unescaped_length = unescaped_chars.length();
+  for (size_t i = 0; i < unescaped_length; i++) {
+    unsigned char uch = static_cast<unsigned char>(unescaped_chars.at(i));
+    if (uch < 0x80) {
+      // 7-bit ASCII needs no UTF-8 decoding, just append directly.
+      output->push_back(uch);
+    } else {
+      // On return |next_character| indexes the last byte that was consumed.
+      size_t next_character = i;
+      base_icu::UChar32 code_point;
+      if (ReadUTFChar(unescaped_chars.data(), &next_character, unescaped_length,
+                      &code_point)) {
+        // Valid UTF-8 character, convert to UTF-16.
+        AppendUTF16Value(code_point, output);
+        i = next_character;
+      } else if (mode == DecodeURLMode::kUTF8) {
+        // kUTF8: the invalid sequence is replaced with U+FFFD.
+        DCHECK_EQ(code_point, 0xFFFD);
+        AppendUTF16Value(code_point, output);
+        i = next_character;
+      } else {
+        // kUTF8OrIsomorphic: one invalid sequence invalidates the UTF-8
+        // reading of the whole input. Discard what this call appended and
+        // append every unescaped byte as the code unit of the same value.
+        output->set_length(output_initial_length);
+        for (size_t j = 0; j < unescaped_length; ++j)
+          output->push_back(static_cast<unsigned char>(unescaped_chars.at(j)));
+        break;
+      }
+    }
+  }
+}
+
+void EncodeURIComponent(const char* input, int length, CanonOutput* output) {
+  for (int pos = 0; pos < length; ++pos) {
+    const auto byte = static_cast<unsigned char>(input[pos]);
+    if (!IsComponentChar(byte))
+      AppendEscapedChar(byte, output);  // Everything else gets escaped.
+    else
+      output->push_back(byte);  // Component characters pass through as-is.
+  }
+}
+
+// 8-bit overload; forwards to DoCompareSchemeComponent() unchanged.
+bool CompareSchemeComponent(const char* spec, const Component& component,
+                            const char* compare_to) {
+  return DoCompareSchemeComponent(spec, component, compare_to);
+}
+
+// 16-bit overload; forwards to DoCompareSchemeComponent() unchanged.
+bool CompareSchemeComponent(const char16_t* spec, const Component& component,
+                            const char* compare_to) {
+  return DoCompareSchemeComponent(spec, component, compare_to);
+}
+
+}  // namespace url
diff --git a/url_util.h b/url_util.h
new file mode 100644
index 00000000000..670552a8ce1
--- /dev/null
+++ b/url_util.h
@@ -0,0 +1,314 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_URL_UTIL_H_
+#define URL_URL_UTIL_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "base/component_export.h"
+#include "base/strings/string_piece.h"
+#include "url/third_party/mozilla/url_parse.h"
+#include "url/url_canon.h"
+#include "url/url_constants.h"
+
+namespace url {
+
+// Init ------------------------------------------------------------------------
+
+// Used for tests that need to reset schemes. Note that this can only be used
+// in conjunction with ScopedSchemeRegistryForTests.
+COMPONENT_EXPORT(URL) void ClearSchemesForTests();
+
+class ScopedSchemeRegistryInternal;
+
+// Test-only scope guard: stores the SchemeRegistry upon creation so tests can
+// modify a copy of it, and restores the original SchemeRegistry when deleted.
+class COMPONENT_EXPORT(URL) ScopedSchemeRegistryForTests {
+ public:
+  ScopedSchemeRegistryForTests();   // Begins the scope (see class comment).
+  ~ScopedSchemeRegistryForTests();  // Ends the scope (see class comment).
+
+ private:
+  std::unique_ptr<ScopedSchemeRegistryInternal> internal_;  // Opaque state.
+};
+
+// Schemes ---------------------------------------------------------------------
+
+// Changes the behavior of SchemeHostPort / Origin to allow non-standard schemes
+// to be specified, instead of canonicalizing them to an invalid SchemeHostPort
+// or opaque Origin, respectively. This is used for Android WebView backwards
+// compatibility, which allows the use of custom schemes: content hosted in
+// Android WebView assumes that one URL with a non-standard scheme will be
+// same-origin to another URL with the same non-standard scheme.
+//
+// Not thread-safe.
+COMPONENT_EXPORT(URL) void EnableNonStandardSchemesForAndroidWebView();
+
+// Whether or not SchemeHostPort and Origin allow non-standard schemes.
+COMPONENT_EXPORT(URL) bool AllowNonStandardSchemesForAndroidWebView();
+
+// The following Add*Scheme methods are not thread-safe and cannot be called
+// concurrently with any other url_util function. They will assert if the lists
+// of schemes have been locked (see LockSchemeRegistries), or used.
+
+// Adds an application-defined scheme to the internal list of "standard-format"
+// URL schemes. A standard-format scheme adheres to what RFC 3986 calls "generic
+// URI syntax" (https://tools.ietf.org/html/rfc3986#section-3).
+
+COMPONENT_EXPORT(URL)
+void AddStandardScheme(const char* new_scheme, SchemeType scheme_type);
+
+// Returns the list of schemes registered for "standard" URLs. Note that this
+// should not be used if you just need to check whether your protocol is
+// standard. Instead, use the IsStandard() function declared below, as it is
+// much more efficient. This function should only be used where you need to
+// perform other operations against the standard scheme list.
+COMPONENT_EXPORT(URL)
+std::vector<std::string> GetStandardSchemes();
+
+// Adds an application-defined scheme to the internal list of schemes allowed
+// for referrers.
+COMPONENT_EXPORT(URL)
+void AddReferrerScheme(const char* new_scheme, SchemeType scheme_type);
+
+// Adds an application-defined scheme to the list of schemes that do not trigger
+// mixed content warnings.
+COMPONENT_EXPORT(URL) void AddSecureScheme(const char* new_scheme);
+COMPONENT_EXPORT(URL) const std::vector<std::string>& GetSecureSchemes();
+
+// Adds an application-defined scheme to the list of schemes that normal pages
+// cannot link to or access (i.e., with the same security rules as those applied
+// to "file" URLs).
+COMPONENT_EXPORT(URL) void AddLocalScheme(const char* new_scheme);
+COMPONENT_EXPORT(URL) const std::vector<std::string>& GetLocalSchemes();
+
+// Adds an application-defined scheme to the list of schemes that cause pages
+// loaded with them to not have access to pages loaded with any other URL
+// scheme.
+COMPONENT_EXPORT(URL) void AddNoAccessScheme(const char* new_scheme);
+COMPONENT_EXPORT(URL) const std::vector<std::string>& GetNoAccessSchemes();
+
+// Adds an application-defined scheme to the list of schemes that can be sent
+// CORS requests.
+COMPONENT_EXPORT(URL) void AddCorsEnabledScheme(const char* new_scheme);
+COMPONENT_EXPORT(URL) const std::vector<std::string>& GetCorsEnabledSchemes();
+
+// Adds an application-defined scheme to the list of web schemes that can be
+// used by web to store data (e.g. cookies, local storage, ...). This is
+// to differentiate them from schemes that can store data but are not used on
+// web (e.g. application's internal schemes) or schemes that are used on web but
+// cannot store data.
+COMPONENT_EXPORT(URL) void AddWebStorageScheme(const char* new_scheme);
+COMPONENT_EXPORT(URL) const std::vector<std::string>& GetWebStorageSchemes();
+
+// Adds an application-defined scheme to the list of schemes that can bypass the
+// Content-Security-Policy (CSP) checks.
+COMPONENT_EXPORT(URL) void AddCSPBypassingScheme(const char* new_scheme);
+COMPONENT_EXPORT(URL) const std::vector<std::string>& GetCSPBypassingSchemes();
+
+// Adds an application-defined scheme to the list of schemes that are strictly
+// empty documents, allowing them to commit synchronously.
+COMPONENT_EXPORT(URL) void AddEmptyDocumentScheme(const char* new_scheme);
+COMPONENT_EXPORT(URL) const std::vector<std::string>& GetEmptyDocumentSchemes();
+
+// Adds a scheme with a predefined default handler.
+//
+// This pair of strings must be normalized protocol handler parameters as
+// described in the Custom Handler specification.
+// https://html.spec.whatwg.org/multipage/system-state.html#normalize-protocol-handler-parameters
+COMPONENT_EXPORT(URL)
+void AddPredefinedHandlerScheme(const char* new_scheme, const char* handler);
+COMPONENT_EXPORT(URL)
+std::vector<std::pair<std::string, std::string>> GetPredefinedHandlerSchemes();
+
+// Sets a flag to prevent future calls to Add*Scheme from succeeding.
+//
+// This is designed to help prevent errors for multithreaded applications.
+// Normal usage would be to call Add*Scheme for your custom schemes at
+// the beginning of program initialization, and then LockSchemeRegistries. This
+// prevents future callers from mistakenly calling Add*Scheme when the
+// program is running with multiple threads, where such usage would be
+// dangerous.
+//
+// We could have had Add*Scheme use a lock instead, but that would add
+// some platform-specific dependencies we don't otherwise have now, and is
+// overkill considering the normal usage is so simple.
+COMPONENT_EXPORT(URL) void LockSchemeRegistries();
+
+// Locates the scheme in the given string and places it into |found_scheme|,
+// which may be NULL to indicate the caller does not care about the range.
+//
+// Returns whether the given |compare| scheme matches the scheme found in the
+// input (if any). The |compare| scheme must be a valid canonical scheme or
+// the result of the comparison is undefined.
+COMPONENT_EXPORT(URL)
+bool FindAndCompareScheme(const char* str,
+                          int str_len,
+                          const char* compare,
+                          Component* found_scheme);
+COMPONENT_EXPORT(URL)
+bool FindAndCompareScheme(const char16_t* str,
+                          int str_len,
+                          const char* compare,
+                          Component* found_scheme);
+inline bool FindAndCompareScheme(const std::string& str,
+                                 const char* compare,
+                                 Component* found_scheme) {
+  const int str_len = static_cast<int>(str.size());  // API takes int lengths.
+  return FindAndCompareScheme(str.data(), str_len, compare, found_scheme);
+}
+inline bool FindAndCompareScheme(const std::u16string& str,
+                                 const char* compare,
+                                 Component* found_scheme) {
+  const int str_len = static_cast<int>(str.size());  // API takes int lengths.
+  return FindAndCompareScheme(str.data(), str_len, compare, found_scheme);
+}
+
+// Returns true if the given scheme identified by |scheme| within |spec| is in
+// the list of known standard-format schemes (see AddStandardScheme).
+COMPONENT_EXPORT(URL)
+bool IsStandard(const char* spec, const Component& scheme);
+COMPONENT_EXPORT(URL)
+bool IsStandard(const char16_t* spec, const Component& scheme);
+
+// Returns true if the given scheme identified by |scheme| within |spec| is in
+// the list of allowed schemes for referrers (see AddReferrerScheme).
+COMPONENT_EXPORT(URL)
+bool IsReferrerScheme(const char* spec, const Component& scheme);
+
+// Returns true and sets |type| to the SchemeType of the given scheme
+// identified by |scheme| within |spec| if the scheme is in the list of known
+// standard-format schemes (see AddStandardScheme).
+COMPONENT_EXPORT(URL)
+bool GetStandardSchemeType(const char* spec,
+                           const Component& scheme,
+                           SchemeType* type);
+COMPONENT_EXPORT(URL)
+bool GetStandardSchemeType(const char16_t* spec,
+                           const Component& scheme,
+                           SchemeType* type);
+
+// Hosts  ----------------------------------------------------------------------
+
+// Returns true if the |canonical_host| matches or is in the same domain as the
+// given |canonical_domain| string. For example, if the canonicalized hostname
+// is "www.google.com", this will return true for "com", "google.com", and
+// "www.google.com" domains.
+//
+// If either of the input StringPieces is empty, the return value is false. The
+// input domain should match host canonicalization rules. i.e. it should be
+// lowercase except for escape chars.
+COMPONENT_EXPORT(URL)
+bool DomainIs(base::StringPiece canonical_host,
+              base::StringPiece canonical_domain);
+
+// Returns true if the hostname is an IP address. Note: this function isn't very
+// cheap, as it must re-parse the host to verify.
+COMPONENT_EXPORT(URL) bool HostIsIPAddress(base::StringPiece host);
+
+// URL library wrappers --------------------------------------------------------
+
+// Parses the given spec according to the extracted scheme type. Normal users
+// should use the URL object, although this may be useful if performance is
+// critical and you don't want to do the heap allocation for the std::string.
+//
+// As with the Canonicalize* functions, the charset converter can
+// be NULL to use UTF-8 (it will be faster in this case).
+//
+// Returns true if a valid URL was produced, false if not. On failure, the
+// output and parsed structures will still be filled and will be consistent,
+// but they will not represent a loadable URL.
+COMPONENT_EXPORT(URL)
+bool Canonicalize(const char* spec,
+                  int spec_len,
+                  bool trim_path_end,
+                  CharsetConverter* charset_converter,
+                  CanonOutput* output,
+                  Parsed* output_parsed);
+COMPONENT_EXPORT(URL)
+bool Canonicalize(const char16_t* spec,
+                  int spec_len,
+                  bool trim_path_end,
+                  CharsetConverter* charset_converter,
+                  CanonOutput* output,
+                  Parsed* output_parsed);
+
+// Resolves a potentially relative URL relative to the given parsed base URL.
+// The base MUST be valid. The resulting canonical URL and parsed information
+// will be placed in to the given out variables.
+//
+// The relative need not be relative. If we discover that it's absolute, this
+// will produce a canonical version of that URL. See Canonicalize() for more
+// about the charset_converter.
+//
+// Returns true if the output is valid, false if the input could not produce
+// a valid URL.
+COMPONENT_EXPORT(URL)
+bool ResolveRelative(const char* base_spec,
+                     int base_spec_len,
+                     const Parsed& base_parsed,
+                     const char* relative,
+                     int relative_length,
+                     CharsetConverter* charset_converter,
+                     CanonOutput* output,
+                     Parsed* output_parsed);
+COMPONENT_EXPORT(URL)
+bool ResolveRelative(const char* base_spec,
+                     int base_spec_len,
+                     const Parsed& base_parsed,
+                     const char16_t* relative,
+                     int relative_length,
+                     CharsetConverter* charset_converter,
+                     CanonOutput* output,
+                     Parsed* output_parsed);
+
+// Replaces components in the given VALID input URL. The new canonical URL info
+// is written to output and out_parsed.
+//
+// Returns true if the resulting URL is valid.
+COMPONENT_EXPORT(URL)
+bool ReplaceComponents(const char* spec,
+                       int spec_len,
+                       const Parsed& parsed,
+                       const Replacements<char>& replacements,
+                       CharsetConverter* charset_converter,
+                       CanonOutput* output,
+                       Parsed* out_parsed);
+COMPONENT_EXPORT(URL)
+bool ReplaceComponents(const char* spec,
+                       int spec_len,
+                       const Parsed& parsed,
+                       const Replacements<char16_t>& replacements,
+                       CharsetConverter* charset_converter,
+                       CanonOutput* output,
+                       Parsed* out_parsed);
+
+// String helper functions -----------------------------------------------------
+
+enum class DecodeURLMode {
+  // UTF-8 decode only. Invalid byte sequences are replaced with U+FFFD.
+  kUTF8,
+  // Try UTF-8 decoding first. If any byte sequence is invalid UTF-8, the whole
+  // input is instead decoded by mapping each byte to the same-valued code unit.
+  kUTF8OrIsomorphic,
+};
+
+// Unescapes the given string using URL escaping rules.
+COMPONENT_EXPORT(URL)
+void DecodeURLEscapeSequences(const char* input,
+                              int length,
+                              DecodeURLMode mode,
+                              CanonOutputW* output);
+
+// Escapes the given string as defined by the JS method encodeURIComponent. See
+// https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/encodeURIComponent
+COMPONENT_EXPORT(URL)
+void EncodeURIComponent(const char* input, int length, CanonOutput* output);
+
+}  // namespace url
+
+#endif  // URL_URL_UTIL_H_
diff --git a/url_util_internal.h b/url_util_internal.h
new file mode 100644
index 00000000000..fe2a4d93bb3
--- /dev/null
+++ b/url_util_internal.h
@@ -0,0 +1,23 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_URL_UTIL_INTERNAL_H_
+#define URL_URL_UTIL_INTERNAL_H_
+
+#include "url/third_party/mozilla/url_parse.h"
+
+namespace url {
+
+// Given a string and a range inside the string, compares it to the given
+// lower-case |compare_to| buffer.
+bool CompareSchemeComponent(const char* spec,
+                            const Component& component,
+                            const char* compare_to);
+bool CompareSchemeComponent(const char16_t* spec,
+                            const Component& component,
+                            const char* compare_to);
+
+}  // namespace url
+
+#endif  // URL_URL_UTIL_INTERNAL_H_
diff --git a/url_util_unittest.cc b/url_util_unittest.cc
new file mode 100644
index 00000000000..e1d7801b011
--- /dev/null
+++ b/url_util_unittest.cc
@@ -0,0 +1,631 @@
+// Copyright 2013 The Chromium Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/url_util.h"
+
+#include <stddef.h>
+
+#include "base/strings/string_piece.h"
+#include "build/build_config.h"
+#include "testing/gtest/include/gtest/gtest-message.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "third_party/abseil-cpp/absl/types/optional.h"
+#include "url/third_party/mozilla/url_parse.h"
+#include "url/url_canon.h"
+#include "url/url_canon_stdstring.h"
+#include "url/url_test_utils.h"
+
+namespace url {
+
+class URLUtilTest : public testing::Test {
+ public:
+  URLUtilTest() = default;
+
+  URLUtilTest(const URLUtilTest&) = delete;             // Not copyable.
+  URLUtilTest& operator=(const URLUtilTest&) = delete;  // Not assignable.
+
+  ~URLUtilTest() override = default;
+
+ private:
+  ScopedSchemeRegistryForTests scoped_registry_;  // Lives as long as the fixture.
+};
+
+TEST_F(URLUtilTest, FindAndCompareScheme) {
+  Component found_scheme;
+
+  // Scheme found and matching, first without and then with |found_scheme|.
+  const char kStr1[] = "http://www.com/";
+  EXPECT_TRUE(FindAndCompareScheme(
+      kStr1, static_cast<int>(strlen(kStr1)), "http", nullptr));
+  EXPECT_TRUE(FindAndCompareScheme(
+      kStr1, static_cast<int>(strlen(kStr1)), "http", &found_scheme));
+  EXPECT_TRUE(found_scheme == Component(0, 4));
+
+  // A case where the scheme is found and doesn't match.
+  EXPECT_FALSE(FindAndCompareScheme(
+      kStr1, static_cast<int>(strlen(kStr1)), "https", &found_scheme));
+  EXPECT_TRUE(found_scheme == Component(0, 4));
+
+  // A case where there is no scheme.
+  const char kStr2[] = "httpfoobar";
+  EXPECT_FALSE(FindAndCompareScheme(
+      kStr2, static_cast<int>(strlen(kStr2)), "http", &found_scheme));
+  EXPECT_TRUE(found_scheme == Component());
+
+  // When there is an empty scheme, it should match the empty scheme.
+  const char kStr3[] = ":foo.com/";
+  EXPECT_TRUE(FindAndCompareScheme(
+      kStr3, static_cast<int>(strlen(kStr3)), "", &found_scheme));
+  EXPECT_TRUE(found_scheme == Component(0, 0));
+
+  // But when there is no scheme, it should fail.
+  EXPECT_FALSE(FindAndCompareScheme("", 0, "", &found_scheme));
+  EXPECT_TRUE(found_scheme == Component());
+
+  // When there is a whitespace char in scheme, it should canonicalize the URL
+  // before comparison.
+  const char whtspc_str[] = " \r\n\tjav\ra\nscri\tpt:alert(1)";
+  EXPECT_TRUE(FindAndCompareScheme(whtspc_str,
+                                   static_cast<int>(strlen(whtspc_str)),
+                                   "javascript", &found_scheme));
+  EXPECT_TRUE(found_scheme == Component(1, 10));
+
+  // Control characters should be stripped out on the ends, and kept in the
+  // middle.
+  const char ctrl_str[] = "\02jav\02scr\03ipt:alert(1)";
+  EXPECT_FALSE(FindAndCompareScheme(ctrl_str,
+                                    static_cast<int>(strlen(ctrl_str)),
+                                    "javascript", &found_scheme));
+  EXPECT_TRUE(found_scheme == Component(1, 11));
+}
+
+TEST_F(URLUtilTest, IsStandard) {
+  // "http" is expected to be standard; the made-up "foo" scheme is not.
+  const char kKnown[] = "http";
+  const char kUnknown[] = "foo";
+  EXPECT_TRUE(IsStandard(kKnown, Component(0, strlen(kKnown))));
+  EXPECT_FALSE(IsStandard(kUnknown, Component(0, strlen(kUnknown))));
+}
+
+TEST_F(URLUtilTest, IsReferrerScheme) {
+  // "http" is expected to be a referrer scheme; the made-up "foo" is not.
+  const char kKnown[] = "http";
+  const char kUnknown[] = "foo";
+  EXPECT_TRUE(IsReferrerScheme(kKnown, Component(0, strlen(kKnown))));
+  EXPECT_FALSE(IsReferrerScheme(kUnknown, Component(0, strlen(kUnknown))));
+}
+
+TEST_F(URLUtilTest, AddReferrerScheme) {
+  static const char kCustom[] = "foo";
+  const Component whole(0, strlen(kCustom));
+  EXPECT_FALSE(IsReferrerScheme(kCustom, whole));
+  ScopedSchemeRegistryForTests registry_scope;  // Scopes the registration.
+  AddReferrerScheme(kCustom, SCHEME_WITH_HOST);
+  EXPECT_TRUE(IsReferrerScheme(kCustom, whole));
+}
+
+TEST_F(URLUtilTest, ShutdownCleansUpSchemes) {
+  static const char kCustom[] = "foo";
+  const Component whole(0, strlen(kCustom));
+  EXPECT_FALSE(IsReferrerScheme(kCustom, whole));
+  // The registration must not outlive the nested registry scope.
+  {
+    ScopedSchemeRegistryForTests registry_scope;
+    AddReferrerScheme(kCustom, SCHEME_WITH_HOST);
+    EXPECT_TRUE(IsReferrerScheme(kCustom, whole));
+  }
+  EXPECT_FALSE(IsReferrerScheme(kCustom, whole));
+}
+
+TEST_F(URLUtilTest, GetStandardSchemeType) {
+  // Each positive case seeds |type| with a value other than the expected one,
+  // so a passing EXPECT_EQ shows that the out-param was really written.
+  SchemeType type = SCHEME_WITHOUT_AUTHORITY;
+
+  const char kHttp[] = "http";
+  const Component http_range(0, strlen(kHttp));
+  EXPECT_TRUE(GetStandardSchemeType(kHttp, http_range, &type));
+  EXPECT_EQ(SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, type);
+
+  const char kFilesystem[] = "filesystem";
+  const Component filesystem_range(0, strlen(kFilesystem));
+  type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
+  EXPECT_TRUE(GetStandardSchemeType(kFilesystem, filesystem_range, &type));
+  EXPECT_EQ(SCHEME_WITHOUT_AUTHORITY, type);
+
+  // A scheme that was never registered must be reported as non-standard; the
+  // value left in |type| is not checked in that case.
+  const char kUnknown[] = "foo";
+  const Component unknown_range(0, strlen(kUnknown));
+  type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
+  EXPECT_FALSE(GetStandardSchemeType(kUnknown, unknown_range, &type));
+}
+
+TEST_F(URLUtilTest, GetStandardSchemes) {
+  // The scheme added here is expected last, after the predefined constants.
+  AddStandardScheme("foo", SCHEME_WITHOUT_AUTHORITY);
+  const std::vector<std::string> want = {
+      kHttpsScheme, kHttpScheme,       kFileScheme, kFtpScheme, kWssScheme,
+      kWsScheme,    kFileSystemScheme, "foo"};
+  EXPECT_EQ(want, GetStandardSchemes());
+}
+
+TEST_F(URLUtilTest, ReplaceComponents) {
+  Parsed parsed;
+  RawCanonOutputT<char> output;
+  Parsed new_parsed;
+  Replacements<char> replacements;
+  // Nothing is asserted: the calls below merely must not crash.
+  const auto replace_null_and_empty = [&] {
+    ReplaceComponents(nullptr, 0, parsed, replacements, nullptr, &output,
+                      &new_parsed);
+    ReplaceComponents("", 0, parsed, replacements, nullptr, &output,
+                      &new_parsed);
+  };
+  replacements.SetRef("test", Component(0, 4));
+  replace_null_and_empty();
+  replacements.ClearRef();
+  replacements.SetHost("test", Component(0, 4));
+  replace_null_and_empty();
+  replacements.ClearHost();
+  replace_null_and_empty();
+  replace_null_and_empty();
+}
+
+// Canonicalizes |base_url|, swaps in |scheme|, and returns the resulting spec.
+static std::string CheckReplaceScheme(const char* base_url,
+                                      const char* scheme) {
+  // Make sure the input is canonicalized.
+  RawCanonOutput<32> original;
+  Parsed original_parsed;
+  Canonicalize(base_url, static_cast<int>(strlen(base_url)), true, nullptr,
+               &original, &original_parsed);
+
+  Replacements<char> replacements;
+  replacements.SetScheme(scheme, Component(0, strlen(scheme)));
+  std::string output_string;
+  StdStringCanonOutput output(&output_string);
+  Parsed output_parsed;
+  ReplaceComponents(original.data(), original.length(), original_parsed,
+                    replacements, nullptr, &output, &output_parsed);
+
+  output.Complete();
+  return output_string;
+}
+
+TEST_F(URLUtilTest, ReplaceScheme) {
+  EXPECT_EQ("https://google.com/",
+            CheckReplaceScheme("http://google.com/", "https"));
+  EXPECT_EQ("file://google.com/",
+            CheckReplaceScheme("http://google.com/", "file"));
+  EXPECT_EQ("http://home/Build",
+            CheckReplaceScheme("file:///Home/Build", "http"));
+  EXPECT_EQ("javascript:foo",
+            CheckReplaceScheme("about:foo", "javascript"));
+  EXPECT_EQ("://google.com/",
+            CheckReplaceScheme("http://google.com/", ""));
+  EXPECT_EQ("http://google.com/",
+            CheckReplaceScheme("about:google.com", "http"));
+  EXPECT_EQ("http:", CheckReplaceScheme("", "http"));
+
+#ifdef WIN32
+  // Magic Windows drive letter behavior when converting to a file URL.
+  EXPECT_EQ("file:///E:/foo/",
+            CheckReplaceScheme("http://localhost/e:foo/", "file"));
+#endif
+
+  // The expectation already equals "about://google.com/", the result that the
+  // fix for http://crbug.com/160 was predicted to produce. TODO: confirm.
+  EXPECT_EQ("about://google.com/",
+            CheckReplaceScheme("http://google.com/", "about"));
+
+  EXPECT_EQ("http://example.com/%20hello%20#%20world",
+            CheckReplaceScheme("myscheme:example.com/ hello # world ", "http"));
+}
+
+TEST_F(URLUtilTest, DecodeURLEscapeSequences) {
+  struct DecodeCase {
+    const char* input;
+    const char* output;
+  } decode_cases[] = {
+      {"hello, world", "hello, world"},
+      {"%01%02%03%04%05%06%07%08%09%0a%0B%0C%0D%0e%0f/",
+       "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0B\x0C\x0D\x0e\x0f/"},
+      {"%10%11%12%13%14%15%16%17%18%19%1a%1B%1C%1D%1e%1f/",
+       "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1B\x1C\x1D\x1e\x1f/"},
+      {"%20%21%22%23%24%25%26%27%28%29%2a%2B%2C%2D%2e%2f/",
+       " !\"#$%&'()*+,-.//"},
+      {"%30%31%32%33%34%35%36%37%38%39%3a%3B%3C%3D%3e%3f/",
+       "0123456789:;<=>?/"},
+      {"%40%41%42%43%44%45%46%47%48%49%4a%4B%4C%4D%4e%4f/",
+       "@ABCDEFGHIJKLMNO/"},
+      {"%50%51%52%53%54%55%56%57%58%59%5a%5B%5C%5D%5e%5f/",
+       "PQRSTUVWXYZ[\\]^_/"},
+      {"%60%61%62%63%64%65%66%67%68%69%6a%6B%6C%6D%6e%6f/",
+       "`abcdefghijklmno/"},
+      {"%70%71%72%73%74%75%76%77%78%79%7a%7B%7C%7D%7e%7f/",
+       "pqrstuvwxyz{|}~\x7f/"},
+      {"%e4%bd%a0%e5%a5%bd", "\xe4\xbd\xa0\xe5\xa5\xbd"},
+  };
+
+  // Each of these cases must decode to the same text in both decode modes.
+  for (const auto& decode_case : decode_cases) {
+    const char* escaped = decode_case.input;
+    RawCanonOutputT<char16_t> iso;
+    DecodeURLEscapeSequences(escaped, strlen(escaped),
+                             DecodeURLMode::kUTF8OrIsomorphic, &iso);
+    EXPECT_EQ(decode_case.output,
+              base::UTF16ToUTF8(std::u16string(iso.data(), iso.length())));
+
+    RawCanonOutputT<char16_t> utf8;
+    DecodeURLEscapeSequences(escaped, strlen(escaped), DecodeURLMode::kUTF8,
+                             &utf8);
+    EXPECT_EQ(decode_case.output,
+              base::UTF16ToUTF8(std::u16string(utf8.data(), utf8.length())));
+  }
+
+  // %00 must be decoded too: the output may not be the literal text "%00".
+  const char kEscapedNul[] = "%00";
+  RawCanonOutputT<char16_t> nul_decoded;
+  DecodeURLEscapeSequences(kEscapedNul, strlen(kEscapedNul),
+                           DecodeURLMode::kUTF8, &nul_decoded);
+  EXPECT_NE("%00", base::UTF16ToUTF8(std::u16string(nul_decoded.data(),
+                                                    nul_decoded.length())));
+
+  // Invalid UTF-8: expectations differ per mode (0-terminated code units).
+  struct Utf8DecodeCase {
+    const char* input;
+    std::vector<char16_t> expected_iso;
+    std::vector<char16_t> expected_utf8;
+  } utf8_decode_cases[] = {
+      // %e5%a5%bd is a valid UTF-8 sequence. U+597D
+      {"%e4%a0%e5%a5%bd",
+       {0x00e4, 0x00a0, 0x00e5, 0x00a5, 0x00bd, 0},
+       {0xfffd, 0x597d, 0}},
+      {"%e5%a5%bd%e4%a0",
+       {0x00e5, 0x00a5, 0x00bd, 0x00e4, 0x00a0, 0},
+       {0x597d, 0xfffd, 0}},
+      {"%e4%a0%e5%bd",
+       {0x00e4, 0x00a0, 0x00e5, 0x00bd, 0},
+       {0xfffd, 0xfffd, 0}},
+  };
+
+  for (const auto& utf8_case : utf8_decode_cases) {
+    const char* escaped = utf8_case.input;
+    RawCanonOutputT<char16_t> iso;
+    DecodeURLEscapeSequences(escaped, strlen(escaped),
+                             DecodeURLMode::kUTF8OrIsomorphic, &iso);
+    EXPECT_EQ(std::u16string(utf8_case.expected_iso.data()),
+              std::u16string(iso.data(), iso.length()));
+
+    RawCanonOutputT<char16_t> utf8;
+    DecodeURLEscapeSequences(escaped, strlen(escaped), DecodeURLMode::kUTF8,
+                             &utf8);
+    EXPECT_EQ(std::u16string(utf8_case.expected_utf8.data()),
+              std::u16string(utf8.data(), utf8.length()));
+  }
+}
+
+TEST_F(URLUtilTest, TestEncodeURIComponent) {
+  // Table of raw inputs and their expected escaped forms. Per the expectations
+  // below, only ASCII letters, digits and !()*-._~ are left unescaped.
+  struct EncodeCase {
+    const char* input;
+    const char* output;
+  } encode_cases[] = {
+      {"hello, world", "hello%2C%20world"},
+      // C0 control characters; NUL is absent because inputs are C strings.
+      {"\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F",
+       "%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F"},
+      {"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F",
+       "%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F"},
+      {" !\"#$%&'()*+,-./",
+       "%20!%22%23%24%25%26%27()*%2B%2C-.%2F"},
+      {"0123456789:;<=>?", "0123456789%3A%3B%3C%3D%3E%3F"},
+      {"@ABCDEFGHIJKLMNO", "%40ABCDEFGHIJKLMNO"},
+      {"PQRSTUVWXYZ[\\]^_",
+       "PQRSTUVWXYZ%5B%5C%5D%5E_"},
+      {"`abcdefghijklmno", "%60abcdefghijklmno"},
+      {"pqrstuvwxyz{|}~\x7f",
+       "pqrstuvwxyz%7B%7C%7D~%7F"},
+  };
+
+  // Encode every input and compare the buffer with the expected text.
+  for (const auto& encode_case : encode_cases) {
+    RawCanonOutputT<char> encoded;
+    EncodeURIComponent(encode_case.input, strlen(encode_case.input), &encoded);
+    EXPECT_EQ(encode_case.output,
+              std::string(encoded.data(), encoded.length()));
+  }
+}
+
+TEST_F(URLUtilTest, TestResolveRelativeWithNonStandardBase) {
+  // This tests non-standard (in the sense that IsStandard() == false)
+  // hierarchical schemes.
+  struct ResolveRelativeCase {
+    const char* base;
+    const char* rel;
+    bool is_valid;
+    const char* out;
+  } resolve_non_standard_cases[] = {
+      // Resolving a relative path against a non-hierarchical URL should fail.
+      {"scheme:opaque_data", "/path", false, ""},
+      // Resolving a relative path against a non-standard authority-based base
+      // URL doesn't alter the authority section.
+      {"scheme://Authority/", "../path", true, "scheme://Authority/path"},
+      // A non-standard hierarchical base is resolved with path URL
+      // canonicalization rules.
+      {"data:/Blah:Blah/", "file.html", true, "data:/Blah:Blah/file.html"},
+      {"data:/Path/../part/part2", "file.html", true,
+       "data:/Path/../part/file.html"},
+      {"data://text/html,payload", "//user:pass@host:33////payload22", true,
+       "data://user:pass@host:33////payload22"},
+      // Path URL canonicalization rules also apply to non-standard authority-
+      // based URLs.
+      {"custom://Authority/", "file.html", true,
+       "custom://Authority/file.html"},
+      {"custom://Authority/", "other://Auth/", true, "other://Auth/"},
+      {"custom://Authority/", "../../file.html", true,
+       "custom://Authority/file.html"},
+      {"custom://Authority/path/", "file.html", true,
+       "custom://Authority/path/file.html"},
+      {"custom://Authority:NoCanon/path/", "file.html", true,
+       "custom://Authority:NoCanon/path/file.html"},
+      // It's still possible to get an invalid path URL.
+      {"custom://Invalid:!#Auth/", "file.html", false, ""},
+      // A path with an authority section gets canonicalized under standard URL
+      // rules, even though the base was non-standard.
+      {"content://content.Provider/", "//other.Provider", true,
+       "content://other.provider/"},
+
+      // Resolving an absolute URL doesn't cause canonicalization of the
+      // result.
+      {"about:blank", "custom://Authority", true, "custom://Authority"},
+      // Fragment URLs can be resolved against a non-standard base.
+      {"scheme://Authority/path", "#fragment", true,
+       "scheme://Authority/path#fragment"},
+      {"scheme://Authority/", "#fragment", true,
+       "scheme://Authority/#fragment"},
+      // Resolving should fail if the base URL is authority-based but is
+      // missing a path component (the '/' at the end).
+      {"scheme://Authority", "path", false, ""},
+      // Test resolving a fragment (only) against any kind of base-URL.
+      {"about:blank", "#id42", true, "about:blank#id42"},
+      {"about:blank", " #id42", true, "about:blank#id42"},
+      {"about:blank#oldfrag", "#newfrag", true, "about:blank#newfrag"},
+      {"about:blank", " #id:42", true, "about:blank#id:42"},
+      // A surprising side effect of allowing fragments to resolve against
+      // any URL scheme is we might break javascript: URLs by doing so...
+      {"javascript:alert('foo#bar')", "#badfrag", true,
+       "javascript:alert('foo#badfrag"},
+      // In this case, the backslashes will not be canonicalized because it's a
+      // non-standard URL, but they will be treated as path separators,
+      // giving the base URL here a path of "\".
+      //
+      // The result here is somewhat arbitrary. One could argue it should be
+      // either "aaa://a\" or "aaa://a/" since the path is being replaced with
+      // the "current directory". But in the context of resolving on data URLs,
+      // adding the requested dot doesn't seem wrong either.
+      {"aaa://a\\", "aaa:.", true, "aaa://a\\."}};
+
+  for (size_t i = 0; i < std::size(resolve_non_standard_cases); i++) {
+    const ResolveRelativeCase& test_data = resolve_non_standard_cases[i];
+    Parsed base_parsed;
+    ParsePathURL(test_data.base, strlen(test_data.base), false, &base_parsed);
+
+    std::string resolved;
+    StdStringCanonOutput output(&resolved);
+    Parsed resolved_parsed;
+    bool valid = ResolveRelative(test_data.base, strlen(test_data.base),
+                                 base_parsed, test_data.rel,
+                                 strlen(test_data.rel), nullptr, &output,
+                                 &resolved_parsed);
+    output.Complete();
+
+    EXPECT_EQ(test_data.is_valid, valid) << i;
+    if (test_data.is_valid && valid)
+      EXPECT_EQ(test_data.out, resolved) << i;
+  }
+}
+
+TEST_F(URLUtilTest, TestNoRefComponent) {
+  // The hash-mark must be ignored when mailto: scheme is parsed,
+  // even if the URL has a base and relative part.
+  const char* base = "mailto://to/";
+  const char* rel = "any#body";
+
+  Parsed base_parsed;
+  ParsePathURL(base, strlen(base), false, &base_parsed);
+
+  std::string resolved;
+  StdStringCanonOutput output(&resolved);
+  Parsed resolved_parsed;
+
+  // Resolution must succeed without splitting "#body" off as a ref component.
+  bool valid =
+      ResolveRelative(base, strlen(base), base_parsed, rel, strlen(rel),
+                      nullptr, &output, &resolved_parsed);
+  EXPECT_TRUE(valid);
+  EXPECT_FALSE(resolved_parsed.ref.is_valid());
+}
+
+TEST_F(URLUtilTest, PotentiallyDanglingMarkup) {
+  // All inputs contain '<'; the flag is expected only alongside \t, \r or \n.
+  struct ResolveRelativeCase {
+    const char* base;
+    const char* rel;
+    bool potentially_dangling_markup;
+    const char* out;
+  } cases[] = {
+      {"https://example.com/", "/path<", false, "https://example.com/path%3C"},
+      {"https://example.com/", "\n/path<", true, "https://example.com/path%3C"},
+      {"https://example.com/", "\r/path<", true, "https://example.com/path%3C"},
+      {"https://example.com/", "\t/path<", true, "https://example.com/path%3C"},
+      {"https://example.com/", "/pa\nth<", true, "https://example.com/path%3C"},
+      {"https://example.com/", "/pa\rth<", true, "https://example.com/path%3C"},
+      {"https://example.com/", "/pa\tth<", true, "https://example.com/path%3C"},
+      {"https://example.com/", "/path\n<", true, "https://example.com/path%3C"},
+      {"https://example.com/", "/path\r<", true, "https://example.com/path%3C"},
+      {"https://example.com/", "/path\t<", true, "https://example.com/path%3C"},
+      {"https://example.com/", "\n/<path", true, "https://example.com/%3Cpath"},
+      {"https://example.com/", "\r/<path", true, "https://example.com/%3Cpath"},
+      {"https://example.com/", "\t/<path", true, "https://example.com/%3Cpath"},
+      {"https://example.com/", "/<pa\nth", true, "https://example.com/%3Cpath"},
+      {"https://example.com/", "/<pa\rth", true, "https://example.com/%3Cpath"},
+      {"https://example.com/", "/<pa\tth", true, "https://example.com/%3Cpath"},
+      {"https://example.com/", "/<path\n", true, "https://example.com/%3Cpath"},
+      {"https://example.com/", "/<path\r", true, "https://example.com/%3Cpath"},
+      {"https://example.com/", "/<path\t", true, "https://example.com/%3Cpath"},
+  };
+
+  for (const auto& test : cases) {
+    SCOPED_TRACE(::testing::Message() << test.base << ", " << test.rel);
+    Parsed base_parsed;
+    ParseStandardURL(test.base, strlen(test.base), &base_parsed);
+
+    std::string resolved;
+    StdStringCanonOutput output(&resolved);
+    Parsed resolved_parsed;
+    ASSERT_TRUE(ResolveRelative(test.base, strlen(test.base), base_parsed,
+                                test.rel, strlen(test.rel), nullptr, &output,
+                                &resolved_parsed));
+    output.Complete();
+
+    EXPECT_EQ(test.potentially_dangling_markup,
+              resolved_parsed.potentially_dangling_markup);
+    EXPECT_EQ(test.out, resolved);
+  }
+}
+
+TEST_F(URLUtilTest, PotentiallyDanglingMarkupAfterReplacement) {
+  // The input holds both a newline and a '<', so canonicalization is expected
+  // to set the potentially_dangling_markup flag.
+  const char kSpec[] = "htt\nps://example.com/<path";
+  RawCanonOutput<32> canon;
+  Parsed parsed;
+  Canonicalize(kSpec, strlen(kSpec), false, nullptr, &canon, &parsed);
+  ASSERT_TRUE(parsed.potentially_dangling_markup);
+
+  // Clearing the ref must leave the flag set on the resulting Parsed.
+  Replacements<char> clear_ref;
+  clear_ref.ClearRef();
+  RawCanonOutput<32> replaced_canon;
+  Parsed replaced_parsed;
+  ReplaceComponents(canon.data(), canon.length(), parsed, clear_ref, nullptr,
+                    &replaced_canon, &replaced_parsed);
+  EXPECT_TRUE(replaced_parsed.potentially_dangling_markup);
+}
+
+TEST_F(URLUtilTest, PotentiallyDanglingMarkupAfterSchemeOnlyReplacement) {
+  // The input holds both a newline and a '<', so canonicalization is expected
+  // to set the potentially_dangling_markup flag.
+  const char kSpec[] = "http://example.com/\n/<path";
+  RawCanonOutput<32> canon;
+  Parsed parsed;
+  Canonicalize(kSpec, strlen(kSpec), false, nullptr, &canon, &parsed);
+  ASSERT_TRUE(parsed.potentially_dangling_markup);
+
+  // Swapping only the scheme must leave the flag set on the resulting Parsed.
+  const char kNewScheme[] = "https";
+  Replacements<char> scheme_only;
+  scheme_only.SetScheme(kNewScheme, Component(0, strlen(kNewScheme)));
+  RawCanonOutput<32> replaced_canon;
+  Parsed replaced_parsed;
+  ReplaceComponents(canon.data(), canon.length(), parsed, scheme_only, nullptr,
+                    &replaced_canon, &replaced_parsed);
+  EXPECT_TRUE(replaced_parsed.potentially_dangling_markup);
+}
+
+TEST_F(URLUtilTest, TestDomainIs) {
+  // Each row: canonicalized host, lower-case ASCII domain, expected DomainIs().
+  const struct {
+    const char* canonicalized_host;
+    const char* lower_ascii_domain;
+    bool expected_domain_is;
+  } kTestCases[] = {
+      {"google.com", "google.com", true},
+      {"www.google.com", "google.com", true},      // Subdomain is ignored.
+      {"www.google.com.cn", "google.com", false},  // Different TLD.
+      {"www.google.comm", "google.com", false},
+      {"www.iamnotgoogle.com", "google.com", false},  // Different hostname.
+      {"www.google.com", "Google.com", false},  // The input is not lower-cased.
+
+      // If the host ends with a dot, it matches domains with or without a dot.
+      {"www.google.com.", "google.com", true},
+      {"www.google.com.", "google.com.", true},
+      {"www.google.com.", ".com", true},
+      {"www.google.com.", ".com.", true},
+
+      // But, if the host doesn't end with a dot and the input domain does, then
+      // it's considered to not match.
+      {"www.google.com", "google.com.", false},
+
+      // If the host ends with two dots, it doesn't match.
+      {"www.google.com..", "google.com", false},
+
+      // Empty parameters.
+      {"www.google.com", "", false},
+      {"", "www.google.com", false},
+      {"", "", false},
+  };
+
+  for (const auto& [host, domain, expected_domain_is] : kTestCases) {
+    SCOPED_TRACE(testing::Message()
+                 << "(host, domain): (" << host << ", " << domain << ")");
+
+    // DomainIs() receives the host first, then the candidate domain.
+    const bool domain_is = DomainIs(host, domain);
+    EXPECT_EQ(expected_domain_is, domain_is);
+  }
+}
+
+namespace {
+absl::optional<std::string> CanonicalizeSpec(base::StringPiece spec,
+                                             bool trim_path_end) {
+  std::string result;
+  StdStringCanonOutput sink(&result);
+  Parsed ignored_parsed;
+  if (Canonicalize(spec.data(), spec.size(), trim_path_end,
+                   /*charset_converter=*/nullptr, &sink, &ignored_parsed)) {
+    sink.Complete();  // Required before `result` may be read.
+    return result;
+  }
+  // Canonicalization failed: report "no value" to the caller.
+  return {};
+}
+}  // namespace
+
+#if BUILDFLAG(IS_WIN)
+// Regression test for https://crbug.com/1252658.
+TEST_F(URLUtilTest, TestCanonicalizeWindowsPathWithLeadingNUL) {
+  const std::string drive_spec = std::string(1, '\0') + "w:";
+  const std::string unc_spec = std::string(1, '\0') + "\\\\server\\share";
+  EXPECT_EQ(CanonicalizeSpec(drive_spec, /*trim_path_end=*/false),
+            absl::make_optional("file:///W:"));
+  EXPECT_EQ(CanonicalizeSpec(unc_spec, /*trim_path_end=*/false),
+            absl::make_optional("file://server/share"));
+}
+#endif
+
+TEST_F(URLUtilTest, TestCanonicalizeIdempotencyWithLeadingControlCharacters) {
+  // "_" is a placeholder that each iteration overwrites with the character
+  // under test: every C0 control character, then the space character.
+  std::string spec = "_w:";
+  for (char c = '\0'; c <= ' '; ++c) {
+    SCOPED_TRACE(testing::Message() << "c: " << c);
+
+    // Replacing the first character keeps the length at 3, even for NUL.
+    spec[0] = c;
+
+    for (bool trim_path_end : {false, true}) {
+      SCOPED_TRACE(testing::Message() << "trim_path_end: " << trim_path_end);
+
+      // Canonicalizing an already-canonical spec must return it unchanged.
+      absl::optional<std::string> once = CanonicalizeSpec(spec, trim_path_end);
+      ASSERT_TRUE(once);
+      EXPECT_EQ(once, CanonicalizeSpec(*once, trim_path_end));
+    }
+  }
+}
+
+}  // namespace url