From 320a8dc5d79d4056487b344c170c9a805597c528 Mon Sep 17 00:00:00 2001 From: Luigi Pinca Date: Fri, 23 Jul 2021 18:31:42 +0200 Subject: [PATCH 1/4] [fix] Ignore slashes after the protocol for special URLs Fixes #205 Fixes #206 --- index.js | 51 ++++++++++++++++++++++++++++++++++------ test/test.js | 66 +++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 107 insertions(+), 10 deletions(-) diff --git a/index.js b/index.js index 94e357e..f424acc 100644 --- a/index.js +++ b/index.js @@ -98,6 +98,24 @@ function lolcation(loc) { return finaldestination; } +/** + * Check whether a protocol scheme is special. + * + * @param {String} The protocol scheme of the URL + * @return {Boolean} `true` if the protocol scheme is special, else `false` + * @private + */ +function isSpecial(scheme) { + return ( + scheme === 'file:' || + scheme === 'ftp:' || + scheme === 'http:' || + scheme === 'https:' || + scheme === 'ws:' || + scheme === 'wss:' + ); +} + /** * @typedef ProtocolExtract * @type Object @@ -110,16 +128,32 @@ function lolcation(loc) { * Extract protocol information from a URL with/without double slash ("//"). * * @param {String} address URL we want to extract from. + * @param {Object} location * @return {ProtocolExtract} Extracted information. * @private */ -function extractProtocol(address) { +function extractProtocol(address, location) { address = trimLeft(address); + location = location || {}; - var match = protocolre.exec(address) - , protocol = match[1] ? match[1].toLowerCase() : '' - , slashes = !!(match[2] && match[2].length >= 2) - , rest = match[2] && match[2].length === 1 ? '/' + match[3] : match[3]; + var match = protocolre.exec(address); + var protocol = match[1] ? match[1].toLowerCase() : ''; + var rest = match[2] ? match[2] + match[3] : match[3]; + var slashes = !!(match[2] && match[2].length >= 2); + + if (protocol === 'file:') { + if (slashes) { + rest = rest.slice(2); + } + } else if (isSpecial(protocol)) { + rest = match[3]; + } else if (protocol) { + if (rest.indexOf('//') === 0) { + rest = rest.slice(2); + } + } else if (slashes && location.hostname) { + rest = match[3]; + } return { protocol: protocol, @@ -214,7 +248,7 @@ function Url(address, location, parser) { // // Extract protocol information before running the instructions. // - extracted = extractProtocol(address || ''); + extracted = extractProtocol(address || '', location); relative = !extracted.protocol && !extracted.slashes; url.slashes = extracted.slashes || relative && location.slashes; url.protocol = extracted.protocol || location.protocol || ''; @@ -224,7 +258,10 @@ function Url(address, location, parser) { // When the authority component is absent the URL starts with a path // component. // - if (!extracted.slashes || url.protocol === 'file:') { + if ( + url.protocol === 'file:' || + (!extracted.slashes && !isSpecial(extracted.protocol)) + ) { instructions[3] = [/(.*)/, 'pathname']; } diff --git a/test/test.js b/test/test.js index 38290ed..9a84fba 100644 --- a/test/test.js +++ b/test/test.js @@ -93,7 +93,7 @@ describe('url-parse', function () { assume(parse.extractProtocol('//foo/bar')).eql({ slashes: true, protocol: '', - rest: 'foo/bar' + rest: '//foo/bar' }); }); @@ -283,7 +283,7 @@ describe('url-parse', function () { assume(parsed.href).equals('http://what-is-up.com/'); }); - it('does not see a slash after the protocol as path', function () { + it('ignores slashes after the protocol for special URLs', function () { var url = 'https:\\/github.com/foo/bar' , parsed = parse(url); @@ -292,11 +292,59 @@ describe('url-parse', function () { assume(parsed.pathname).equals('/foo/bar'); url = 'https:/\\/\\/\\github.com/foo/bar'; + parsed = parse(url); assume(parsed.host).equals('github.com'); assume(parsed.hostname).equals('github.com'); assume(parsed.pathname).equals('/foo/bar'); + + url = 'https:/github.com/foo/bar'; + parsed = parse(url); + assume(parsed.host).equals('github.com'); + assume(parsed.pathname).equals('/foo/bar'); + + url = 'https:\\github.com/foo/bar'; + parsed = parse(url); + assume(parsed.host).equals('github.com'); + assume(parsed.pathname).equals('/foo/bar'); + + url = 'https:github.com/foo/bar'; + parsed = parse(url); + assume(parsed.host).equals('github.com'); + assume(parsed.pathname).equals('/foo/bar'); + + url = 'https:github.com/foo/bar'; + parsed = parse(url); + assume(parsed.host).equals('github.com'); + assume(parsed.pathname).equals('/foo/bar'); }); + it('handles slashes after the protocol for non special URLs', function () { + var url = 'foo:example.com' + , parsed = parse(url); + + assume(parsed.hostname).equals(''); + assume(parsed.pathname).equals('example.com'); + assume(parsed.href).equals('foo:example.com'); + + url = 'foo:/example.com'; + parsed = parse(url); + assume(parsed.hostname).equals(''); + assume(parsed.pathname).equals('/example.com'); + assume(parsed.href).equals('foo:/example.com'); + + url = 'foo://example.com'; + parsed = parse(url); + assume(parsed.hostname).equals('example.com'); + assume(parsed.pathname).equals('/'); + assume(parsed.href).equals('foo://example.com/'); + + url = 'foo:///example.com'; + parsed = parse(url); + assume(parsed.hostname).equals(''); + assume(parsed.pathname).equals('/example.com'); + assume(parsed.href).equals('foo:///example.com'); + }) + describe('origin', function () { it('generates an origin property', function () { var url = 'http://google.com:80/pathname' @@ -440,7 +488,7 @@ describe('url-parse', function () { }); it('handles the file: protocol', function () { - var slashes = ['', '/', '//', '///', '////', '/////']; + var slashes = ['', '/', '//', '///']; var data; var url; @@ -451,6 +499,18 @@ describe('url-parse', function () { assume(data.href).equals('file:///'); } + url = 'file:////'; + data = parse(url); + assume(data.protocol).equals('file:'); + assume(data.pathname).equals('//'); + assume(data.href).equals(url); + + url = 'file://///'; + data = parse(url); + assume(data.protocol).equals('file:'); + assume(data.pathname).equals('///'); + assume(data.href).equals(url); + url = 'file:///Users/foo/BAR/baz.pdf'; data = parse(url); assume(data.protocol).equals('file:'); From a3bd3817fcda30c273e4e4a26a0feba9fe5dfc09 Mon Sep 17 00:00:00 2001 From: Luigi Pinca Date: Sat, 24 Jul 2021 09:27:05 +0200 Subject: [PATCH 2/4] [fix] Do not incorrectly set the `slashes` property to `true` Set it to `true` only if the protocol is special or if it is actually followed by two forward slashes. --- index.js | 44 ++++++++++++++++++++++++++++++++------------ test/test.js | 40 ++++++++++++++++++++++++++++++++++------ 2 files changed, 66 insertions(+), 18 deletions(-) diff --git a/index.js b/index.js index f424acc..73b53f6 100644 --- a/index.js +++ b/index.js @@ -2,8 +2,8 @@ var required = require('requires-port') , qs = require('querystringify') - , slashes = /^[A-Za-z][A-Za-z0-9+-.]*:[\\/]+/ - , protocolre = /^([a-z][a-z0-9.+-]*:)?([\\/]{1,})?([\S\s]*)/i + , slashes = /^[A-Za-z][A-Za-z0-9+-.]*:\/\// + , protocolre = /^([a-z][a-z0-9.+-]*:)?(\/\/)?([\\/]+)?([\S\s]*)/i , whitespace = '[\\x09\\x0A\\x0B\\x0C\\x0D\\x20\\xA0\\u1680\\u180E\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2007\\u2008\\u2009\\u200A\\u202F\\u205F\\u3000\\u2028\\u2029\\uFEFF]' , left = new RegExp('^'+ whitespace +'+'); @@ -138,26 +138,46 @@ function extractProtocol(address, location) { var match = protocolre.exec(address); var protocol = match[1] ? match[1].toLowerCase() : ''; - var rest = match[2] ? match[2] + match[3] : match[3]; - var slashes = !!(match[2] && match[2].length >= 2); + var forwardSlashes = !!match[2]; + var otherSlashes = !!match[3]; + var slashesCount = 0; + var rest; + + if (forwardSlashes) { + if (otherSlashes) { + rest = match[2] + match[3] + match[4]; + slashesCount = match[2].length + match[3].length; + } else { + rest = match[2] + match[4]; + slashesCount = match[2].length; + } + } else { + if (otherSlashes) { + rest = match[3] + match[4]; + slashesCount = match[3].length; + } else { + rest = match[4] + } + } if (protocol === 'file:') { - if (slashes) { + if (slashesCount >= 2) { rest = rest.slice(2); } } else if (isSpecial(protocol)) { - rest = match[3]; + rest = match[4]; } else if (protocol) { - if (rest.indexOf('//') === 0) { + if (forwardSlashes) { rest = rest.slice(2); } - } else if (slashes && location.hostname) { - rest = match[3]; + } else if (slashesCount >= 2 && location.hostname) { + rest = match[4]; } return { protocol: protocol, - slashes: slashes, + slashes: forwardSlashes || isSpecial(protocol), + slashesCount: slashesCount, rest: rest }; } @@ -260,7 +280,7 @@ function Url(address, location, parser) { // if ( url.protocol === 'file:' || - (!extracted.slashes && !isSpecial(extracted.protocol)) + (extracted.slashesCount < 2 && !isSpecial(extracted.protocol)) ) { instructions[3] = [/(.*)/, 'pathname']; } @@ -472,7 +492,7 @@ function toString(stringify) { if (protocol && protocol.charAt(protocol.length - 1) !== ':') protocol += ':'; - var result = protocol + (url.slashes || url.protocol === 'file:' ? '//' : ''); + var result = protocol + (url.slashes || isSpecial(url.protocol) ? '//' : ''); if (url.username) { result += url.username; diff --git a/test/test.js b/test/test.js index 9a84fba..71cc473 100644 --- a/test/test.js +++ b/test/test.js @@ -71,7 +71,8 @@ describe('url-parse', function () { assume(parse.extractProtocol('http://example.com')).eql({ slashes: true, protocol: 'http:', - rest: 'example.com' + rest: 'example.com', + slashesCount: 2 }); }); @@ -79,7 +80,8 @@ describe('url-parse', function () { assume(parse.extractProtocol('')).eql({ slashes: false, protocol: '', - rest: '' + rest: '', + slashesCount: 0 }); }); @@ -87,13 +89,15 @@ describe('url-parse', function () { assume(parse.extractProtocol('/foo')).eql({ slashes: false, protocol: '', - rest: '/foo' + rest: '/foo', + slashesCount: 1 }); assume(parse.extractProtocol('//foo/bar')).eql({ slashes: true, protocol: '', - rest: '//foo/bar' + rest: '//foo/bar', + slashesCount: 2 }); }); @@ -103,7 +107,8 @@ describe('url-parse', function () { assume(parse.extractProtocol(input)).eql({ slashes: false, protocol: '', - rest: input + rest: input, + slashesCount: 0 }); }); @@ -111,7 +116,8 @@ describe('url-parse', function () { assume(parse.extractProtocol(' javascript://foo')).eql({ slashes: true, protocol: 'javascript:', - rest: 'foo' + rest: 'foo', + slashesCount: 2 }); }); }); @@ -281,6 +287,12 @@ describe('url-parse', function () { assume(parsed.host).equals('what-is-up.com'); assume(parsed.href).equals('http://what-is-up.com/'); + + url = '\\\\\\\\what-is-up.com' + parsed = parse(url, parse('http://google.com')); + + assume(parsed.host).equals('what-is-up.com'); + assume(parsed.href).equals('http://what-is-up.com/'); }); it('ignores slashes after the protocol for special URLs', function () { @@ -290,32 +302,44 @@ describe('url-parse', function () { assume(parsed.host).equals('github.com'); assume(parsed.hostname).equals('github.com'); assume(parsed.pathname).equals('/foo/bar'); + assume(parsed.slashes).is.true(); + assume(parsed.href).equals('https://github.com/foo/bar'); url = 'https:/\\/\\/\\github.com/foo/bar'; parsed = parse(url); assume(parsed.host).equals('github.com'); assume(parsed.hostname).equals('github.com'); assume(parsed.pathname).equals('/foo/bar'); + assume(parsed.slashes).is.true(); + assume(parsed.href).equals('https://github.com/foo/bar'); url = 'https:/github.com/foo/bar'; parsed = parse(url); assume(parsed.host).equals('github.com'); assume(parsed.pathname).equals('/foo/bar'); + assume(parsed.slashes).is.true(); + assume(parsed.href).equals('https://github.com/foo/bar'); url = 'https:\\github.com/foo/bar'; parsed = parse(url); assume(parsed.host).equals('github.com'); assume(parsed.pathname).equals('/foo/bar'); + assume(parsed.slashes).is.true(); + assume(parsed.href).equals('https://github.com/foo/bar'); url = 'https:github.com/foo/bar'; parsed = parse(url); assume(parsed.host).equals('github.com'); assume(parsed.pathname).equals('/foo/bar'); + assume(parsed.slashes).is.true(); + assume(parsed.href).equals('https://github.com/foo/bar'); url = 'https:github.com/foo/bar'; parsed = parse(url); assume(parsed.host).equals('github.com'); assume(parsed.pathname).equals('/foo/bar'); + assume(parsed.slashes).is.true(); + assume(parsed.href).equals('https://github.com/foo/bar'); }); it('handles slashes after the protocol for non special URLs', function () { @@ -325,24 +349,28 @@ describe('url-parse', function () { assume(parsed.hostname).equals(''); assume(parsed.pathname).equals('example.com'); assume(parsed.href).equals('foo:example.com'); + assume(parsed.slashes).is.false(); url = 'foo:/example.com'; parsed = parse(url); assume(parsed.hostname).equals(''); assume(parsed.pathname).equals('/example.com'); assume(parsed.href).equals('foo:/example.com'); + assume(parsed.slashes).is.false(); url = 'foo://example.com'; parsed = parse(url); assume(parsed.hostname).equals('example.com'); assume(parsed.pathname).equals('/'); assume(parsed.href).equals('foo://example.com/'); + assume(parsed.slashes).is.true(); url = 'foo:///example.com'; parsed = parse(url); assume(parsed.hostname).equals(''); assume(parsed.pathname).equals('/example.com'); assume(parsed.href).equals('foo:///example.com'); + assume(parsed.slashes).is.true(); }) describe('origin', function () { From eda1342a352b980710e1947e7b0c617e53894310 Mon Sep 17 00:00:00 2001 From: Luigi Pinca Date: Sat, 24 Jul 2021 09:35:58 +0200 Subject: [PATCH 3/4] [fix] Add a leading slash only if the URL is special If the value of the `pathname` property does not start with a `/`, add it only if the URL is special. --- index.js | 5 +---- test/test.js | 4 ++-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/index.js b/index.js index 73b53f6..6f2299a 100644 --- a/index.js +++ b/index.js @@ -347,10 +347,7 @@ function Url(address, location, parser) { // Default to a / for pathname if none exists. This normalizes the URL // to always have a / // - if ( - url.pathname.charAt(0) !== '/' - && (url.hostname || url.protocol === 'file:') - ) { + if (url.pathname.charAt(0) !== '/' && isSpecial(url.protocol)) { url.pathname = '/' + url.pathname; } diff --git a/test/test.js b/test/test.js index 71cc473..fc240fc 100644 --- a/test/test.js +++ b/test/test.js @@ -361,8 +361,8 @@ describe('url-parse', function () { url = 'foo://example.com'; parsed = parse(url); assume(parsed.hostname).equals('example.com'); - assume(parsed.pathname).equals('/'); - assume(parsed.href).equals('foo://example.com/'); + assume(parsed.pathname).equals(''); + assume(parsed.href).equals('foo://example.com'); assume(parsed.slashes).is.true(); url = 'foo:///example.com'; From e24fca31c61cc38f912c87225e28251d1b72599e Mon Sep 17 00:00:00 2001 From: Luigi Pinca Date: Sat, 24 Jul 2021 18:19:10 +0200 Subject: [PATCH 4/4] [fix] Use `'null'` as `origin` for non special URLs --- index.js | 4 ++-- test/test.js | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/index.js b/index.js index 6f2299a..8e31ae3 100644 --- a/index.js +++ b/index.js @@ -371,7 +371,7 @@ function Url(address, location, parser) { url.password = instruction[1] || ''; } - url.origin = url.protocol && url.host && url.protocol !== 'file:' + url.origin = url.protocol !== 'file:' && isSpecial(url.protocol) && url.host ? url.protocol +'//'+ url.host : 'null'; @@ -464,7 +464,7 @@ function set(part, value, fn) { if (ins[4]) url[ins[1]] = url[ins[1]].toLowerCase(); } - url.origin = url.protocol && url.host && url.protocol !== 'file:' + url.origin = url.protocol !== 'file:' && isSpecial(url.protocol) && url.host ? url.protocol +'//'+ url.host : 'null'; diff --git a/test/test.js b/test/test.js index fc240fc..d5a6cab 100644 --- a/test/test.js +++ b/test/test.js @@ -395,6 +395,13 @@ describe('url-parse', function () { assume(parsed.origin).equals('null'); }); + it('is null for non special URLs', function () { + var o = parse('foo://example.com/pathname'); + assume(o.hostname).equals('example.com'); + assume(o.pathname).equals('/pathname'); + assume(o.origin).equals('null'); + }); + it('removes default ports for http', function () { var o = parse('http://google.com:80/pathname'); assume(o.origin).equals('http://google.com');