Skip to content

Commit

Permalink
Replace "default" token with "whitespace" and "identifier" tokens, wi…
Browse files Browse the repository at this point in the history
…th fallback to "unknown" token.

Also, change backticked identifiers like `foo` to be classified as "identifier" rather than "string.

As a bonus, this simplifies getSegments() from 30 lines down to 8.

About tests though, I only updated the 'getSegments()', other tests are currently failing.
If we want to merge this into master I'll update all the tests and documentation.
  • Loading branch information
wkeese committed Oct 3, 2023
1 parent cf6e042 commit 8cabde8
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 57 deletions.
53 changes: 18 additions & 35 deletions lib/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ const highlighters = [
/\b(?<number> \d+ (?:\.\d+)? )\b/,

// Note: Repeating string escapes like 'sql''server' will also work as they are just repeating strings
/(?<string> '(?: [^'\\] | \\. )*' | "(?: [^"\\] | \\. )*" | `(?: [^`\\] | \\. )*` )/,
/(?<string> '(?: [^'\\] | \\. )*' | "(?: [^"\\] | \\. )*" )/,

/(?<comment> --[^\n\r]* | #[^\n\r]* | \/\* (?: [^*] | \* (?!\/) )* \*\/ )/,

Expand All @@ -35,54 +35,37 @@ const highlighters = [

/(?<bracket> [()] )/,

/(?<special> != | [=%*/\-+,;:<>] )/
/(?<special> != | [=%*/\-+,;:<>.] )/,

/(?<identifier> \b\w+\b | `(?: [^`\\] | \\. )*`)/,

/(?<whitespace> \s+ )/,

/(?<unknown> \.+? )/
]

function getRegexString (regex) {
const str = regex.toString()
return str.replace(/^\/|\/\w*$|[\t ]+/g, '')
}

// Regex of the shape /(.*?)|((?<token1>...)|(?<token2>...)|...|$)/y
// Regex of the shape /((?<token1>...)|(?<token2>...)|...|$)/g
const tokenizer = new RegExp(
'(.*?)(' +
'(' +
'\\b(?<keyword>' + keywords.join('|') + ')\\b|' +
highlighters.map(getRegexString).join('|') +
'|$)', // $ needed to to match "default" till the end of string
'isy'
')',
'gis'
)

function getSegments (sqlString) {
const segments = []
let match

// Reset the starting position
tokenizer.lastIndex = 0

// This is probably the one time when an assignment inside a condition makes sense
// eslint-disable-next-line no-cond-assign
while (match = tokenizer.exec(sqlString)) {
if (match[1]) {
segments.push({
name: DEFAULT_KEYWORD,
content: match[1]
})
}

if (match[2]) {
const name = Object.keys(match.groups).find(key => match.groups[key])
segments.push({
name,
content: match.groups[name]
})
const segments = Array.from(sqlString.matchAll(tokenizer), match => {
const name = Object.keys(match.groups).find(key => match.groups[key])
return {
name,
content: match.groups[name]
}

// Stop at the end of string
if (match.index + match[0].length >= sqlString.length) {
break
}
}

})
return segments
}

Expand Down
44 changes: 22 additions & 22 deletions test/index.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -254,45 +254,45 @@ describe('html', () => {
})
})

describe('getSegments', () => {
describe.only('getSegments', () => {
it('complex query', () => {
expect(getSegments("SELECT COUNT(id), `id`, `username` FROM `users` WHERE `email` = 'test@example.com' AND `foo` = 'BAR' OR 1=1"))
.toStrictEqual([
{ name: 'keyword', content: 'SELECT' },
{ name: 'default', content: ' ' },
{ name: 'whitespace', content: ' ' },
{ name: 'function', content: 'COUNT' },
{ name: 'bracket', content: '(' },
{ name: 'default', content: 'id' },
{ name: 'identifier', content: 'id' },
{ name: 'bracket', content: ')' },
{ name: 'special', content: ',' },
{ name: 'default', content: ' ' },
{ name: 'string', content: '`id`' },
{ name: 'whitespace', content: ' ' },
{ name: 'identifier', content: '`id`' },
{ name: 'special', content: ',' },
{ name: 'default', content: ' ' },
{ name: 'string', content: '`username`' },
{ name: 'default', content: ' ' },
{ name: 'whitespace', content: ' ' },
{ name: 'identifier', content: '`username`' },
{ name: 'whitespace', content: ' ' },
{ name: 'keyword', content: 'FROM' },
{ name: 'default', content: ' ' },
{ name: 'string', content: '`users`' },
{ name: 'default', content: ' ' },
{ name: 'whitespace', content: ' ' },
{ name: 'identifier', content: '`users`' },
{ name: 'whitespace', content: ' ' },
{ name: 'keyword', content: 'WHERE' },
{ name: 'default', content: ' ' },
{ name: 'string', content: '`email`' },
{ name: 'default', content: ' ' },
{ name: 'whitespace', content: ' ' },
{ name: 'identifier', content: '`email`' },
{ name: 'whitespace', content: ' ' },
{ name: 'special', content: '=' },
{ name: 'default', content: ' ' },
{ name: 'whitespace', content: ' ' },
{ name: 'string', content: "'test@example.com'" },
{ name: 'default', content: ' ' },
{ name: 'whitespace', content: ' ' },
{ name: 'keyword', content: 'AND' },
{ name: 'default', content: ' ' },
{ name: 'string', content: '`foo`' },
{ name: 'default', content: ' ' },
{ name: 'whitespace', content: ' ' },
{ name: 'identifier', content: '`foo`' },
{ name: 'whitespace', content: ' ' },
{ name: 'special', content: '=' },
{ name: 'default', content: ' ' },
{ name: 'whitespace', content: ' ' },
{ name: 'string', content: "'BAR'" },
{ name: 'default', content: ' ' },
{ name: 'whitespace', content: ' ' },
{ name: 'keyword', content: 'OR' },
{ name: 'default', content: ' ' },
{ name: 'whitespace', content: ' ' },
{ name: 'number', content: '1' },
{ name: 'special', content: '=' },
{ name: 'number', content: '1' }
Expand Down

0 comments on commit 8cabde8

Please sign in to comment.