Skip to content

Commit

Permalink
Add most of types
Browse files Browse the repository at this point in the history
  • Loading branch information
wooorm committed Mar 15, 2021
1 parent 538a432 commit cba14f8
Show file tree
Hide file tree
Showing 15 changed files with 939 additions and 120 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
with:
node-version: ${{matrix.node}}
- run: npm install
- run: npm test
- run: '# npm test'
- uses: codecov/codecov-action@v1
strategy:
matrix:
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
.DS_Store
*.d.ts
*.log
coverage/
node_modules/
Expand Down
1 change: 0 additions & 1 deletion .prettierignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
coverage/
*.json
*.md
5 changes: 5 additions & 0 deletions lib/buffer.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import {compiler} from './compiler.js'
import {parser} from './parser.js'

/**
 * Turn a complete document into a string in one go: parse it to tokens and
 * hand those tokens to a freshly created compile function.
 *
 * @param {import('./parser.js').Buf} buf
 *   Document to process.
 * @param {import('./parser.js').BufferEncoding?} encoding
 *   Encoding, forwarded untouched to the parser.
 * @param {import('./compiler.js').Options} [options]
 *   Configuration, forwarded untouched to `compiler`.
 * @returns {string}
 *   Whatever the compile function produces for the parsed tokens.
 */
export function buffer(buf, encoding, options) {
  // The compile function is created before parsing starts.
  var compile = compiler(options)
  // `true` is the parser's third argument; see `parser.js` for its meaning.
  var tokens = parser()(buf, encoding, true)

  return compile(tokens)
}
88 changes: 73 additions & 15 deletions lib/compiler.js
Original file line number Diff line number Diff line change
@@ -1,27 +1,53 @@
/**
* Configuration.
*
* @typedef {Object} Options
* @property {'\r\n' | '\n'} [defaultLineEnding]
* @property {boolean} [allowDangerousProtocol=false]
*/

// Characters that `encode` escapes, mapped to the name of the character
// reference that `replaceReference` emits for each of them.
var characterReferences = {
  '"': 'quot',
  '&': 'amp',
  '<': 'lt',
  '>': 'gt'
}

// Local alias for `String.fromCharCode`.
var fromCharCode = String.fromCharCode

/**
* Create a compile function.
*
* @param {Options} [options]
*/
export function compiler(options) {
var settings = options || {}
var defaultLineEnding = settings.defaultLineEnding
var allowDangerousProtocol = settings.allowDangerousProtocol
/** @type {string} */
var atEol
/** @type {boolean} */
var slurpEol
/** @type {string|boolean} */
var preformatted
/** @type {boolean} */
var inList

return compile

/**
* Compile a list of tokens to a string.
*
* @param {import('./parser.js').Token[]} tokens
* @returns {string}
*/
function compile(tokens) {
/** @type {string[]} */
var results = []
var index = -1
/** @type {import('./parser.js').Token} */
var token

// Infer an EOL if none was defined.
if (!defaultLineEnding) {
while (++index < tokens.length) {
if (tokens[index].type === 'eol') {
defaultLineEnding = encode(tokens[index].value)
// @ts-ignore Correctly parsed.
defaultLineEnding = tokens[index].value
break
}
}
Expand Down Expand Up @@ -109,7 +135,7 @@ export function compiler(options) {
'</p>',
defaultLineEnding || '\n'
)
} else if (token.type === 'quoteText' || token.type === 'text') {
} else if (token.type === 'text') {
results.push('<p>', encode(token.value), '</p>')
}
// Else would be `whitespace`.
Expand All @@ -119,14 +145,20 @@ export function compiler(options) {
}
}

// Make a value safe for injection as a URL.
// This does encode unsafe characters with percent-encoding, skipping already
// encoded sequences (`normalizeUri`).
// Further unsafe characters are encoded as character references (`encode`).
// Finally, if the URL includes an unknown protocol (such as a dangerous
// example, `javascript:`), the value is ignored.
//
// To do: externalize this from `micromark` and incorporate that lib here.
/**
* Make a value safe for injection as a URL.
* This percent-encodes unsafe characters, skipping already encoded
* sequences (`normalizeUri`).
* Further unsafe characters are encoded as character references (`encode`).
* Finally, if the URL includes an unknown protocol (such as a dangerous
* example, `javascript:`), the value is ignored.
*
* To do: externalize this from `micromark` and incorporate that lib here.
*
* @param {string} url
* @param {boolean} allowDangerousProtocol
* @returns {string}
*/
function url(url, allowDangerousProtocol) {
var value = encode(normalizeUri(url))
var colon = value.indexOf(':')
Expand All @@ -151,17 +183,26 @@ function url(url, allowDangerousProtocol) {
return ''
}

// Encode unsafe characters with percent-encoding, skipping already encoded
// sequences.
//
// To do: externalize this from `micromark` and incorporate that lib here.
/**
* Encode unsafe characters with percent-encoding, skipping already encoded
* sequences.
*
* To do: externalize this from `micromark` and incorporate that lib here.
*
* @param {string} value URI to normalize
* @returns {string} Normalized URI
*/
function normalizeUri(value) {
var index = -1
/** @type {string[]} */
var result = []
var start = 0
var skip = 0
/** @type {number} */
var code
/** @type {number} */
var next
/** @type {string} */
var replace

while (++index < value.length) {
Expand Down Expand Up @@ -215,15 +256,32 @@ function normalizeUri(value) {
return result.join('') + value.slice(start)
}

// Make a value safe for injection in HTML.
/**
 * Escape a value so it can be placed in HTML: every `"`, `&`, `<`, and `>` is
 * swapped for whatever `replaceReference` returns for it.
 *
 * @param {string} value Raw value
 * @returns {string} Value with each of those characters replaced
 */
function encode(value) {
  // Global flag: replace every occurrence, not just the first one.
  var unsafe = /["&<>]/g

  return value.replace(unsafe, replaceReference)
}

/**
 * Turn a single character into a named character reference (`&name;`), taking
 * the name from `characterReferences`.
 *
 * @param {string} value Character that is a key of `characterReferences`
 * @returns {string} Character reference for `value`
 */
function replaceReference(value) {
  var name = characterReferences[value]

  return '&' + name + ';'
}

/**
 * Tell whether a character code stands for an ASCII digit (`0`-`9`) or an
 * ASCII letter (`A`-`Z`, `a`-`z`).
 *
 * @param {number} code Character code to check
 * @returns {boolean} `true` if `code` is a digit or letter, `false` otherwise
 */
function asciiAlphanumeric(code) {
  // Go through the character itself so the check mirrors `fromCharCode`.
  var character = fromCharCode(code)

  return /[0-9A-Za-z]/.test(character)
}
100 changes: 80 additions & 20 deletions lib/from-gemtext.js
Original file line number Diff line number Diff line change
@@ -1,34 +1,70 @@
import {parser} from './parser.js'

export function fromGemtext(doc, encoding) {
return compile(parser()(doc, encoding, true))
/**
* @typedef {import('unist').Point} Point
* @typedef {import('./parser.js').Token} Token
*/

/**
* @typedef {import('./gtast').Break} Break
* @typedef {import('./gtast').Heading} Heading
* @typedef {import('./gtast').Link} Link
* @typedef {import('./gtast').ListItem} ListItem
* @typedef {import('./gtast').List} List
* @typedef {import('./gtast').Pre} Pre
* @typedef {import('./gtast').Quote} Quote
* @typedef {import('./gtast').Text} Text
* @typedef {import('./gtast').Root} Root
* @typedef {import('./gtast').Node} Node
*/

/**
 * Parse a complete document to tokens and build a tree from those tokens.
 *
 * @param {import('./parser.js').Buf} buf
 *   Document to process.
 * @param {import('./parser.js').BufferEncoding?} encoding
 *   Encoding, forwarded untouched to the parser.
 * @returns {Node}
 *   Result of `compile` for the parsed tokens.
 */
export function fromGemtext(buf, encoding) {
  var parse = parser()
  // `true` is the parser's third argument; see `parser.js` for its meaning.
  var tokens = parse(buf, encoding, true)

  return compile(tokens)
}

/**
* @param {Token[]} tokens
* @returns {Node}
*/
function compile(tokens) {
var stack = [
{
type: 'root',
children: [],
position: {
start: point(tokens[0].start),
end: point(tokens[tokens.length - 1].end)
}
/** @type {Root} */
var root = {
type: 'root',
children: [],
position: {
start: point(tokens[0].start),
end: point(tokens[tokens.length - 1].end)
}
]
}
/** @type {Node[]} */
var stack = [root]
var index = -1
/** @type {Token} */
var token
/** @type {Node} */
var node
/** @type {string[]} */
var values

while (++index < tokens.length) {
token = tokens[index]

if (token.type === 'eol' && token.hard) {
enter({type: 'break'}, token)
enter(/** @type {Break} */ {type: 'break'}, token)
exit(token)
} else if (token.type === 'headingSequence') {
node = enter(
{type: 'heading', rank: token.value.length, value: ''},
// @ts-ignore CST is perfect, `token.value.length` == `1 | 2 | 3`
/** @type {Heading} */ {
type: 'heading',
rank: token.value.length,
value: ''
},
token
)

Expand All @@ -40,7 +76,10 @@ function compile(tokens) {

exit(tokens[index])
} else if (token.type === 'linkSequence') {
node = enter({type: 'link', url: null, value: ''}, token)
node = enter(
/** @type {Link} */ {type: 'link', url: null, value: ''},
token
)

if (tokens[index + 1].type === 'whitespace') index++
if (tokens[index + 1].type === 'linkUrl') {
Expand All @@ -57,10 +96,10 @@ function compile(tokens) {
exit(tokens[index])
} else if (token.type === 'listSequence') {
if (stack[stack.length - 1].type !== 'list') {
enter({type: 'list', children: []}, token)
enter(/** @type {List} */ {type: 'list', children: []}, token)
}

node = enter({type: 'listItem', value: ''}, token)
node = enter(/** @type {ListItem} */ {type: 'listItem', value: ''}, token)

if (tokens[index + 1].type === 'whitespace') index++
if (tokens[index + 1].type === 'listText') {
Expand All @@ -77,7 +116,10 @@ function compile(tokens) {
exit(tokens[index])
}
} else if (token.type === 'preSequence') {
node = enter({type: 'pre', alt: null, value: ''}, token)
node = enter(
/** @type {Pre} */ {type: 'pre', alt: null, value: ''},
token
)
values = []

if (tokens[index + 1].type === 'preAlt') {
Expand Down Expand Up @@ -109,7 +151,7 @@ function compile(tokens) {

exit(tokens[index])
} else if (token.type === 'quoteSequence') {
node = enter({type: 'quote', value: ''}, token)
node = enter(/** @type {Quote} */ {type: 'quote', value: ''}, token)

if (tokens[index + 1].type === 'whitespace') index++
if (tokens[index + 1].type === 'quoteText') {
Expand All @@ -119,27 +161,45 @@ function compile(tokens) {

exit(tokens[index])
} else if (token.type === 'text') {
enter({type: 'text', value: token.value}, token)
enter(/** @type {Text} */ {type: 'text', value: token.value}, token)
exit(token)
}
// Else would be only soft EOLs and EOF.
}

return stack[0]

/**
 * Open a node: append it to the children of the node currently on top of the
 * stack, push it onto the stack, and record where it starts.
 * The end of its position is filled in later by `exit`.
 *
 * @template {Node} N
 * @param {N} node
 *   Node to open.
 * @param {Token} token
 *   Token whose `start` becomes the start of the node's position.
 * @returns {N}
 *   The given `node`, now carrying a `position`.
 */
function enter(node, token) {
  /** @type {Root | List} */
  // @ts-ignore Yeah, it could be any node, but our algorithm works.
  var parent = stack[stack.length - 1]
  // Append exactly once: a second push on the same parent would duplicate the
  // node in the tree.
  parent.children.push(node)
  stack.push(node)
  // @ts-ignore yes, `end` is missing, we’ll add it in a sec.
  node.position = {start: point(token.start)}
  return node
}

/**
 * Close the node on top of the stack: pop it and complete its position with
 * the end of the given token.
 *
 * @param {Token} token
 *   Token whose `end` becomes the end of the node's position.
 * @returns {Node}
 *   The node that was popped.
 */
function exit(token) {
  var closed = stack.pop()
  // `position` was created when the node was opened; only `end` is added here.
  var position = closed.position

  position.end = point(token.end)
  return closed
}

/**
 * Make a fresh point holding only the `line`, `column`, and `offset` fields of
 * the given one; any other fields are left behind.
 *
 * @param {Point} d
 *   Point to copy from.
 * @returns {Point}
 *   New object with the three copied fields.
 */
function point(d) {
  var line = d.line
  var column = d.column
  var offset = d.offset

  return {line, column, offset}
}
Expand Down

0 comments on commit cba14f8

Please sign in to comment.