From adbe1f69647b0bef47c8f8d20996116fc19f8413 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9e=20Kooi?= Date: Wed, 22 Jan 2020 16:45:30 +0100 Subject: [PATCH] typescriptify (#70) * typescriptify * add types key to package.json * add some comments * use a const enum for internal token types * same argument order all around --- .nycrc | 5 +- package.json | 23 +-- rollup.config.js | 6 +- src/index.js | 185 ------------------- src/index.ts | 273 +++++++++++++++++++++++++++++ src/{url-regex.js => url-regex.ts} | 2 +- test/index.js | 6 +- test/mocha.opts | 1 - tsconfig.json | 13 ++ 9 files changed, 301 insertions(+), 213 deletions(-) delete mode 100644 src/index.js create mode 100644 src/index.ts rename src/{url-regex.js => url-regex.ts} (92%) delete mode 100644 test/mocha.opts create mode 100644 tsconfig.json diff --git a/.nycrc b/.nycrc index 0107782..e61ef92 100644 --- a/.nycrc +++ b/.nycrc @@ -1,11 +1,10 @@ { - "include": ["src/**/*.js"], + "include": ["src/*.ts", "dist/*.js"], "lines": 100, "branches": 100, "statements": 100, "functions": 100, "reporter": ["lcov"], "check-coverage": true, - "sourceMap": false, - "instrument": false + "sourceMap": true } diff --git a/package.json b/package.json index 078b0f2..2eb1812 100644 --- a/package.json +++ b/package.json @@ -6,7 +6,7 @@ "author": "Renée Kooi ", "main": "dist/u-wave-parse-chat-markup.js", "module": "dist/u-wave-parse-chat-markup.mjs", - "jsnext:main": "src/index.js", + "types": "dist/index.d.ts", "repository": "u-wave/parse-chat-markup", "keywords": [ "u-wave" @@ -14,26 +14,15 @@ "bugs": "https://github.com/u-wave/parse-chat-markup/issues", "homepage": "https://github.com/u-wave/parse-chat-markup#readme", "scripts": { - "prepublish": "npm run build", - "build": "rollup -c", - "test:lint": "eslint .", - "test:mocha": "cross-env BABEL_ENV=test mocha", - "cov": "nyc npm run test:mocha", - "test": "npm run cov && npm run test:lint" + "prepare": "rollup -c", + "test": "npm run prepare && nyc mocha" }, "devDependencies": { - "@babel/core": "^7.0.0", - "@babel/preset-env": "^7.0.0", - "@babel/register": "^7.0.0", - "babel-plugin-istanbul": "^6.0.0", "chai": "^4.1.2", - "cross-env": "^6.0.0", - "eslint": "^6.0.0", - "eslint-config-airbnb-base": "^14.0.0", - "eslint-plugin-import": "^2.18.2", "mocha": "^6.0.0", "nyc": "^15.0.0", - "rollup": "^1.0.0", - "rollup-plugin-babel": "^4.0.2" + "rollup": "^1.29.1", + "rollup-plugin-typescript2": "^0.25.3", + "typescript": "^3.7.5" } } diff --git a/rollup.config.js b/rollup.config.js index a834129..7078b0c 100644 --- a/rollup.config.js +++ b/rollup.config.js @@ -1,14 +1,14 @@ -import babel from 'rollup-plugin-babel'; +import typescript from 'rollup-plugin-typescript2'; const pkg = require('./package.json'); export default { - input: './src/index.js', + input: './src/index.ts', output: [ { format: 'cjs', file: pkg.main, exports: 'named' }, { format: 'es', file: pkg.module }, ], plugins: [ - babel(), + typescript(), ], }; diff --git a/src/index.js b/src/index.js deleted file mode 100644 index ac797a3..0000000 --- a/src/index.js +++ /dev/null @@ -1,185 +0,0 @@ -import urlRegExp from './url-regex'; - -function escapeStringRegExp(str) { - return str.replace(/[|\\{}()[\]^$+*?.]/g, '\\$&'); -} - -function Token(type, text, raw = text) { - this.type = type; - this.text = text; - this.raw = raw; -} - -/** - * Sort users by username length. Longest usernames first. - * - * @param {Array.} users - * @return {Array.} - */ - -function sortMentions(mentions) { - return mentions.slice().sort((a, b) => b.length - a.length); -} - -/** - * Create a regex that matches a specific username or group being mentioned. - * - * @param {string} mention Mentionable name. - * @return {RegExp} - */ -function mentionRegExp(mention) { - return new RegExp(`^${escapeStringRegExp(mention)}(?:\\b|\\s|\\W|$)`, 'i'); -} - -/** - * Case-insensitively get the correct emoji name from the possible emoji for an - * input string. - * - * @param {Array.} names All possible emoji names. - * @param {string} match The input string. - * @return {string|null} The correct emoji name (including casing), or `null` if - * the requested emoji does not exist. - */ -function findEmoji(names, match) { - const compare = match.toLowerCase(); - for (let i = 0; i < names.length; i += 1) { - const name = names[i].toLowerCase(); - if (name === compare) { - return names[i]; - } - } - - return null; -} - -function tokenize(text, opts) { - let chunk; - let i = 0; - const mentions = sortMentions(opts.mentions || []); - const tokens = []; - // adds a token of type `type` if the current chunk starts with - // a `delim`-delimited string - const delimited = (start, endRx, type) => { - if (chunk[0] === start && chunk[1] !== start) { - const end = 1 + chunk.slice(1).search(endRx); - if (end) { - tokens.push(new Token(type, chunk.slice(1, end))); - i += end + 1; - return true; - } - } - return false; - }; - const emoji = (type, emojiNames) => { - const match = /^:([A-Za-z0-9_+-]+):/.exec(chunk); - if (match) { - // if a whitelist of emoji names is given, only accept emoji from that - // list. - const emojiName = emojiNames ? findEmoji(emojiNames, match[1]) : match[1]; - if (emojiName) { - tokens.push(new Token(type, emojiName, match[0])); - i += match[0].length; - return true; - } - } - return false; - }; - const mention = (start, type) => { - if (chunk[0] === start) { - const maybeMention = chunk.slice(1); - for (let mi = 0, ml = mentions.length; mi < ml; mi += 1) { - const candidate = mentions[mi]; - if (mentionRegExp(candidate).test(maybeMention)) { - const end = candidate.length + 1; - tokens.push(new Token(type, chunk.slice(1, end), chunk.slice(0, end))); - i += end; - return true; - } - } - } - return false; - }; - const linkRx = new RegExp(`^${urlRegExp().source}`, 'i'); - const link = (type) => { - const match = linkRx.exec(chunk); - if (match) { - tokens.push(new Token(type, chunk.slice(0, match[0].length))); - i += match[0].length; - return true; - } - return false; - }; - // eat spaces - const space = () => { - // .slice again because `i` changed - const m = /^\s+/.exec(text.slice(i)); - if (m) { - tokens.push(new Token('word', m[0])); - i += m[0].length; - } - }; - // tokenize text, just loop until it's done! - chunk = text; - while (chunk) { - const found = emoji('emoji', opts.emojiNames) - || delimited('_', /_(\W|$)/, 'italic') - || delimited('*', /\*(\W|$)/, 'bold') - || delimited('`', /`(\W|$)/, 'code') - || delimited('~', /~(\W|$)/, 'strike') - || mention('@', 'mention') - || link('link'); - if (!found) { - let end = chunk.indexOf(' ', 1) + /* eat space */ 1; - if (end === 0) { // no match, = -1 + 1 - end = chunk.length; - } - // append to previous token if it was also a word - if (tokens.length > 0 && tokens[tokens.length - 1].type === 'word') { - tokens[tokens.length - 1].text += chunk.slice(0, end); - } else { - tokens.push(new Token('word', chunk.slice(0, end))); - } - i += end; - } - space(); - chunk = text.slice(i); - } - return tokens; -} - -function httpify(text) { - if (!/^[a-z]+:/.test(text)) { - return `http://${text}`; - } - return text; -} - -// Parses a chat message into a tree-ish structure. -// Options: -// * mentions: Names that can be mentioned. -export default function parse(message, opts = {}) { - if (typeof message !== 'string') { - throw new TypeError('Expected a string'); - } - - return tokenize(message, opts).map((token) => { - switch (token.type) { - case 'italic': - return { type: 'italic', content: parse(token.text, opts) }; - case 'bold': - return { type: 'bold', content: parse(token.text, opts) }; - case 'code': - return { type: 'code', content: [token.text] }; - case 'strike': - return { type: 'strike', content: parse(token.text, opts) }; - case 'emoji': - return { type: 'emoji', name: token.text }; - case 'mention': - return { type: 'mention', mention: token.text.toLowerCase(), raw: token.text }; - case 'link': - return { type: 'link', text: token.text, href: httpify(token.text) }; - default: - return token.text; - } - }); -} diff --git a/src/index.ts b/src/index.ts new file mode 100644 index 0000000..91e25f8 --- /dev/null +++ b/src/index.ts @@ -0,0 +1,273 @@ +import urlRegExp from './url-regex'; + +/** + * A node of italicised text. + */ +export type ItalicNode = { + type: 'italic', + content: MarkupNode[], +}; + +/** + * A node of bold text. + */ +export type BoldNode = { + type: 'bold', + content: MarkupNode[], +}; + +/** + * A code node, containing unstyled text. + */ +export type CodeNode = { + type: 'code', + content: [string], +}; + +/** + * A node of struck-through text. + */ +export type StrikeNode = { + type: 'strike', + content: MarkupNode[], +}; + +/** + * An emoji. + */ +export type EmojiNode = { + type: 'emoji', + name: string, +}; + +/** + * A node that mentions a user. + */ +export type MentionNode = { + type: 'mention', + mention: string, + raw: string, +}; + +/** + * A node that contains a web link. + */ +export type LinkNode = { + type: 'link', + text: string, + href: string, +}; + +/** + * Markup node types: either raw text or one of the Node types. + */ +export type MarkupNode = string | ItalicNode | BoldNode | CodeNode | StrikeNode | EmojiNode | MentionNode | LinkNode; + +/** + * Options for the parser. + */ +export type MarkupOptions = { + /** + * The names of the available :emoji: shortcodes. + */ + emojiNames?: string[], + /** + * Usernames that can be mentioned. + */ + mentions?: string[], +}; + +function escapeStringRegExp(str: string) { + return str.replace(/[|\\{}()[\]^$+*?.]/g, '\\$&'); +} + +const enum TokenType { + Italic, + Bold, + Code, + Strike, + Emoji, + Mention, + Link, + Text, +}; + +interface Token { + type: TokenType; + text: string; + raw: string; +}; + +function createToken(type: TokenType, text: string, raw: string = text): Token { + return { type, text, raw}; +} + +/** + * Sort users by username length. Longest usernames first. + */ +function sortMentions(mentions: string[]): string[] { + return mentions.slice().sort((a, b) => b.length - a.length); +} + +/** + * Create a regex that matches a specific username or group being mentioned. + * + * @param {string} mention Mentionable name. + * @return {RegExp} + */ +function mentionRegExp(mention: string): RegExp { + return new RegExp(`^${escapeStringRegExp(mention)}(?:\\b|\\s|\\W|$)`, 'i'); +} + +/** + * Case-insensitively get the correct emoji name from the possible emoji for an + * input string. + * + * @param {Array.} names All possible emoji names. + * @param {string} match The input string. + * @return {string|null} The correct emoji name (including casing), or `null` if + * the requested emoji does not exist. + */ +function findEmoji(names: string[], match: string): string | null { + const compare = match.toLowerCase(); + for (let i = 0; i < names.length; i += 1) { + const name = names[i].toLowerCase(); + if (name === compare) { + return names[i]; + } + } + + return null; +} + +function tokenize(text: string, options: MarkupOptions) { + let chunk: string; + let i = 0; + const mentions = sortMentions(options.mentions || []); + const tokens: Token[] = []; + // adds a token of type `type` if the current chunk starts with + // a `delim`-delimited string + const delimited = (type: TokenType, start: string, endRx: RegExp) => { + if (chunk[0] === start && chunk[1] !== start) { + const end = 1 + chunk.slice(1).search(endRx); + if (end) { + tokens.push(createToken(type, chunk.slice(1, end))); + i += end + 1; + return true; + } + } + return false; + }; + const emoji = (type: TokenType, emojiNames?: string[]) => { + const match = /^:([A-Za-z0-9_+-]+):/.exec(chunk); + if (match) { + // if a whitelist of emoji names is given, only accept emoji from that + // list. + const emojiName = emojiNames ? findEmoji(emojiNames, match[1]) : match[1]; + if (emojiName) { + tokens.push(createToken(type, emojiName, match[0])); + i += match[0].length; + return true; + } + } + return false; + }; + const mention = (type: TokenType, start: string) => { + if (chunk[0] === start) { + const maybeMention = chunk.slice(1); + for (let mi = 0, ml = mentions.length; mi < ml; mi += 1) { + const candidate = mentions[mi]; + if (mentionRegExp(candidate).test(maybeMention)) { + const end = candidate.length + 1; + tokens.push(createToken(type, chunk.slice(1, end), chunk.slice(0, end))); + i += end; + return true; + } + } + } + return false; + }; + const linkRx = new RegExp(`^${urlRegExp().source}`, 'i'); + const link = (type: TokenType) => { + const match = linkRx.exec(chunk); + if (match) { + tokens.push(createToken(type, chunk.slice(0, match[0].length))); + i += match[0].length; + return true; + } + return false; + }; + // eat spaces + const space = () => { + // .slice again because `i` changed + const m = /^\s+/.exec(text.slice(i)); + if (m) { + tokens.push(createToken(TokenType.Text, m[0])); + i += m[0].length; + } + }; + // tokenize text, just loop until it's done! + chunk = text; + while (chunk) { + const found = emoji(TokenType.Emoji, options.emojiNames) + || delimited(TokenType.Italic, '_', /_(\W|$)/) + || delimited(TokenType.Bold, '*', /\*(\W|$)/) + || delimited(TokenType.Code, '`', /`(\W|$)/) + || delimited(TokenType.Strike, '~', /~(\W|$)/) + || mention(TokenType.Mention, '@') + || link(TokenType.Link); + if (!found) { + let end = chunk.indexOf(' ', 1) + /* eat space */ 1; + if (end === 0) { // no match, = -1 + 1 + end = chunk.length; + } + // append to previous token if it was also text + if (tokens.length > 0 && tokens[tokens.length - 1].type === TokenType.Text) { + tokens[tokens.length - 1].text += chunk.slice(0, end); + } else { + tokens.push(createToken(TokenType.Text, chunk.slice(0, end))); + } + i += end; + } + space(); + chunk = text.slice(i); + } + return tokens; +} + +function httpify(text: string): string { + if (!/^[a-z]+:/.test(text)) { + return `http://${text}`; + } + return text; +} + +/** + * Parses a chat message into a tree-ish structure. + */ +export default function parse(message: string, options: MarkupOptions = {}): MarkupNode[] { + if (typeof message !== 'string') { + throw new TypeError('Expected a string'); + } + + return tokenize(message, options).map((token) => { + switch (token.type) { + case TokenType.Italic: + return { type: 'italic', content: parse(token.text, options) }; + case TokenType.Bold: + return { type: 'bold', content: parse(token.text, options) }; + case TokenType.Code: + return { type: 'code', content: [token.text] }; + case TokenType.Strike: + return { type: 'strike', content: parse(token.text, options) }; + case TokenType.Emoji: + return { type: 'emoji', name: token.text }; + case TokenType.Mention: + return { type: 'mention', mention: token.text.toLowerCase(), raw: token.text }; + case TokenType.Link: + return { type: 'link', text: token.text, href: httpify(token.text) }; + case TokenType.Text: + return token.text; + } + }); +} diff --git a/src/url-regex.js b/src/url-regex.ts similarity index 92% rename from src/url-regex.js rename to src/url-regex.ts index 56b0120..b45a821 100644 --- a/src/url-regex.js +++ b/src/url-regex.ts @@ -1,7 +1,7 @@ /** * Adapted from https://github.com/kevva/url-regex. */ -export default function urlRegex() { +export default function urlRegex(): RegExp { const protocol = '(?:[a-z]+://)'; const auth = '(?:\\S+(?::\\S*)?@)?'; const host = '(?:(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)'; diff --git a/test/index.js b/test/index.js index 65e8447..90d182a 100644 --- a/test/index.js +++ b/test/index.js @@ -1,7 +1,7 @@ -import { expect } from 'chai'; -import parseChatMarkup from '../src/index'; +const { expect } = require('chai'); +const parseChatMarkup = require('..').default; -describe('utils/parseChatMarkup', () => { +describe('parseChatMarkup', () => { const bareOptions = {}; it('Only accepts string inputs', () => { diff --git a/test/mocha.opts b/test/mocha.opts deleted file mode 100644 index a36ebeb..0000000 --- a/test/mocha.opts +++ /dev/null @@ -1 +0,0 @@ --r @babel/register diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..12d0df7 --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,13 @@ +{ + "compilerOptions": { + "module": "es2015", + "target": "es5", + "declaration": true, + "newLine": "lf", + "strict": true, + "allowSyntheticDefaultImports": true + }, + "include": [ + "src/*.ts" + ] +}