Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature: implement EngineData parsing #43

Merged
merged 7 commits into from
Aug 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
10 changes: 9 additions & 1 deletion packages/psd/src/classes/Layer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Copyright 2021-present NAVER WEBTOON
// MIT License

import {ImageData} from "../interfaces";
import {EngineData, ImageData} from "../interfaces";
import {LayerFrame} from "../sections";
import {NodeParent} from "./Node";
import {NodeBase} from "./NodeBase";
Expand Down Expand Up @@ -65,6 +65,14 @@ export class Layer
return this.layerFrame.layerProperties.text;
}

/**
 * Text properties of this layer.
 *
 * Holds a value only when this layer is a text layer; for every other layer
 * the property is `undefined`.
 */
get textProperties(): EngineData | undefined {
  const {textProperties} = this.layerFrame.layerProperties;
  return textProperties;
}

protected get imageData(): ImageData {
const {red, green, blue, alpha} = this.layerFrame;

Expand Down
7 changes: 7 additions & 0 deletions packages/psd/src/engineData/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// @webtoon/psd
// Copyright 2021-present NAVER WEBTOON
// MIT License

export * from "./lexer";
export * from "./parser";
export * from "./validator";
228 changes: 228 additions & 0 deletions packages/psd/src/engineData/lexer.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
// @webtoon/psd
// Copyright 2021-present NAVER WEBTOON
// MIT License

// Based on PDF grammar: https://web.archive.org/web/20220226063926/https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf
// Section 7.2 - Lexical Conventions

import {
Cursor,
InvalidEngineDataBoolean,
InvalidEngineDataNumber,
InvalidEngineDataTextBOM,
} from "../utils";

/**
 * Kinds of tokens the EngineData lexer can emit.
 *
 * The members are numeric so that `TokenType[member]` yields the member name.
 */
export enum TokenType {
  /** Parenthesised text, e.g. `(...)`. */
  String = 0,
  /** Start of a dictionary. */
  DictBeg = 1,
  /** End of a dictionary. */
  DictEnd = 2,
  /** Start of an array (`[`). */
  ArrBeg = 3,
  /** End of an array (`]`). */
  ArrEnd = 4,
  /** A name introduced by `/`. */
  Name = 5,
  /** A numeric literal. */
  Number = 6,
  /** `true` or `false`. */
  Boolean = 7,
}

/**
 * A single lexical token, discriminated by its `type` field.
 *
 * Structural tokens carry no payload; the remaining tokens carry the decoded
 * `value` they were read from.
 */
export type Token =
  // Structural markers (no payload)
  | {type: TokenType.DictBeg}
  | {type: TokenType.DictEnd}
  | {type: TokenType.ArrBeg}
  | {type: TokenType.ArrEnd}
  // Value-carrying tokens
  | {type: TokenType.Name; value: string}
  | {type: TokenType.String; value: string}
  | {type: TokenType.Number; value: number}
  | {type: TokenType.Boolean; value: boolean};

/** Byte values the lexer treats as whitespace between tokens. */
const WhitespaceCharacters = new Set<number>([
  0x00, // NUL
  0x09, // horizontal tab
  0x0c, // form feed
  0x20, // ' '
  0x0a, // \n
  0x0d, // \r
]);

/** First bytes of the two boolean literals (`false` and `true`). */
const BooleanStartCharacters = new Set<number>(
  ["f", "t"].map((letter) => letter.charCodeAt(0))
);

/** Byte value of every delimiter character the lexer recognises, keyed by the character. */
const Delimiters = {
  "(": 40, // 0x28
  ")": 41, // 0x29
  "<": 60, // 0x3c
  ">": 62, // 0x3e
  "[": 91, // 0x5b
  "]": 93, // 0x5d
  "/": 47, // 0x2f
  "\\": 92, // 0x5c
  // NOTE: These have meaning within PDF. Are they used here?
  // "{": 123,
  // "}": 125,
  // "%": 37,
};

/** The delimiter byte values as a set, for membership checks. */
const DelimiterCharacters = new Set<number>(Object.values(Delimiters));

/**
 * Lookup table indexed by byte value (0-255): `true` when the byte is a
 * whitespace or delimiter character, i.e. when it terminates a bare token.
 */
const STRING_TOKEN_JT: boolean[] = Array.from(
  {length: 256},
  (_, byte) => WhitespaceCharacters.has(byte) || DelimiterCharacters.has(byte)
);

/** Shared decoder for bare (non-parenthesised) tokens. */
const STRING_DECODER = new TextDecoder("utf-8");

/**
 * Reads a bare token from `cursor`: every byte up to, but not including, the
 * next whitespace or delimiter byte (or the end of the iteration).
 *
 * @param cursor - Cursor positioned at the first byte of the token.
 * @returns The token bytes decoded as UTF-8.
 */
function stringToken(cursor: Cursor): string {
  // Measure the token first, then take exactly that many bytes.
  let byteCount = 0;
  for (const byte of cursor.iter()) {
    if (STRING_TOKEN_JT[byte]) {
      break;
    }
    byteCount += 1;
  }
  return STRING_DECODER.decode(cursor.take(byteCount));
}

/**
 * Splits a raw EngineData byte buffer into a flat list of {@link Token}s.
 *
 * Whitespace separates tokens and is discarded. Delimiter bytes introduce
 * strings, names, arrays and dictionaries; anything else is read as a boolean
 * (when it starts with `f` or `t`) or as a number.
 */
export class Lexer {
  /** Read head over the bytes being tokenized. */
  cursor: Cursor;

  /**
   * @param cursor - The raw EngineData bytes; wrapped with `Cursor.from`.
   */
  constructor(cursor: Uint8Array) {
    this.cursor = Cursor.from(cursor);
  }

  /**
   * Consumes the rest of the input and returns the tokens found, in order.
   *
   * @returns Every token from the current cursor position to the end.
   * @throws InvalidEngineDataBoolean when a bare token starting with `f`/`t`
   *   is neither `true` nor `false`.
   * @throws InvalidEngineDataNumber when a bare token is not a valid number.
   * @throws InvalidEngineDataTextBOM when a text starts with a malformed BOM.
   */
  tokens(): Token[] {
    const value = [] as Token[];
    while (!this.done()) {
      const val = this.cursor.one();

      if (WhitespaceCharacters.has(val)) {
        // Swallow the remainder of the whitespace run in one go.
        while (!this.done() && WhitespaceCharacters.has(this.cursor.peek())) {
          this.cursor.pass(1);
        }
        continue;
      }

      if (!DelimiterCharacters.has(val)) {
        // Only two token kinds are left: number or boolean.
        // `val` is the first byte of that token, so hand it back before
        // reading the token as a whole.
        this.cursor.unpass(1);
        if (BooleanStartCharacters.has(val)) {
          value.push({type: TokenType.Boolean, value: this.boolean()});
        } else {
          value.push({type: TokenType.Number, value: this.number()});
        }
        continue;
      }

      switch (val) {
        case Delimiters["("]:
          value.push({type: TokenType.String, value: this.text()});
          break;
        case Delimiters["["]:
          value.push({type: TokenType.ArrBeg});
          break;
        case Delimiters["]"]:
          value.push({type: TokenType.ArrEnd});
          break;
        case Delimiters["<"]:
          // The byte after the first `<` is skipped without being checked.
          // NOTE: assert that it is < indeed?
          this.cursor.pass(1);
          value.push({type: TokenType.DictBeg});
          break;
        case Delimiters[">"]:
          // The byte after the first `>` is skipped without being checked.
          // NOTE: assert that it is > indeed?
          this.cursor.pass(1);
          value.push({type: TokenType.DictEnd});
          break;
        case Delimiters["/"]:
          value.push({type: TokenType.Name, value: this.string()});
          break;
        default:
          // A stray `)` or `\` outside of a text ends up here. It is reported
          // through console.assert and otherwise skipped.
          console.assert(
            false,
            "Unhandled delimiter: '%s'",
            String.fromCharCode(val)
          );
          break;
      }
    }
    return value;
  }

  /** Whether the cursor has reached the end of the input. */
  private done(): boolean {
    return this.cursor.position >= this.cursor.length;
  }

  /**
   * Reads the body of a parenthesised text; the opening `(` has already been
   * consumed by the caller. The closing `)` is consumed before returning.
   *
   * The text is decoded as UTF-16. A leading byte of 0xFF or 0xFE is taken as
   * the start of a byte-order mark that selects the endianness; without one,
   * big-endian is used. A backslash escapes the byte that follows it: the
   * backslash is dropped and the following byte is kept verbatim, so it can
   * neither terminate the text nor start another escape.
   *
   * @returns The decoded text with escape backslashes removed.
   * @throws InvalidEngineDataTextBOM when the byte-order mark is malformed.
   */
  private text(): string {
    const firstByte = this.cursor.peek();
    if (firstByte === Delimiters[")"]) {
      this.cursor.pass(1);
      return "";
    }
    const hasBom = firstByte === 0xff || firstByte === 0xfe;
    let decoder = new TextDecoder("utf-16be");
    if (hasBom) {
      decoder = this.textDecoderFromBOM();
    }
    const textParts = [] as string[];
    // `readAhead` scans for the end of the text while `this.cursor` trails
    // behind, marking the first byte that has not been decoded yet.
    const readAhead = this.cursor.clone();
    for (;;) {
      // Inspect the current byte BEFORE advancing, so that an escape sitting
      // at the very start of the text, or directly after another escape, is
      // recognised as well.
      const current = readAhead.peek();
      if (current === Delimiters[")"]) {
        break;
      }
      if (current !== Delimiters["\\"]) {
        readAhead.pass(1);
        continue;
      }
      // Flush everything that precedes the backslash...
      const pending = readAhead.position - this.cursor.position;
      textParts.push(
        decoder.decode(this.cursor.take(pending), {stream: true})
      );
      // ...drop the backslash itself...
      this.cursor.pass(1);
      // ...and feed the escaped byte to the decoder unchanged.
      textParts.push(decoder.decode(this.cursor.take(1), {stream: true}));
      // Step over the backslash and the escaped byte; both cursors now agree.
      readAhead.pass(2);
    }
    const length = readAhead.position - this.cursor.position;
    const raw = this.cursor.take(length);
    // Final, non-streaming call flushes whatever the decoder still buffers.
    textParts.push(decoder.decode(raw));
    this.cursor.pass(1); // final )
    return textParts.join("");
  }

  /**
   * Consumes a two-byte byte-order mark and returns the matching decoder.
   *
   * @returns A UTF-16LE decoder for `FF FE`, a UTF-16BE decoder for `FE FF`.
   * @throws InvalidEngineDataTextBOM for any other byte pair.
   */
  private textDecoderFromBOM(): TextDecoder {
    const firstBomPart = this.cursor.one();
    const sndBomPart = this.cursor.one();
    // https://en.wikipedia.org/wiki/Byte_order_mark#UTF-16
    // LE is FF FE
    if (firstBomPart === 0xff && sndBomPart === 0xfe) {
      return new TextDecoder("utf-16le");
    }
    // BE is FE FF
    if (firstBomPart === 0xfe && sndBomPart === 0xff) {
      return new TextDecoder("utf-16be");
    }
    throw new InvalidEngineDataTextBOM(
      `Unknown BOM value: [${firstBomPart}, ${sndBomPart}]`
    );
  }

  /** Reads a bare token (up to the next whitespace or delimiter byte). */
  private string(): string {
    return stringToken(this.cursor);
  }

  /**
   * Reads a bare token and converts it with `Number(...)`.
   *
   * @throws InvalidEngineDataNumber when the conversion yields `NaN`.
   */
  private number(): number {
    const text = this.string();
    const value = Number(text);
    if (Number.isNaN(value)) {
      throw new InvalidEngineDataNumber(`parsing '${text}' as Number failed`);
    }
    return value;
  }

  /**
   * Reads a bare token that must be exactly `true` or `false`.
   *
   * @throws InvalidEngineDataBoolean for any other token.
   */
  private boolean(): boolean {
    const text = this.string();
    if (text === "true") {
      return true;
    }
    if (text === "false") {
      return false;
    }
    throw new InvalidEngineDataBoolean(
      `'${text}' is neither 'true' nor 'false'`
    );
  }
}
110 changes: 110 additions & 0 deletions packages/psd/src/engineData/parser.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
// @webtoon/psd
// Copyright 2021-present NAVER WEBTOON
// MIT License

import {
InvalidEngineDataDictKey,
InvalidTopLevelEngineDataValue,
UnexpectedEndOfEngineData,
} from "../utils";
import {Token, TokenType} from "./lexer";

/**
 * Any value that can occur in parsed EngineData: a scalar, an array of
 * values, or a nested dictionary.
 */
export type RawEngineValue =
  | RawEngineData
  | RawEngineValue[]
  | boolean
  | number
  | string;

/** A parsed EngineData dictionary: string keys mapped to arbitrary values. */
export type RawEngineData = {
  [name: string]: RawEngineValue;
};

// Sentinel values the parser pushes onto its stack when it meets ArrBeg /
// DictBeg, marking where the array or dict currently being built starts.
// Each symbol's description is the name of the matching TokenType member.
// They are initialised with a direct `Symbol(...)` call so that
// `typeof ARR_BOUNDARY` / `typeof DICT_BOUNDARY` are distinct symbol types.
const ARR_BOUNDARY = Symbol(TokenType[TokenType.ArrBeg]);
const DICT_BOUNDARY = Symbol(TokenType[TokenType.DictBeg]);

/**
 * Stack-based parser that folds a stream of lexer tokens into nested
 * dictionaries and arrays.
 *
 * Scalars and boundary markers are pushed onto a stack as they arrive; a
 * closing token pops everything back to the matching boundary and pushes the
 * assembled container in its place.
 */
export class Parser {
  /** Work stack of finished values and open-container boundary markers. */
  private stack: (
    | RawEngineValue
    | typeof ARR_BOUNDARY
    | typeof DICT_BOUNDARY
  )[] = [];

  /**
   * @param tokens - Tokens to parse, in source order.
   */
  constructor(private tokens: Iterable<Token>) {}

  /**
   * Parses the tokens and returns the top-level dictionary.
   *
   * @returns The first value left on the stack, which must be a dict.
   * @throws InvalidTopLevelEngineDataValue when that value is not a dict.
   * @throws UnexpectedEndOfEngineData when a container is closed but was
   *   never opened.
   * @throws InvalidEngineDataDictKey when a dict key is not a string, or
   *   when array and dict boundaries are mismatched.
   */
  parse(): RawEngineData {
    this.runParser();
    const [value] = this.stack;
    if (typeof value === "object" && !Array.isArray(value)) {
      return value;
    }
    throw new InvalidTopLevelEngineDataValue(
      `EngineData top-level value is not a dict; is ${typeof value}`
    );
  }

  /** Feeds every token through the stack machine. */
  private runParser() {
    for (const it of this.tokens) {
      switch (it.type) {
        case TokenType.Name:
        case TokenType.Number:
        case TokenType.Boolean:
        case TokenType.String:
          this.stack.push(it.value);
          break;
        case TokenType.DictBeg:
          this.stack.push(DICT_BOUNDARY);
          break;
        case TokenType.ArrBeg:
          this.stack.push(ARR_BOUNDARY);
          break;
        case TokenType.DictEnd:
          this.stack.push(this.dict());
          break;
        case TokenType.ArrEnd:
          // Items come off the stack last-to-first; restore source order.
          this.stack.push(this.array().reverse());
          break;
      }
    }
  }

  /**
   * Pops value/key pairs off the stack until the dict boundary is reached.
   *
   * Pairs are consumed back-to-front, so when a key repeats, the occurrence
   * that appears first in the token stream is the one that is kept.
   *
   * @returns The assembled dictionary.
   * @throws UnexpectedEndOfEngineData when the stack runs out before a dict
   *   boundary is found.
   * @throws InvalidEngineDataDictKey when an array boundary is found instead,
   *   or when a key is not a string.
   */
  private dict(): RawEngineData {
    const val = {} as RawEngineData;
    for (;;) {
      const value = this.stack.pop();
      // TODO: new error types?
      if (value === undefined) {
        throw new UnexpectedEndOfEngineData("Stack empty when parsing dict");
      }
      if (value === DICT_BOUNDARY) {
        return val;
      }
      if (value === ARR_BOUNDARY) {
        throw new InvalidEngineDataDictKey("Got ArrBeg while parsing a dict");
      }
      const it = this.stack.pop();
      if (typeof it !== "string") {
        throw new InvalidEngineDataDictKey(
          `Dict key is not Name; is ${typeof it}`
        );
      }
      // Define the entry as an own data property instead of assigning it.
      // A plain `val[it] = value` with the key "__proto__" would invoke the
      // inherited prototype setter: the entry would be lost and the dict's
      // prototype could be replaced by file-controlled data.
      Object.defineProperty(val, it, {
        value,
        writable: true,
        enumerable: true,
        configurable: true,
      });
    }
  }

  /**
   * Pops items off the stack until the array boundary is reached.
   *
   * @returns The items in pop order, i.e. reversed relative to the source.
   * @throws UnexpectedEndOfEngineData when the stack runs out before an array
   *   boundary is found.
   * @throws InvalidEngineDataDictKey when a dict boundary is found instead.
   */
  private array(): RawEngineValue[] {
    const val = [] as RawEngineValue[];
    for (;;) {
      const it = this.stack.pop();
      // TODO: new error types?
      if (it === undefined) {
        throw new UnexpectedEndOfEngineData("Stack empty when parsing array");
      }
      if (it === DICT_BOUNDARY) {
        throw new InvalidEngineDataDictKey("Got DictBeg while parsing array");
      }
      if (it === ARR_BOUNDARY) {
        return val;
      }
      val.push(it);
    }
  }
}