Skip to content

Commit

Permalink
feature: implement EngineData parsing
Browse files Browse the repository at this point in the history
fixes #6
  • Loading branch information
scoiatael authored and pastelmind committed Aug 18, 2022
1 parent 8885f16 commit 8c5f09c
Show file tree
Hide file tree
Showing 18 changed files with 1,193 additions and 1 deletion.
10 changes: 9 additions & 1 deletion packages/psd/src/classes/Layer.ts
Expand Up @@ -2,7 +2,7 @@
// Copyright 2021-present NAVER WEBTOON
// MIT License

import {ImageData} from "../interfaces";
import {EngineData, ImageData} from "../interfaces";
import {LayerFrame} from "../sections";
import {NodeParent} from "./Node";
import {NodeBase} from "./NodeBase";
Expand Down Expand Up @@ -65,6 +65,14 @@ export class Layer
return this.layerFrame.layerProperties.text;
}

/**
 * Text properties attached to this layer.
 *
 * Reads the `textProperties` entry of the layer frame's layer properties.
 * For a text layer this is the layer's `EngineData`; for any other layer
 * the value is `undefined`.
 */
get textProperties(): EngineData | undefined {
  const {textProperties} = this.layerFrame.layerProperties;
  return textProperties;
}

protected get imageData(): ImageData {
const {red, green, blue, alpha} = this.layerFrame;

Expand Down
7 changes: 7 additions & 0 deletions packages/psd/src/engineData/index.ts
@@ -0,0 +1,7 @@
// @webtoon/psd
// Copyright 2021-present NAVER WEBTOON
// MIT License

export * from "./lexer";
export * from "./parser";
export * from "./validator";
217 changes: 217 additions & 0 deletions packages/psd/src/engineData/lexer.ts
@@ -0,0 +1,217 @@
// @webtoon/psd
// Copyright 2021-present NAVER WEBTOON
// MIT License

// Based on PDF grammar: https://web.archive.org/web/20220226063926/https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf
// Section 7.2 - Lexical Conventions

import {
Cursor,
InvalidEngineDataBoolean,
InvalidEngineDataNumber,
InvalidEngineDataTextBOM,
} from "../utils";

/**
 * Kinds of tokens the EngineData lexer can emit.
 *
 * The numeric values are spelled out so that the reverse mapping
 * (`TokenType[n]`, used in error messages) is obvious at a glance.
 */
export enum TokenType {
  /** Text written between `(` and `)`. */
  String = 0,
  /** Start of a dictionary. */
  DictBeg = 1,
  /** End of a dictionary. */
  DictEnd = 2,
  /** Start of an array (`[`). */
  ArrBeg = 3,
  /** End of an array (`]`). */
  ArrEnd = 4,
  /** Identifier introduced by `/`. */
  Name = 5,
  /** Numeric literal. */
  Number = 6,
  /** `true` or `false`. */
  Boolean = 7,
}

/**
 * A single lexical unit of EngineData, discriminated by its `type` field.
 *
 * Structural tokens (dictionary / array boundaries) carry no payload;
 * every other token carries the decoded `value` it represents.
 */
export type Token =
  // text found between parentheses
  | {type: TokenType.String; value: string}
  // dictionary boundaries
  | {type: TokenType.DictBeg}
  | {type: TokenType.DictEnd}
  // array boundaries
  | {type: TokenType.ArrBeg}
  | {type: TokenType.ArrEnd}
  // identifier that followed a `/`
  | {type: TokenType.Name; value: string}
  | {type: TokenType.Number; value: number}
  | {type: TokenType.Boolean; value: boolean};

/** Byte values that separate tokens and are otherwise ignored. */
const WhitespaceCharacters = new Set([
  0x00, // NUL
  0x09, // horizontal tab
  0x0c, // form feed
  0x20, // space
  0x0a, // line feed
  0x0d, // carriage return
]);

/** First byte of the two boolean literals, `false` and `true`. */
const BooleanStartCharacters = new Set([
  102, // 'f'
  116, // 't'
]);

/** Byte value of every delimiter character, keyed by the character itself. */
const Delimiters = {
  "(": 40,
  ")": 41,
  "<": 60,
  ">": 62,
  "[": 91,
  "]": 93,
  "/": 47,
  "\\": 92,
  // NOTE: The PDF grammar also gives meaning to the characters below.
  // It is not known whether EngineData uses them, so they stay disabled.
  // "{": 123,
  // "}": 125,
  // "%": 37,
};

/** Delimiter byte values as a set, for fast membership checks. */
const DelimiterCharacters = new Set(Object.values(Delimiters));

/**
 * Turns the raw bytes of an EngineData blob into a stream of {@link Token}s.
 *
 * The lexer reads from the supplied cursor and advances it as it goes, so a
 * cursor should be handed to exactly one lexer.
 */
export class Lexer {
  constructor(private cursor: Cursor) {}

  /**
   * Lazily yields tokens until the cursor reaches the end of its data.
   *
   * @throws InvalidEngineDataBoolean when a word starting with `t`/`f` is
   *   neither `true` nor `false`.
   * @throws InvalidEngineDataNumber when a bare word cannot be parsed as a
   *   number.
   * @throws InvalidEngineDataTextBOM when a string starts with 0xFF/0xFE but
   *   the first two bytes are not a valid UTF-16 byte order mark.
   */
  *tokens(): Generator<Token> {
    while (!this.done()) {
      const val = this.cursor.read("u8");

      if (WhitespaceCharacters.has(val)) {
        // Swallow the rest of the whitespace run in one go.
        while (!this.done() && WhitespaceCharacters.has(this.cursor.peek())) {
          this.cursor.pass(1);
        }
        continue;
      }

      if (DelimiterCharacters.has(val)) {
        switch (val) {
          case Delimiters["("]:
            yield {type: TokenType.String, value: this.text()};
            break;
          case Delimiters["["]:
            yield {type: TokenType.ArrBeg};
            break;
          case Delimiters["]"]:
            yield {type: TokenType.ArrEnd};
            break;
          case Delimiters["<"]:
            // Dictionaries open with "<<". The second byte is skipped
            // without being checked.
            // NOTE: assert that it is < indeed?
            this.cursor.pass(1);
            yield {type: TokenType.DictBeg};
            break;
          case Delimiters[">"]:
            // Same as above for the closing ">>".
            // NOTE: assert that it is > indeed?
            this.cursor.pass(1);
            yield {type: TokenType.DictEnd};
            break;
          case Delimiters["/"]:
            yield {type: TokenType.Name, value: this.string()};
            break;
          default:
            // A stray ")" or "\" outside of a string: report it and move on.
            console.assert(
              false,
              "Unhandled delimiter: '%s'",
              String.fromCharCode(val)
            );
        }
        continue;
      }

      // Only two kinds of token are left: number or boolean.
      // `val` is the first character of that token, so put it back before
      // reading the whole word.
      this.cursor.unpass(1);
      if (BooleanStartCharacters.has(val)) {
        yield {type: TokenType.Boolean, value: this.boolean()};
      } else {
        yield {type: TokenType.Number, value: this.number()};
      }
    }
  }

  /** Whether the cursor has consumed all of its data. */
  private done(): boolean {
    return this.cursor.position >= this.cursor.length;
  }

  /**
   * Reads the body of a parenthesised string; the opening `(` has already
   * been consumed. Consumes everything up to and including the closing `)`.
   *
   * The payload is UTF-16: big-endian by default, or whatever the leading
   * byte order mark says. Inside the payload a backslash escapes the single
   * byte that follows it (this is how `(`, `)` and `\` bytes are embedded).
   *
   * Escapes are removed at *byte* level before decoding. An escaped byte can
   * be either half of a UTF-16 code unit (e.g. U+5C0F is `5C 0F`), so
   * decoding the chunks around an escape separately would split code units
   * and corrupt the text.
   *
   * @returns the decoded text, or `""` for an empty string `()`.
   * @throws InvalidEngineDataTextBOM when the BOM is malformed.
   */
  private text(): string {
    const firstByte = this.cursor.peek();
    const hasBom = firstByte === 0xff || firstByte === 0xfe;
    const decoder = hasBom
      ? this.textDecoderFromBOM()
      : new TextDecoder("utf-16be");

    const payload: number[] = [];
    // NOTE(review): an unterminated string makes this loop read past the end
    // of the data; this relies on the cursor to reject that read - confirm.
    for (;;) {
      const byte = this.cursor.read("u8");
      if (byte === Delimiters[")"]) {
        break; // unescaped ")" closes the string
      }
      // After a backslash the next byte is taken literally, whatever it is.
      payload.push(
        byte === Delimiters["\\"] ? this.cursor.read("u8") : byte
      );
    }
    return decoder.decode(new Uint8Array(payload));
  }

  /**
   * Consumes a two-byte UTF-16 byte order mark and returns the matching
   * decoder.
   *
   * @throws InvalidEngineDataTextBOM when the two bytes are neither `FF FE`
   *   nor `FE FF`.
   */
  private textDecoderFromBOM(): TextDecoder {
    const firstBomPart = this.cursor.read("u8");
    const sndBomPart = this.cursor.read("u8");
    // https://en.wikipedia.org/wiki/Byte_order_mark#UTF-16
    // LE is FF FE
    if (firstBomPart === 0xff && sndBomPart === 0xfe) {
      return new TextDecoder("utf-16le");
    }
    // BE is FE FF
    if (firstBomPart === 0xfe && sndBomPart === 0xff) {
      return new TextDecoder("utf-16be");
    }
    throw new InvalidEngineDataTextBOM(
      `Unknown BOM value: [${firstBomPart}, ${sndBomPart}]`
    );
  }

  /**
   * Reads a bare word: every byte up to (but not including) the next
   * whitespace or delimiter character, or the end of the data. The bytes are
   * decoded with the "ascii" label.
   */
  private string(): string {
    const decoder = new TextDecoder("ascii");
    // Remember where the word starts; the main cursor scans for its end.
    const wordStart = this.cursor.clone();
    while (
      !this.done() &&
      !WhitespaceCharacters.has(this.cursor.peek()) &&
      !DelimiterCharacters.has(this.cursor.peek())
    ) {
      this.cursor.pass(1);
    }
    return decoder.decode(
      wordStart.take(this.cursor.position - wordStart.position)
    );
  }

  /**
   * Reads a bare word and converts it with `Number()`.
   *
   * @throws InvalidEngineDataNumber when the conversion yields `NaN`.
   */
  private number(): number {
    const text = this.string();
    const value = Number(text);
    if (Number.isNaN(value)) {
      throw new InvalidEngineDataNumber(`parsing '${text}' as Number failed`);
    }
    return value;
  }

  /**
   * Reads a bare word that must be exactly `true` or `false`.
   *
   * @throws InvalidEngineDataBoolean for any other word.
   */
  private boolean(): boolean {
    const text = this.string();
    if (text === "true") {
      return true;
    }
    if (text === "false") {
      return false;
    }
    throw new InvalidEngineDataBoolean(
      `'${text}' is neither 'true' nor 'false'`
    );
  }
}
106 changes: 106 additions & 0 deletions packages/psd/src/engineData/parser.ts
@@ -0,0 +1,106 @@
// @webtoon/psd
// Copyright 2021-present NAVER WEBTOON
// MIT License

import {
InvalidEngineDataDictKey,
InvalidTopLevelEngineDataValue,
UnexpectedEndOfEngineData,
UnexpectedEngineDataToken,
} from "../utils";
import {Token, TokenType} from "./lexer";

/**
 * A parsed EngineData dictionary: string keys mapped to arbitrary parsed
 * values. "Raw" because the shape has not been validated yet.
 */
export type RawEngineData = {
  [key: string]: RawEngineValue;
};
/**
 * Any value the parser can produce: a scalar, an array of values, or a
 * nested dictionary.
 */
export type RawEngineValue =
  | string
  | number
  | boolean
  | RawEngineValue[]
  | RawEngineData;

/**
 * Builds a {@link RawEngineData} tree out of a token stream.
 *
 * The parser pulls tokens from the generator on demand and stops as soon as
 * the top-level value is complete; trailing tokens are not inspected.
 */
export class Parser {
  // private done: boolean = false
  constructor(private tokens: Generator<Token>) {}

  /**
   * Parses one value from the token stream and requires it to be a dict.
   *
   * @returns the top-level dictionary.
   * @throws InvalidTopLevelEngineDataValue when the first value is an array
   *   or a scalar.
   * @throws UnexpectedEndOfEngineData when the tokens run out mid-value.
   * @throws UnexpectedEngineDataToken when a closing token appears where a
   *   value is expected.
   * @throws InvalidEngineDataDictKey when a dict key is not a Name token.
   */
  parse(): RawEngineData {
    const value = this.value();
    // TODO: for this to be true we'd need to force lexer somehow into reaching end-of-file
    // console.assert(this.done, "not all tokens from engine data were consumed")
    if (typeof value === "object" && !Array.isArray(value)) {
      return value;
    }
    // `typeof []` is "object", which would make the message self-contradictory.
    const actualKind = Array.isArray(value) ? "array" : typeof value;
    throw new InvalidTopLevelEngineDataValue(
      `EngineData top-level value is not a dict; is ${actualKind}`
    );
  }

  /**
   * Parses a single value.
   *
   * @param it the token that starts the value, if the caller has already
   *   pulled it from the stream; otherwise the next token is pulled here.
   */
  private value(it?: Token): RawEngineValue {
    /**
     * NOTE: this is recursive descent parser - simplest solution in terms of code complexity
     * In case we ever start to run into stack-depth issues
     * ("RangeError: Maximum call stack size exceeded" )
     * due to parsing data that's too big, this can be re-written into stack-based one.
     * That's because EngineData can be thought about as reverse-polish notation:
     * ] - end of array requires popping values from stack until you hit [
     * (and pushing new value - an array - onto stack)
     * same for << and >>.
     */
    const token = it ?? this.advance();
    switch (token.type) {
      case TokenType.Name:
      case TokenType.Number:
      case TokenType.Boolean:
      case TokenType.String:
        return token.value;
      case TokenType.DictBeg:
        return this.dict();
      case TokenType.ArrBeg:
        return this.arr();
      default:
        // DictEnd / ArrEnd cannot start a value.
        throw new UnexpectedEngineDataToken(
          `Unexpected token: ${TokenType[token.type]}`
        );
    }
  }

  /**
   * Pulls the next token from the stream.
   *
   * @throws UnexpectedEndOfEngineData when the stream is exhausted.
   */
  private advance(): Token {
    const it = this.tokens.next();
    // this.done = Boolean(it.done);
    if (it.done || !it.value) {
      throw new UnexpectedEndOfEngineData("End of stream");
    }
    return it.value;
  }

  /**
   * Parses the entries of a dictionary whose opening token has already been
   * consumed, up to and including the closing token. A repeated key
   * overwrites the earlier entry.
   */
  private dict(): RawEngineData {
    const val: RawEngineData = {};
    for (;;) {
      const it = this.advance();
      if (it.type === TokenType.DictEnd) {
        return val;
      }
      if (it.type !== TokenType.Name) {
        throw new InvalidEngineDataDictKey(
          `Dict key is not Name; is ${TokenType[it.type]}`
        );
      }
      // Keys come from the file, so they must be stored as own data
      // properties. A plain `val[key] = value` would replace the object's
      // prototype for a key named "__proto__" instead of storing the entry.
      Object.defineProperty(val, it.value, {
        value: this.value(),
        writable: true,
        enumerable: true,
        configurable: true,
      });
    }
  }

  /**
   * Parses the elements of an array whose opening token has already been
   * consumed, up to and including the closing token.
   */
  private arr(): RawEngineValue[] {
    const val: RawEngineValue[] = [];
    for (;;) {
      const it = this.advance();
      if (it.type === TokenType.ArrEnd) {
        return val;
      }
      // The token was pulled to test for the end; hand it on so it is not lost.
      val.push(this.value(it));
    }
  }
}
40 changes: 40 additions & 0 deletions packages/psd/src/engineData/validator.ts
@@ -0,0 +1,40 @@
// @webtoon/psd
// Copyright 2021-present NAVER WEBTOON
// MIT License

import {EngineData} from "../interfaces";

/** Top-level entries that must be present for a value to count as EngineData. */
const REQUIRED_KEYS = new Set<string>([
  "DocumentResources",
  "EngineDict",
  "ResourceDict",
]);

/**
 * Type-narrowing wrapper around `Object.prototype.hasOwnProperty`.
 *
 * @param obj value to inspect; `null` / `undefined` make the underlying
 *   call throw a `TypeError`.
 * @param prop name of the property to look for.
 * @returns `true` when `obj` has `prop` as an own (non-inherited) property.
 */
function hasOwnProperty<K extends string>(
  obj: unknown,
  prop: K
): obj is Record<K, unknown> {
  const ownsProp: boolean = Object.prototype.hasOwnProperty.call(obj, prop);
  return ownsProp;
}

/**
 * Checks that a value has the minimal shape of EngineData.
 *
 * The value must be a non-null object, and each key in `REQUIRED_KEYS` must
 * be an own property holding a non-null, non-array object. Nothing deeper is
 * inspected.
 *
 * @param engineData value to check, typically the parser's raw output.
 * @returns `true` (narrowing the type to `EngineData`) when every required
 *   entry is present and is a dictionary-like object; `false` otherwise.
 */
export function validateEngineData(
  engineData: unknown
): engineData is EngineData {
  if (typeof engineData !== "object" || !engineData) {
    return false;
  }
  for (const requiredKey of REQUIRED_KEYS) {
    if (!hasOwnProperty(engineData, requiredKey)) {
      return false;
    }
    const entry = engineData[requiredKey];
    const isDict =
      typeof entry === "object" && entry !== null && !Array.isArray(entry);
    if (!isDict) {
      return false;
    }
  }
  return true;
}

0 comments on commit 8c5f09c

Please sign in to comment.