webtoon · pastelmind · Aug 18, 2022 · Jul 28, 2022 · Aug 3, 2022 · Aug 3, 2022
diff --git a/packages/psd/src/classes/Layer.ts b/packages/psd/src/classes/Layer.ts
@@ -2,7 +2,7 @@
 // Copyright 2021-present NAVER WEBTOON
 // MIT License
 
-import {ImageData} from "../interfaces";
+import {EngineData, ImageData} from "../interfaces";
 import {LayerFrame} from "../sections";
 import {NodeParent} from "./Node";
 import {NodeBase} from "./NodeBase";
@@ -65,6 +65,14 @@ export class Layer
     return this.layerFrame.layerProperties.text;
   }
 
+  /**
+   * Text properties recorded for this layer.
+   *
+   * Only text layers carry them; for every other kind of layer this getter
+   * yields `undefined`.
+   */
+  get textProperties(): EngineData | undefined {
+    const {textProperties} = this.layerFrame.layerProperties;
+    return textProperties;
+  }
+
   protected get imageData(): ImageData {
     const {red, green, blue, alpha} = this.layerFrame;
 

diff --git a/packages/psd/src/engineData/index.ts b/packages/psd/src/engineData/index.ts
@@ -0,0 +1,7 @@
+// @webtoon/psd
+// Copyright 2021-present NAVER WEBTOON
+// MIT License
+
+export * from "./lexer";
+export * from "./parser";
+export * from "./validator";
diff --git a/packages/psd/src/engineData/lexer.ts b/packages/psd/src/engineData/lexer.ts
@@ -0,0 +1,228 @@
+// @webtoon/psd
+// Copyright 2021-present NAVER WEBTOON
+// MIT License
+
+// Based on PDF grammar: https://web.archive.org/web/20220226063926/https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf
+// Section 7.2 - Lexical Conventions
+
+import {
+  Cursor,
+  InvalidEngineDataBoolean,
+  InvalidEngineDataNumber,
+  InvalidEngineDataTextBOM,
+} from "../utils";
+
+/**
+ * Kinds of token the lexer can emit.
+ *
+ * This is a numeric enum, so the reverse mapping (`TokenType[n]`) yields the
+ * member name as a string.
+ */
+export enum TokenType {
+  /** Text string written between `(` and `)`. */
+  String = 0,
+  /** Dictionary opener. */
+  DictBeg = 1,
+  /** Dictionary closer. */
+  DictEnd = 2,
+  /** Array opener (`[`). */
+  ArrBeg = 3,
+  /** Array closer (`]`). */
+  ArrEnd = 4,
+  /** Name introduced by `/`. */
+  Name = 5,
+  /** Numeric literal. */
+  Number = 6,
+  /** `true` or `false`. */
+  Boolean = 7,
+}
+
+/**
+ * One lexical unit, discriminated by its `type` field.
+ *
+ * Structural tokens (dictionary/array boundaries) carry no payload; every
+ * other token carries the value that was decoded for it.
+ */
+export type Token =
+  // Structural tokens
+  | {type: TokenType.DictBeg}
+  | {type: TokenType.DictEnd}
+  | {type: TokenType.ArrBeg}
+  | {type: TokenType.ArrEnd}
+  // Value-carrying tokens
+  | {type: TokenType.Name; value: string}
+  | {type: TokenType.String; value: string}
+  | {type: TokenType.Number; value: number}
+  | {type: TokenType.Boolean; value: boolean};
+
+/** Byte values that separate tokens and are otherwise ignored by the lexer. */
+const WhitespaceCharacters = new Set([
+  0x00, // NUL
+  0x09, // horizontal tab
+  0x0c, // form feed
+  0x20, // space
+  0x0a, // line feed (\n)
+  0x0d, // carriage return (\r)
+]);
+
+/**
+ * First bytes that make the lexer read a bare word as a boolean (`false` /
+ * `true`) instead of as a number.
+ */
+const BooleanStartCharacters = new Set(
+  ["f", "t"].map((letter) => letter.charCodeAt(0))
+);
+
+/**
+ * Byte value of each delimiter character the lexer reacts to, keyed by the
+ * character itself.
+ */
+const Delimiters = {
+  "(": 40, // 0x28
+  ")": 41, // 0x29
+  "<": 60, // 0x3c
+  ">": 62, // 0x3e
+  "[": 91, // 0x5b
+  "]": 93, // 0x5d
+  "/": 47, // 0x2f
+  "\\": 92, // 0x5c
+  // NOTE: These also have meaning within PDF. Are they used here?
+  // "{": 123,
+  // "}": 125,
+  // "%": 37,
+};
+
+/** Set view of the delimiter byte values, for fast membership tests. */
+const DelimiterCharacters = new Set<number>(Object.values(Delimiters));
+
+/**
+ * Lookup table indexed by byte value (0-255): `true` when that byte ends a
+ * bare token, i.e. when it is a whitespace or delimiter byte.
+ */
+const STRING_TOKEN_JT: boolean[] = Array.from(
+  {length: 256},
+  (_, byte) => WhitespaceCharacters.has(byte) || DelimiterCharacters.has(byte)
+);
+
+/** Shared decoder for bare tokens (names, numbers, booleans). */
+const STRING_DECODER = new TextDecoder("utf-8");
+
+/**
+ * Reads one bare token starting at the cursor's current position.
+ *
+ * The token extends up to, but does not include, the first whitespace or
+ * delimiter byte, or to the end of what `cursor.iter()` yields. The bytes are
+ * consumed from the cursor and decoded as UTF-8.
+ *
+ * NOTE(review): this relies on `cursor.iter()` not advancing the cursor,
+ * since the measured bytes are consumed afterwards with `take()` - confirm
+ * against `Cursor`.
+ *
+ * @param cursor Cursor positioned on the first byte of the token.
+ * @returns The decoded token text (empty if the first byte is a terminator).
+ */
+function stringToken(cursor: Cursor): string {
+  let byteCount = 0;
+  for (const byte of cursor.iter()) {
+    if (STRING_TOKEN_JT[byte]) {
+      break;
+    }
+    byteCount += 1;
+  }
+  return STRING_DECODER.decode(cursor.take(byteCount));
+}
+
+/**
+ * Splits raw EngineData bytes into a flat list of {@link Token}s.
+ *
+ * The lexer owns a cursor over the input; {@link Lexer.tokens} consumes that
+ * cursor from its current position to the end of the data.
+ */
+export class Lexer {
+  cursor: Cursor;
+
+  /**
+   * @param cursor Raw EngineData bytes to tokenize.
+   */
+  constructor(cursor: Uint8Array) {
+    this.cursor = Cursor.from(cursor);
+  }
+
+  /**
+   * Consumes the remaining input and returns every token found.
+   *
+   * @returns The tokens, in the order they appear in the input.
+   * @throws InvalidEngineDataTextBOM if a text string starts with 0xFF or
+   *   0xFE but those two bytes are not a UTF-16 byte order mark.
+   * @throws InvalidEngineDataNumber if a bare word cannot be parsed as a
+   *   number.
+   * @throws InvalidEngineDataBoolean if a bare word starting with `f` or `t`
+   *   is neither `false` nor `true`.
+   */
+  tokens(): Token[] {
+    const value = [] as Token[];
+    while (!this.done()) {
+      const val = this.cursor.one();
+
+      if (WhitespaceCharacters.has(val)) {
+        // Swallow the rest of the whitespace run in one go.
+        while (!this.done() && WhitespaceCharacters.has(this.cursor.peek()))
+          this.cursor.pass(1);
+        continue;
+      }
+      if (DelimiterCharacters.has(val)) {
+        if (val === Delimiters["("]) {
+          value.push({type: TokenType.String, value: this.text()});
+          continue;
+        }
+        if (val === Delimiters["["]) {
+          value.push({type: TokenType.ArrBeg});
+          continue;
+        }
+        if (val === Delimiters["]"]) {
+          value.push({type: TokenType.ArrEnd});
+          continue;
+        }
+        if (val === Delimiters["<"]) {
+          // A dictionary opens with `<<`; the second byte is skipped without
+          // being checked.
+          // NOTE: assert that it is < indeed?
+          this.cursor.pass(1);
+          value.push({type: TokenType.DictBeg});
+          continue;
+        }
+        if (val === Delimiters[">"]) {
+          // A dictionary closes with `>>`; the second byte is skipped without
+          // being checked.
+          // NOTE: assert that it is > indeed?
+          this.cursor.pass(1);
+          value.push({type: TokenType.DictEnd});
+          continue;
+        }
+        if (val === Delimiters["/"]) {
+          value.push({type: TokenType.Name, value: this.string()});
+          continue;
+        }
+        // A stray `)` or `\` outside a text string: report it and move on.
+        console.assert(
+          false,
+          "Unhandled delimiter: '%s'",
+          String.fromCharCode(val)
+        );
+        continue;
+      }
+      // only two types left: number or boolean
+      // `val` is the first byte of the word, so hand it back to the cursor
+      // before reading the whole word.
+      this.cursor.unpass(1);
+      if (BooleanStartCharacters.has(val)) {
+        value.push({type: TokenType.Boolean, value: this.boolean()});
+      } else {
+        value.push({type: TokenType.Number, value: this.number()});
+      }
+    }
+    return value;
+  }
+
+  /** Whether the cursor has reached the end of the input. */
+  private done(): boolean {
+    return this.cursor.position >= this.cursor.length;
+  }
+
+  /**
+   * Reads a text string. The opening `(` has already been consumed; the
+   * closing `)` is consumed by this method.
+   *
+   * The payload is UTF-16: a leading byte order mark selects the endianness
+   * and is not part of the text; without one, big-endian is assumed. Inside
+   * the payload a backslash escapes the single byte that follows it, which is
+   * how bytes equal to `(`, `)` or `\` appear within the encoded text. Every
+   * backslash is treated as such a prefix wherever it occurs - including as
+   * the first payload byte and directly after another escape sequence - so an
+   * escaped `)` never terminates the string.
+   *
+   * NOTE(review): an unterminated string reads past the end of the input;
+   * this presumably makes the cursor throw - confirm against `Cursor`.
+   *
+   * @returns The decoded text, without the byte order mark.
+   * @throws InvalidEngineDataTextBOM if the first byte is 0xFF or 0xFE but
+   *   the first two bytes are not a UTF-16 byte order mark.
+   */
+  private text(): string {
+    const firstByte = this.cursor.peek();
+    if (firstByte === Delimiters[")"]) {
+      this.cursor.pass(1);
+      return "";
+    }
+    const hasBom = firstByte === 0xff || firstByte === 0xfe;
+    const decoder = hasBom
+      ? this.textDecoderFromBOM()
+      : new TextDecoder("utf-16be");
+
+    // Collect the un-escaped payload, then decode it in a single call so that
+    // code units split by an escape sequence are reassembled correctly.
+    const payload: number[] = [];
+    for (;;) {
+      let byte = this.cursor.one();
+      if (byte === Delimiters[")"]) {
+        break; // unescaped `)` closes the string
+      }
+      if (byte === Delimiters["\\"]) {
+        byte = this.cursor.one(); // the escaped byte is taken literally
+      }
+      payload.push(byte);
+    }
+    return decoder.decode(Uint8Array.from(payload));
+  }
+
+  /**
+   * Consumes a two-byte UTF-16 byte order mark and returns the matching
+   * decoder.
+   *
+   * @throws InvalidEngineDataTextBOM if the two bytes are neither `FF FE`
+   *   nor `FE FF`.
+   */
+  private textDecoderFromBOM(): TextDecoder {
+    const firstBomPart = this.cursor.one();
+    const sndBomPart = this.cursor.one();
+    // https://en.wikipedia.org/wiki/Byte_order_mark#UTF-16
+    // LE is FF FE
+    if (firstBomPart === 0xff && sndBomPart === 0xfe)
+      return new TextDecoder("utf-16le");
+    // BE is FE FF
+    if (firstBomPart === 0xfe && sndBomPart === 0xff)
+      return new TextDecoder("utf-16be");
+    throw new InvalidEngineDataTextBOM(
+      `Unknown BOM value: [${firstBomPart}, ${sndBomPart}]`
+    );
+  }
+
+  /** Reads a bare word up to the next whitespace or delimiter byte. */
+  private string(): string {
+    return stringToken(this.cursor);
+  }
+
+  /**
+   * Reads a bare word and converts it with `Number()`.
+   *
+   * @throws InvalidEngineDataNumber if the conversion yields `NaN`.
+   */
+  private number(): number {
+    const text = this.string();
+    const value = Number(text);
+    if (Number.isNaN(value)) {
+      throw new InvalidEngineDataNumber(`parsing '${text}' as Number failed`);
+    }
+    return value;
+  }
+
+  /**
+   * Reads a bare word that must be exactly `true` or `false`.
+   *
+   * @throws InvalidEngineDataBoolean for any other word.
+   */
+  private boolean(): boolean {
+    const text = this.string();
+    if (text === "true") {
+      return true;
+    }
+    if (text === "false") {
+      return false;
+    }
+    throw new InvalidEngineDataBoolean(
+      `'${text}' is neither 'true' nor 'false'`
+    );
+  }
+}
diff --git a/packages/psd/src/engineData/parser.ts b/packages/psd/src/engineData/parser.ts
@@ -0,0 +1,110 @@
+// @webtoon/psd
+// Copyright 2021-present NAVER WEBTOON
+// MIT License
+
+import {
+  InvalidEngineDataDictKey,
+  InvalidTopLevelEngineDataValue,
+  UnexpectedEndOfEngineData,
+} from "../utils";
+import {Token, TokenType} from "./lexer";
+
+/** A parsed dictionary: names mapped to unvalidated values. */
+export type RawEngineData = {
+  [name: string]: RawEngineValue;
+};
+
+/**
+ * Any value the parser can produce: a primitive, a nested dictionary, or an
+ * array of further values.
+ */
+export type RawEngineValue =
+  | boolean
+  | number
+  | string
+  | RawEngineData
+  | RawEngineValue[];
+
+/** Stack marker pushed where an array begins; described as `ArrBeg`. */
+const ARR_BOUNDARY = Symbol("ArrBeg");
+/** Stack marker pushed where a dictionary begins; described as `DictBeg`. */
+const DICT_BOUNDARY = Symbol("DictBeg");
+
+/**
+ * Stack-based parser that folds a token stream into nested plain values.
+ *
+ * Value tokens are pushed onto a stack. A boundary marker is pushed where a
+ * dictionary or array begins; when the matching end token arrives, everything
+ * above the marker is popped and replaced by the finished container.
+ */
+export class Parser {
+  private stack: (
+    | RawEngineValue
+    | typeof ARR_BOUNDARY
+    | typeof DICT_BOUNDARY
+  )[] = [];
+
+  /**
+   * @param tokens Tokens to parse, in source order.
+   */
+  constructor(private tokens: Iterable<Token>) {}
+
+  /**
+   * Parses the tokens and returns the top-level dictionary.
+   *
+   * @returns The first value left on the stack, which must be a dictionary.
+   * @throws InvalidTopLevelEngineDataValue if that value is not a dictionary
+   *   (including when no value was produced at all).
+   * @throws UnexpectedEndOfEngineData if a container end token has no
+   *   matching begin token.
+   * @throws InvalidEngineDataDictKey if a dictionary key is not a string, or
+   *   if array and dictionary boundaries are mismatched.
+   */
+  parse(): RawEngineData {
+    this.runParser();
+    const [value] = this.stack;
+    if (typeof value === "object" && !Array.isArray(value)) {
+      return value;
+    }
+    throw new InvalidTopLevelEngineDataValue(
+      `EngineData top-level value is not a dict; is ${typeof value}`
+    );
+  }
+
+  /** Feeds every token through the stack machine. */
+  private runParser() {
+    for (const it of this.tokens) {
+      switch (it.type) {
+        case TokenType.Name:
+        case TokenType.Number:
+        case TokenType.Boolean:
+        case TokenType.String:
+          this.stack.push(it.value);
+          break;
+        case TokenType.DictBeg:
+          this.stack.push(DICT_BOUNDARY);
+          break;
+        case TokenType.ArrBeg:
+          this.stack.push(ARR_BOUNDARY);
+          break;
+        case TokenType.DictEnd:
+          this.stack.push(this.dict());
+          break;
+        case TokenType.ArrEnd:
+          // Elements come off the stack last-first; restore source order.
+          this.stack.push(this.array().reverse());
+          break;
+      }
+    }
+  }
+
+  /**
+   * Pops value/key pairs until the dictionary boundary and builds the
+   * dictionary from them.
+   *
+   * Pairs are popped last-first, so when a key occurs more than once the
+   * value of its first occurrence in the source is the one that is kept.
+   *
+   * @throws UnexpectedEndOfEngineData if the stack empties before a boundary
+   *   is found.
+   * @throws InvalidEngineDataDictKey if an array boundary is hit, or if a key
+   *   is not a string.
+   */
+  private dict(): RawEngineData {
+    const val = {} as RawEngineData;
+    for (;;) {
+      const value = this.stack.pop();
+      // TODO: new error types?
+      if (value === undefined) {
+        throw new UnexpectedEndOfEngineData("Stack empty when parsing dict");
+      }
+      if (value === DICT_BOUNDARY) {
+        return val;
+      }
+      if (value === ARR_BOUNDARY) {
+        throw new InvalidEngineDataDictKey("Got ArrBeg while parsing a dict");
+      }
+      const it = this.stack.pop();
+      if (typeof it !== "string") {
+        throw new InvalidEngineDataDictKey(
+          `Dict key is not Name; is ${typeof it}`
+        );
+      }
+      // Define the entry as an own property rather than assigning it: keys
+      // come from the file, and assigning to a key named `__proto__` would
+      // replace the prototype (or be silently dropped) instead of storing
+      // the entry.
+      Object.defineProperty(val, it, {
+        value,
+        writable: true,
+        enumerable: true,
+        configurable: true,
+      });
+    }
+  }
+
+  /**
+   * Pops values until the array boundary and returns them in pop order, i.e.
+   * reversed relative to the source.
+   *
+   * @throws UnexpectedEndOfEngineData if the stack empties before a boundary
+   *   is found.
+   * @throws InvalidEngineDataDictKey if a dictionary boundary is hit.
+   */
+  private array(): RawEngineValue[] {
+    const val = [] as RawEngineValue[];
+    for (;;) {
+      const it = this.stack.pop();
+      // TODO: new error types?
+      if (it === undefined) {
+        throw new UnexpectedEndOfEngineData("Stack empty when parsing array");
+      }
+      if (it === DICT_BOUNDARY) {
+        throw new InvalidEngineDataDictKey("Got DictBeg while parsing array");
+      }
+      if (it === ARR_BOUNDARY) {
+        return val;
+      }
+      val.push(it);
+    }
+  }
+}