Skip to content

Commit

Permalink
CCL based lexical analyzers are introduced.
Browse files Browse the repository at this point in the history
  • Loading branch information
ikazuhiro committed Dec 16, 2014
1 parent 1cd3c29 commit 488a4d7
Show file tree
Hide file tree
Showing 3 changed files with 349 additions and 38 deletions.
17 changes: 17 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,20 @@
2014-12-16 Kazuhiro Ito <kzhr@d1.dion.ne.jp>

CCL-based lexical analyzers are available. They are much faster.
If you modify `std11-lexical-analyzer' or `mime-lexical-analyzer',
you also need to modify `std11-ccl-lexical-analyzer' or
`mime-ccl-lexical-analyzer', respectively.

* mime-parse.el (mime-default-ccl-lexical-analyzer): New CCL program.
(mime-ccl-lexical-analyzer): New customizable variable.
(mime-lexical-analyze): Use CCL based lexical analyzer if
mime-ccl-lexical-analyzer is non-nil.

* std11.el (std11-default-ccl-lexical-analyzer): New CCL program.
(std11-ccl-lexical-analyzer): New customizable variable.
(std11-lexical-analyze): Use CCL based lexical analyzer if
std11-ccl-lexical-analyzer is non-nil.

2014-11-09 Kazuhiro Ito <kzhr@d1.dion.ne.jp>

* mime-en.texi (7bit): Fix typo.
Expand Down
179 changes: 163 additions & 16 deletions mime-parse.el
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@

;;; Code:

(require 'custom)
(require 'pccl)
(require 'broken)

(require 'mime-def)
(require 'luna)
(require 'std11)
Expand All @@ -37,6 +41,144 @@
;;; @ lexical analyzer
;;;

(define-ccl-program mime-default-ccl-lexical-analyzer
  ;; Tokenize a MIME field body.  The program writes the printed
  ;; representation of a cons (TOKENS . FLAG), which the caller parses
  ;; with `read'.  TOKENS is a list of (TYPE . "TEXT") cells.  FLAG is
  ;; written as t except when the input ends inside a comment or an
  ;; encloser; in those cases no flag is written, so it reads as nil.
  ;;
  ;; r0 input
  ;; r1 flag means any character exists.
  ;; r2 in parse flag
  ;;    0 none, 1 atom, 2 spaces (no output), 3 comment (no output),
  ;;    4 encloser, 5 error
  ;; r3 comment depth
  (eval-when-compile
    (let* (;; Write r0 into the currently open string literal, escaping
           ;; double quote and backslash so that `read' gets r0 back.
           (wrt `(if (r0 == ?\") (write "\\\"")
                   (if (r0 == ?\\) (write "\\\\")
                     (write r0))))
           ;; Atom character: open a mime-token unless one is already
           ;; open (r2 == 1), then copy the character.
           (atm `((branch r2
                          ((r2 = 1)
                           (write "(mime-token . \"")
                           (write-read-repeat r0))
                          (write-read-repeat r0)
                          ((r2 = 1)
                           (write "(mime-token . \"")
                           (write-read-repeat r0)))
                  ))
           ;; tspecials: close an open atom, then emit a one-character
           ;; token.
           (ts `((if (r2 == 1) ((write "\")") (r2 = 0)))
                 (write "(tspecials . \"")
                 ,wrt
                 (write "\")")
                 (read r0)
                 (repeat)))
           ;; White space: close an open atom.  Nothing is written for
           ;; the white space itself, so no string is open in state 2.
           (sp `((branch r2
                         ((r2 = 2)
                          (read r0)
                          (repeat))
                         ((write "\")")
                          (r2 = 2)
                          (read r0)
                          (repeat))
                         ((read r0) (repeat)))
                 ))
           ;; Error: close an open atom, then put the rest of the input
           ;; into a single error token.  The loop ends only at end of
           ;; input.  Characters go through `wrt' because the remaining
           ;; text may contain double quotes or backslashes, which
           ;; would otherwise corrupt the string literal being written.
           (err `((branch r2
                          ((write "(error . \""))
                          ((write "\")")
                           (write "(error . \""))
                          ((write "(error . \"")))
                  (r2 = 5)
                  (loop
                   ,wrt
                   (read r0)
                   (repeat))))
           ;; Encloser (quoted-string / domain-literal): NAME is the
           ;; token type, TAG the terminating character.
           (enc (lambda (name tag)
                  `((if (r2 == 1) ((write "\")")))
                    (write ,(concat "(" name " . \""))
                    (r2 = 4)
                    (loop
                     (read-branch
                      r0
                      ;; Build one branch entry per character code up
                      ;; to the larger of TAG and backslash.  `cond'
                      ;; yields nil for ordinary characters;
                      ;; NOTE(review): a nil entry is presumably
                      ;; compiled as an empty block, so control falls
                      ;; through to the `write-repeat' after the
                      ;; branch -- confirm against ccl.el.
                      ,@(let* ((count (1+ (max tag ?\\)))
                               (result (make-vector count '(write-repeat r0))))
                          (dotimes (i count)
                            (aset result
                                  i (cond ((eq i tag)
                                           '(break))
                                          ((eq i ?\\)
                                           `((write "\\\\")
                                             (read r0)
                                             ,wrt
                                             (repeat)))
                                          ((eq i ?\")
                                           '((write "\\\"") (repeat))))))
                          (mapcar 'identity result)))
                     (write-repeat r0))
                    (write "\")")
                    (r2 = 0)
                    (read r0)
                    (repeat))))
           (qs (funcall enc "quoted-string" ?\"))
           (dl (funcall enc "domain-literal" ?\]))
           ;; Comment: close an open atom and skip to the matching
           ;; close parenthesis, tracking nesting in r3.  Nothing is
           ;; written, so no string is open in state 3.
           (cm `((if (r2 == 1) ((write "\")")))
                 (r2 = 3)
                 (r3 = 1)
                 (loop
                  (read-branch
                   r0
                   ,@(let* ((count (1+ (max ?\( ?\) ?\\)))
                            (result (make-vector count '(repeat))))
                       (dotimes (i count)
                         (aset result i (cond ((eq i ?\()
                                               '((r3 += 1) (repeat)))
                                              ((eq i ?\))
                                               '((r3 -= 1)
                                                 (if (r3 < 1) (break)
                                                   (repeat))))
                                              ((eq i ?\\)
                                               ;; Skip the escaped
                                               ;; character.
                                               `((read r0)
                                                 (repeat)))
                                              )))
                       (mapcar 'identity result)))
                  (repeat))
                 (r2 = 0)
                 (read r0)
                 (repeat))))
      `(8
        ;; Main block.
        ((r2 = 0)
         (read r0)
         (r1 = 1)
         (write "((")
         (loop
          ;; Dispatch on character codes 0..93; larger codes fall
          ;; through to the atom handler below.  `eval' resolves the
          ;; handler names against the `let*' bindings above, which
          ;; requires those bindings to be dynamic.
          (branch r0
                  ,@(mapcar (lambda (elt) (eval elt))
                            '(err err err err err err err err
                              err sp  sp  err err err err err
                              err err err err err err err err
                              err err err err err err err err
                              sp  atm qs  atm atm atm atm atm
                              cm  ts  atm atm ts  atm atm ts
                              atm atm atm atm atm atm atm atm
                              atm atm ts  ts  ts  ts  ts  ts
                              ts  atm atm atm atm atm atm atm
                              atm atm atm atm atm atm atm atm
                              atm atm atm atm atm atm atm atm
                              atm atm atm dl  ts  ts)))
          ,@atm))
        ;; EOF block.  r1 == 0 means no character was read at all.
        ;; Otherwise terminate the output according to the state in r2.
        ;; A closing double quote is written only for states that have
        ;; an open string literal (1 atom, 4 encloser, 5 error).
        ;; States 2 (spaces) and 3 (comment) write nothing while
        ;; active, so emitting a quote there would make the output
        ;; unreadable.
        ((branch r1
                 (write "(nil . t)")
                 (branch r2
                         (write ") . t)")
                         (write "\")) . t)")
                         (write ") . t)")
                         (write ")))")
                         (write "\")))")
                         (write "\")) . t)"))
                 ))
        ))))

(defcustom mime-ccl-lexical-analyzer
  ;; Enable the CCL analyzer by default only when neither facility is
  ;; marked broken.  NOTE(review): `ccl-usable' is presumably the
  ;; facility that is broken on Emacsen without CCL (e.g. non-MULE
  ;; XEmacs) -- confirm against APEL.  `broken-p' on a facility that
  ;; was never declared is expected to return nil, so the extra check
  ;; should be harmless where the facility is unknown.
  (unless (or (broken-p 'ccl-usable)
              (broken-p 'ccl-execute-eof-block))
    'mime-default-ccl-lexical-analyzer)
  "CCL program symbol used by `mime-lexical-analyze'.
When nil, CCL is not used.
See the docstring of `std11-ccl-lexical-analyzer' for details of the
CCL program.
If you modify `mime-lexical-analyzer', either set this variable to nil
or prepare a corresponding CCL program."
  :group 'mime
  ;; The nil constant comes first: nil is also a symbol, so listing
  ;; `symbol' first would shadow the \"Do not use CCL.\" choice.
  :type '(choice (const :tag "Do not use CCL." nil) symbol))

(defcustom mime-lexical-analyzer
'(std11-analyze-quoted-string
std11-analyze-domain-literal
Expand Down Expand Up @@ -70,22 +212,27 @@ be the result."

(defun mime-lexical-analyze (string)
  "Analyze STRING as lexical tokens of MIME.
Return a list of (TYPE . TEXT) tokens with `spaces' and `comment'
tokens removed.

When `mime-ccl-lexical-analyzer' is non-nil, that CCL program is run
on STRING first and its output is parsed with `read' as a cons
\(TOKENS . FLAG).  TOKENS is returned when FLAG is non-nil.  If FLAG
is nil, or the output cannot be parsed, STRING is analyzed with
`std11-lexical-analyze' and `mime-lexical-analyzer' instead."
  (let ((ccl-result
         (when mime-ccl-lexical-analyzer
           (let ((output (ccl-execute-on-string
                          mime-ccl-lexical-analyzer
                          (make-vector 9 0) (or string ""))))
             ;; Unreadable output is treated like a nil FLAG, so a
             ;; misbehaving CCL program degrades to the Lisp analyzer
             ;; instead of signaling from `read'.
             (condition-case nil
                 (read output)
               (error nil)))))
        ret prev tail)
    ;; `cdr-safe' guards against output that is not a cons.
    (if (cdr-safe ccl-result)
        (car ccl-result)
      (setq ret (std11-lexical-analyze string mime-lexical-analyzer))
      ;; skip leading linear-white-space.
      (while (memq (car (car ret)) '(spaces comment))
        (setq ret (cdr ret)))
      (setq prev ret
            tail (cdr ret))
      ;; remove linear-white-space.
      (while tail
        (if (memq (car (car tail)) '(spaces comment))
            ;; Unlink TAIL's cell; PREV stays on the last kept cell.
            (progn
              (setcdr prev (cdr tail))
              (setq tail (cdr tail)))
          (setq prev (cdr prev)
                tail (cdr tail))))
      ret)))


;;; @ field parser
Expand Down

10 comments on commit 488a4d7

@tats
Copy link

@tats tats commented on 488a4d7 Apr 26, 2015

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, this commit causes an error in SEMI with XEmacs (Debian's xemacs21 21.4.22-11).
Comments?

Compiling /usr/share/xemacs21/site-lisp/semi/mime-signature.el...
While compiling toplevel forms in file /usr/share/xemacs21/site-lisp/semi/mime-signature.el:
  !! Invalid read syntax ((". in wrong context"))

@ikazuhiro
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the report.
The error occurs in defining default value of mime-edit-mime-version-field-for-message/partial in mime-edit.el.
Should be fixed at db0f915. Please test.

@tats
Copy link

@tats tats commented on 488a4d7 Apr 28, 2015

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed at db0f915.

It works. Thank you.

@tats
Copy link

@tats tats commented on 488a4d7 Apr 29, 2015

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, XEmacs MULE works, but XEmacs non-MULE fails. To use CCL,
it should be checked with (unless-broken ccl-usable ...) as FLIM-ELS.
cf. https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=783713

@ikazuhiro
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even officially released versions of FLIM and SEMI have problems on non-mule environments. For example,

  1. eword-encode-split-string (one of FLIM's basic function in eword-encode.el) may error.
  2. mime-edit.el is installable, but not loadable.
    If you really need a fix for this commit, I'll try it later. But I doubt FLIM is used by non-mule users, and I would rather drop support for non-mule environments than fix them.

@tats
Copy link

@tats tats commented on 488a4d7 Apr 30, 2015

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mime-edit.el is installable, but not loadable.

Hmm, on my XEmacs non-MULE environment, without CCL programs,
(flim-1_14-wl 2014-09-15 and semi-epg 2014-02-13), mime-edit is
loadable and can be used from Wanderlust.

Note that the *.elc files are incompatible between MULE and
non-MULE, so you should byte-compile APEL, FLIM, SEMI and
Wanderlust with non-MULE.

want to drop a support for non-mule

Anyway, I agree it's hard to support non-MULE, so I'll follow
your decision, whether you drop non-MULE or not.

@ikazuhiro
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I confirmed with xemacs21-nomule (sid), APEL (https://github.com/wanderlust/apel), FLIM-1.14.9 and SEMI-1.14.6. (require 'mime-edit) raises error because mime-edit.el contains below code

(mime-encode-field-body
 (concat " 1.0 (split by " mime-edit-version ")\n")
 "MIME-Version")

Above code raises error Symbol's function definition is void: char-charset on non-mule environment.

@tats
Copy link

@tats tats commented on 488a4d7 May 1, 2015

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(require 'mime-edit) raises error
Symbol's function definition is void: char-charset on non-mule environment.

Ah, I see. (require 'poem) may prevent this error.

I didn't meet this error because I used M-x wl RET. And then I can create
a message and save it to +draft without errors on xemacs21-nomule.

@ikazuhiro
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Eventually I've committed the change to support non-mule environments, because only a trivial change was required. But support for non-mule will probably be dropped in the not-so-distant future.

@tats
Copy link

@tats tats commented on 488a4d7 May 3, 2015

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you. I've updated Debian packages flim and semi with your changes.

Please sign in to comment.