Skip to content

Commit

Permalink
Merge pull request #2 from diasbruno/feature/setup-unit-testing
Browse files Browse the repository at this point in the history
Setup unit testing
  • Loading branch information
veer66 committed Nov 10, 2023
2 parents 7f91ab2 + 3db84ae commit ac0afef
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 11 deletions.
70 changes: 70 additions & 0 deletions tests.lisp
@@ -0,0 +1,70 @@
;;; Package holding the unit tests in this file.
;;; Only the COMMON-LISP package is used; every 5am:, flex: and
;;; utf8-input-stream: symbol below is written with an explicit package prefix.
(defpackage #:utf8-input-stream.tests
(:use :cl))

(in-package #:utf8-input-stream.tests)

;; FiveAM suite that groups the tests defined in this file.
(5am:def-suite utf8-test-suite)

;; Make UTF8-TEST-SUITE the current suite for the def-test forms that follow.
(5am:in-suite utf8-test-suite)

(defmacro with-utf8-input-stream ((var source) &body body)
  "Evaluate BODY with VAR bound to the stream returned by
UTF8-INPUT-STREAM:MAKE-UTF8-INPUT-STREAM.

The stream wraps a flexi-streams in-memory input stream over the octets of
SOURCE, produced by FLEX:STRING-TO-OCTETS with a :UTF-8 external format.
SOURCE is evaluated exactly once."
  ;; Build the octet-producing form first so the final template stays short.
  (let ((octets-form
          `(flex:string-to-octets ,source
                                  :external-format
                                  (flex:make-external-format :utf-8))))
    `(let ((,var (utf8-input-stream:make-utf8-input-stream
                  (flex:make-in-memory-input-stream ,octets-form))))
       ,@body)))

(5am:def-test read-a-single-ascii-character-of-a-string ()
  "The first READ-CHAR on an ASCII source yields the source's first character."
  (with-utf8-input-stream (in "a simple string")
    (let ((first-char (read-char in)))
      (5am:is (char-equal first-char #\a)))))

(5am:def-test peek-a-single-ascii-character-of-a-string ()
  "PEEK-CHAR reports the same character that the following READ-CHAR consumes."
  (with-utf8-input-stream (in "a simple string")
    ;; LET* keeps the order explicit: peek first, then read.
    (let* ((peeked (peek-char t in))
           (consumed (read-char in)))
      (5am:is (char-equal peeked consumed)))))

(5am:def-test unread-a-single-ascii-character-of-a-string ()
  "After reading and unreading the first ASCII character, PEEK-CHAR sees it again."
  (with-utf8-input-stream (in "a simple string")
    (let ((consumed (read-char in)))
      (unread-char consumed in))
    (let ((peeked (peek-char t in)))
      (5am:is (char-equal peeked #\a)))))

(5am:def-test read-a-single-utf8-character-of-a-string ()
  "The first READ-CHAR on a source starting with é yields that character."
  (with-utf8-input-stream (in "été")
    (let ((first-char (read-char in)))
      (5am:is (string-equal (string first-char) "é")))))

(5am:def-test peek-a-single-utf8-character-of-a-string ()
  "PEEK-CHAR and the following READ-CHAR agree on a leading é."
  (with-utf8-input-stream (in "été")
    ;; Peek first, then read, exactly in that order.
    (let* ((peeked (string (peek-char t in)))
           (consumed (string (read-char in))))
      (5am:is (string-equal peeked consumed)))))

(5am:def-test unread-a-single-utf8-character-of-a-string ()
  "After reading and unreading a leading é, PEEK-CHAR sees é again."
  (with-utf8-input-stream (in "été")
    (let ((consumed (read-char in)))
      (unread-char consumed in))
    (let ((peeked (string (peek-char t in))))
      (5am:is (string-equal peeked "é")))))

(5am:def-test read-a-single-utf8-wide-character-of-a-string ()
  "The first READ-CHAR on a source starting with the Thai letter ก yields ก."
  (with-utf8-input-stream (s "ก and 𨭎")
    ;; The expected value must be the source's first character; comparing
    ;; against an empty string could never succeed for a one-character string.
    (5am:is (string-equal (string (read-char s)) "ก"))))

(5am:def-test peek-a-single-utf8-wide-character-of-a-string ()
  "PEEK-CHAR and the following READ-CHAR agree on the first character of the source."
  (with-utf8-input-stream (in "ก and 𨭎")
    ;; Peek first, then read, exactly in that order.
    (let* ((peeked (string (peek-char t in)))
           (consumed (string (read-char in))))
      (5am:is (string-equal peeked consumed)))))

(5am:def-test unread-a-single-utf8-wide-character-of-a-string ()
  "After reading and unreading a leading ก, PEEK-CHAR sees ก again."
  (with-utf8-input-stream (s "ก and 𨭎")
    (unread-char (read-char s) s)
    ;; The expected value must be the character that was pushed back; an empty
    ;; string could never match the one-character string built from PEEK-CHAR.
    (5am:is (string-equal (string (peek-char t s)) "ก"))))

(5am:def-test read-a-single-utf8-wide-character-of-a-string2 ()
  "The first READ-CHAR on a source starting with 𨭎 yields that character."
  (with-utf8-input-stream (in "𨭎 and ก")
    (let ((first-char (read-char in)))
      (5am:is (string-equal (string first-char) "𨭎")))))

(5am:def-test peek-a-single-utf8-wide-character-of-a-string2 ()
  "PEEK-CHAR and the following READ-CHAR agree on a leading 𨭎."
  (with-utf8-input-stream (in "𨭎 and ก")
    ;; Peek first, then read, exactly in that order.
    (let* ((peeked (string (peek-char t in)))
           (consumed (string (read-char in))))
      (5am:is (string-equal peeked consumed)))))

(5am:def-test unread-a-single-utf8-wide-character-of-a-string2 ()
  "After reading and unreading a leading 𨭎, PEEK-CHAR sees 𨭎 again."
  (with-utf8-input-stream (in "𨭎 and ก")
    (let ((consumed (read-char in)))
      (unread-char consumed in))
    (let ((peeked (string (peek-char t in))))
      (5am:is (string-equal peeked "𨭎")))))
25 changes: 14 additions & 11 deletions utf8-input-stream.lisp
Expand Up @@ -123,6 +123,15 @@
(ash (mask-field (byte 6 0) b1) 12)
(ash (mask-field (byte 4 0) b0) 18)))))

(declaim (inline utf8-char-code-size))
(defun utf8-char-code-size (code)
  "Return how many bytes (1 to 4) the UTF-8 encoding of code point CODE occupies.

Signals an error when CODE is greater than #x10FFFF."
  ;; Thresholds are checked from the largest range down to the smallest.
  (cond
    ((> code #x10FFFF) (error "Invalid UTF-8 character"))
    ((> code #xFFFF) 4)
    ((> code #x7FF) 3)
    ((> code #x7F) 2)
    (t 1)))

(defun fetch-ch (ctx)
(declare (optimize (speed 3) (debug 0) (safety 0)))
(let ((b0 (read-byte-from-buf ctx)))
Expand All @@ -133,7 +142,7 @@
((four-bytes-ch? b0) (fetch-4-bytes-ch ctx b0))
(t (error 'character-encoding-error :stream-context-pos (pos ctx))))))

(defun read-char-from-buf (ctx)
(defun read-char-from-buf (ctx)
(declare (optimize (speed 3) (debug 0) (safety 0)))
(loop do
(refill-buffer ctx)
Expand All @@ -145,16 +154,10 @@
(read-char-from-buf (ctx s)))

;; NOTE(review): this span is a diff rendering whose +/- markers were stripped.
;; Judging by paren balance and the "14 additions & 11 deletions" summary, the
;; LET* form below appears to be the removed body and the DECF/SETF forms after
;; it the replacement body -- confirm against the actual file.
(defmethod stream-unread-char ((s character-input-stream) ch)
;; Earlier body: applies to the character code of CH the same predicates that
;; fetch-ch applies to a byte read from the buffer, and moves both
;; stream-context-pos and stream-context-buf-pos back by the resulting count.
(let* ((b0 (char-code ch))
(give-back (cond
((one-byte-ch? b0) 1)
((two-bytes-ch? b0) 2)
((three-bytes-ch? b0) 3)
((four-bytes-ch? b0) 4))))
(setf (stream-context-pos (ctx s))
(- (stream-context-pos (ctx s)) give-back))
(setf (stream-context-buf-pos (ctx s))
(- (stream-context-buf-pos (ctx s)) give-back))))
;; Later body: steps stream-context-pos back by one and stream-context-buf-pos
;; back by (utf8-char-code-size (char-code ch)).
;; NOTE(review): pos moves by 1 while buf-pos moves by the encoded byte length;
;; presumably pos counts characters rather than bytes -- verify against the code
;; that advances pos.
(decf (stream-context-pos (ctx s)))
(setf (stream-context-buf-pos (ctx s))
(- (stream-context-buf-pos (ctx s))
(utf8-char-code-size (char-code ch)))))

(defun vector-to-string (v)
(let* ((len (length v))
Expand Down
12 changes: 12 additions & 0 deletions utf8-input-stream.tests.asd
@@ -0,0 +1,12 @@
;;;; utf8-input-stream.tests.asd

;;; ASDF system containing the FiveAM unit tests for utf8-input-stream.
;;; Loading it compiles tests.lisp; (asdf:test-system :utf8-input-stream.tests)
;;; additionally runs the suite through the :perform method below.
(asdf:defsystem #:utf8-input-stream.tests
  :description "Unit tests for utf8-input-stream"
  :author "Vee Satayamas <vsatayamas@gmail.com>"
  :license "MIT"
  :version "0.0.1"
  :serial t
  :depends-on (#:fiveam
               #:flexi-streams
               #:utf8-input-stream)
  :components ((:file "tests"))
  ;; The symbols are resolved at run time because neither the FIVEAM package
  ;; nor the test package exists yet when this .asd file is read.
  :perform (asdf:test-op (op system)
             (declare (ignorable op system))
             (uiop:symbol-call '#:fiveam '#:run!
                               (uiop:find-symbol* '#:utf8-test-suite
                                                  '#:utf8-input-stream.tests))))

0 comments on commit ac0afef

Please sign in to comment.