Skip to content

Commit

Permalink
Merge: core: intro BytesWriter and BytesReader
Browse files Browse the repository at this point in the history
Introduce the classes `BytesReader` and `BytesWriter` to support byte read/write operations directly on `Bytes` in memory.

The new classes are now superclasses of `StringReader` (which converts the `String` source to `Bytes` in its constructor) and `StringWriter` (which cleans the UTF-8 string in `to_s` only). The only differences between the string and bytes streams are the behavior of `StringWriter::to_s` and the `String` parameter expected by the constructor of `StringReader`.

`BytesWriter` supports writing any bytes, including composing UTF-8 characters byte by byte, a behavior also useful for `StringWriter`:
~~~
var writer = new BytesWriter

# Write just the first half of the character
writer.write_byte 0xC2u8
assert writer.to_s == "\\xC2"
assert writer.bytes.to_s == "�"

# Complete the character
writer.write_byte 0xA2u8
assert writer.to_s == "\\xC2\\xA2"
assert writer.bytes.to_s == "¢"
~~~

`BytesReader` also supports reading any bytes, including the bytes composing a UTF-8 character for `StringReader`:

~~~
var reader = new BytesReader(b"a…b")
assert reader.read_char == 'a'
assert reader.read_byte == 0xE2u8 # 1st byte of '…'
assert reader.read_byte == 0x80u8 # 2nd byte of '…'
assert reader.read_char == '�' # Reads the last byte as an invalid char
assert reader.read_all_bytes == b"b"
~~~

Pull-Request: nitlang#2539
Reviewed-by: Jean Privat <jean@pryen.org>
Reviewed-by: Lucas Bajolet <r4pass@hotmail.com>
  • Loading branch information
privat committed Sep 18, 2017
2 parents 932d148 + 697a18b commit 1f1aa04
Show file tree
Hide file tree
Showing 3 changed files with 144 additions and 56 deletions.
15 changes: 6 additions & 9 deletions lib/core/bytes.nit
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ class Bytes
return slice(st, ed - st + 1)
end

# Returns a subset of the content of `self` starting at `from` and of length `count`
# Copy a subset of `self` starting at `from` and of `count` bytes
#
# var b = "abcd".to_bytes
# assert b.slice(1, 2).hexdigest == "6263"
Expand All @@ -239,7 +239,7 @@ class Bytes
return ret
end

# Returns a copy of `self` starting at `from`
# Copy of `self` starting at `from`
#
# var b = "abcd".to_bytes
# assert b.slice_from(1).hexdigest == "626364"
Expand Down Expand Up @@ -453,18 +453,15 @@ class Bytes
length += ln
end

# Appends the bytes of `s` to `selftextextt`
fun append_text(s: Text) do
for i in s.substrings do
append_ns(i.fast_cstring, i.byte_length)
end
end
# Appends the bytes of `str` to `self`
fun append_text(str: Text) do str.append_to_bytes self

redef fun append_to(b) do b.append self

redef fun enlarge(sz) do
if capacity >= sz then return
persisted = false
if capacity < 16 then capacity = 16
while capacity < sz do capacity = capacity * 2 + 2
var ns = new CString(capacity)
items.copy_to(ns, length, 0, 0)
Expand Down Expand Up @@ -931,7 +928,7 @@ end
redef class FlatText
redef fun append_to_bytes(b) do
var from = if self isa FlatString then first_byte else 0
b.append_ns_from(items, byte_length, from)
if isset _items then b.append_ns_from(items, byte_length, from)
end
end

Expand Down
183 changes: 137 additions & 46 deletions lib/core/stream.nit
Original file line number Diff line number Diff line change
Expand Up @@ -662,81 +662,172 @@ abstract class Duplex
super Writer
end

# `Stream` that can be used to write to a `String`
# Write to `bytes` in memory
#
# Mainly used for compatibility with Writer type and tests.
class StringWriter
# ~~~
# var writer = new BytesWriter
#
# writer.write "Strings "
# writer.write_char '&'
# writer.write_byte 0x20u8
# writer.write_bytes "bytes".to_bytes
#
# assert writer.to_s == "\\x53\\x74\\x72\\x69\\x6E\\x67\\x73\\x20\\x26\\x20\\x62\\x79\\x74\\x65\\x73"
# assert writer.bytes.to_s == "Strings & bytes"
# ~~~
#
# As with any binary data, UTF-8 code points encoded on two bytes or more
# can be constructed byte by byte.
#
# ~~~
# writer = new BytesWriter
#
# # Write just the character first half
# writer.write_byte 0xC2u8
# assert writer.to_s == "\\xC2"
# assert writer.bytes.to_s == "�"
#
# # Complete the character
# writer.write_byte 0xA2u8
# assert writer.to_s == "\\xC2\\xA2"
# assert writer.bytes.to_s == "¢"
# ~~~
class BytesWriter
super Writer

private var content = new Buffer
redef fun to_s do return content.to_s
redef fun is_writable do return not closed
# Written memory
var bytes = new Bytes.empty

redef fun write_bytes(b) do
content.append(b.to_s)
end
redef fun to_s do return bytes.chexdigest

redef fun write(str)
do
assert not closed
content.append(str)
if closed then return
str.append_to_bytes bytes
end

redef fun write_char(c)
do
assert not closed
content.add(c)
if closed then return
bytes.add_char c
end

redef fun write_byte(value)
do
if closed then return
bytes.add value
end

redef fun write_bytes(b)
do
if closed then return
bytes.append b
end

# Is the stream closed?
protected var closed = false

redef fun close do closed = true
redef fun is_writable do return not closed
end

# `Stream` used to read from a `String`
# `Stream` writing to a `String`
#
# Mainly used for compatibility with Reader type and tests.
class StringReader
# This class has the same behavior as `BytesWriter`
# except for `to_s` which decodes `bytes` to a string.
#
# ~~~
# var writer = new StringWriter
#
# writer.write "Strings "
# writer.write_char '&'
# writer.write_byte 0x20u8
# writer.write_bytes "bytes".to_bytes
#
# assert writer.to_s == "Strings & bytes"
# ~~~
class StringWriter
super BytesWriter

# Content written so far, obtained with `bytes.to_s`
#
# This is the only redefinition in this class; the write services
# are inherited from `BytesWriter`.
redef fun to_s do return bytes.to_s
end

# Read from `bytes` in memory
#
# ~~~
# var reader = new BytesReader(b"a…b")
# assert reader.read_char == 'a'
# assert reader.read_byte == 0xE2u8 # 1st byte of '…'
# assert reader.read_byte == 0x80u8 # 2nd byte of '…'
# assert reader.read_char == '�' # Reads the last byte as an invalid char
# assert reader.read_all_bytes == b"b"
# ~~~
class BytesReader
super Reader

# The string to read from.
var source: String
# Source data to read
var bytes: Bytes

# The current position in the string (bytewise).
private var cursor: Int = 0
# The current position in `bytes`
private var cursor = 0

redef fun read_char do
if cursor < source.length then
# Fix when supporting UTF-8
var c = source[cursor]
cursor += 1
return c
else
return null
end
end
redef fun read_char
do
if cursor >= bytes.length then return null

redef fun read_byte do
if cursor < source.length then
var c = source.bytes[cursor]
cursor += 1
return c
else
return null
end
var len = bytes.items.length_of_char_at(cursor)
var char = bytes.items.char_at(cursor)
cursor += len
return char
end

redef fun close do
source = ""
redef fun read_byte
do
if cursor >= bytes.length then return null

var c = bytes[cursor]
cursor += 1
return c
end

redef fun read_all_bytes do
var nslen = source.length - cursor
var nns = new CString(nslen)
source.copy_to_native(nns, nslen, cursor, 0)
return new Bytes(nns, nslen, nslen)
redef fun close do bytes = new Bytes.empty

redef fun read_all_bytes
do
var res = bytes.slice_from(cursor)
cursor = bytes.length
return res
end

redef fun eof do return cursor >= source.byte_length
redef fun eof do return cursor >= bytes.length
end

# `Stream` reading from a `String` source
#
# This class has the same behavior as `BytesReader`
# except for its constructor accepting a `String`.
#
# ~~~
# var reader = new StringReader("a…b")
# assert reader.read_char == 'a'
# assert reader.read_byte == 0xE2u8 # 1st byte of '…'
# assert reader.read_byte == 0x80u8 # 2nd byte of '…'
# assert reader.read_char == '�' # Reads the last byte as an invalid char
# assert reader.read_all == "b"
# ~~~
class StringReader
super BytesReader

autoinit source

# String given to the constructor, converted to `bytes` by `init`
var source: String

# Fill `bytes` from `source` with `source.to_bytes`
init do bytes = source.to_bytes

# Reset `source` to the empty string, then call the inherited `close`
redef fun close
do
source = ""
super
end
end
2 changes: 1 addition & 1 deletion lib/core/text/flat.nit
Original file line number Diff line number Diff line change
Expand Up @@ -1205,7 +1205,7 @@ private class FlatBufferByteIterator

var curr_pos: Int

init do target_items = target._items
init do if isset target._items then target_items = target._items

redef fun index do return curr_pos

Expand Down

0 comments on commit 1f1aa04

Please sign in to comment.