You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
(Using code from #57)
Calling record.content_stream().read() before writing a record changes the record in such a way that the file subsequently written out is incorrect and mangled.
import pytest
from io import BytesIO
from tempfile import NamedTemporaryFile
from warcio.archiveiterator import ArchiveIterator
from warcio.warcwriter import WARCWriter
from warcio.statusandheaders import StatusAndHeaders
def test_identity_correct():
    """Round-trip an untouched request record through a WARC file.

    A record is created, written uncompressed to a temporary file, and
    parsed back with ArchiveIterator. The parsed record is expected to
    match the one that was created, and its raw stream is expected to
    yield the original payload bytes.
    """
    payload = b'foobar'
    # Attributes compared between the parsed record and the created one,
    # checked in this order.
    compared_attrs = (
        'rec_type',
        'rec_headers',
        'content_type',
        'length',
        'http_headers',
    )

    with NamedTemporaryFile() as fd:
        writer = WARCWriter(fd, gzip=False)
        request_line = StatusAndHeaders(
            'GET / HTTP/1.1', {}, is_http_request=True)
        extra_warc_headers = {'Foo': 'Bar'}
        record = writer.create_warc_record(
            'http://example.com/',
            'request',
            payload=BytesIO(payload),
            warc_headers_dict=extra_warc_headers,
            http_headers=request_line,
        )

        writer.write_record(record)

        # Rewind so the iterator parses the record that was just written.
        fd.seek(0)
        rut = next(ArchiveIterator(fd))

        for attr in compared_attrs:
            assert getattr(rut, attr) == getattr(record, attr)
        assert rut.raw_stream.read() == payload
def test_identity_fail():
    """Reproduce the failure: read the content stream, then write.

    Same round trip as test_identity_correct, except that
    record.content_stream().read() is called before the record is
    written. According to the report this leaves the record in a state
    that is written out incorrectly, so the payload read back from the
    file no longer equals the original payload.
    """
    payload = b'foobar'
    # Attributes compared between the parsed record and the created one,
    # checked in this order.
    compared_attrs = (
        'rec_type',
        'rec_headers',
        'content_type',
        'length',
        'http_headers',
    )

    with NamedTemporaryFile() as fd:
        writer = WARCWriter(fd, gzip=False)
        request_line = StatusAndHeaders(
            'GET / HTTP/1.1', {}, is_http_request=True)
        extra_warc_headers = {'Foo': 'Bar'}
        record = writer.create_warc_record(
            'http://example.com/',
            'request',
            payload=BytesIO(payload),
            warc_headers_dict=extra_warc_headers,
            http_headers=request_line,
        )

        # The step under test: the result is deliberately discarded; only
        # the act of reading before write_record matters here.
        record.content_stream().read()
        writer.write_record(record)

        # Rewind so the iterator parses the record that was just written.
        fd.seek(0)
        rut = next(ArchiveIterator(fd))

        for attr in compared_attrs:
            assert getattr(rut, attr) == getattr(record, attr)
        # This is the assertion the reported traceback shows failing.
        assert rut.raw_stream.read() == payload
# Script-style driver: run the passing scenario first, then the failing one.
# Each print is reached only if the preceding call raised no exception, so
# the printed lines show how far the script got.
test_identity_correct()
print("Write Worked")
# Expected to raise AssertionError (see the recorded output below), in which
# case the final print is never reached.
test_identity_fail()
print("Write 2 Worked")
Output:
Write Worked
Traceback (most recent call last):
File "./test2.py", line 57, in <module>
test_identity_fail()
File "./test2.py", line 53, in test_identity_fail
assert rut.raw_stream.read() == payload
AssertionError
The text was updated successfully, but these errors were encountered:
(Using code from #57)
Calling record.content_stream().read() before writing a record changes the record in such a way that the file subsequently written out is incorrect and mangled.
Output:
The text was updated successfully, but these errors were encountered: