Permalink
Browse files

add specs to ensure all data is returned as UTF-8

  • Loading branch information...
1 parent 31e904f commit 1a6ce4561f85ee4a889b8460bc06edef1f3428aa @yob committed Jun 4, 2009
Showing with 117 additions and 1 deletion.
  1. +58 −0 data/iso_8859_1.xml
  2. BIN data/utf_16.xml
  3. +12 −1 lib/onix/reader.rb
  4. +47 −0 spec/reader_spec.rb
View
@@ -0,0 +1,58 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE ONIXMessage SYSTEM "http://www.editeur.org/onix/2.1/02/reference/onix-international.dtd">
+<ONIXMessage>
+ <Header>
+ <FromCompany>John Wiley &amp; Sons Australia</FromCompany>
+ <ToCompany>Professional/Trade</ToCompany>
+ <SentDate>20090519</SentDate>
+ <MessageNote>Complete Reload</MessageNote>
+ <DefaultLanguageOfText>eng</DefaultLanguageOfText>
+ <DefaultPriceTypeCode>02</DefaultPriceTypeCode>
+ <DefaultCurrencyCode>AUD</DefaultCurrencyCode>
+ </Header>
+ <Product>
+ <RecordReference>1742169341</RecordReference>
+ <NotificationType>03</NotificationType>
+ <ProductIdentifier>
+ <ProductIDType>02</ProductIDType>
+ <IDValue>1742169341</IDValue>
+ </ProductIdentifier>
+ <ProductIdentifier>
+ <ProductIDType>15</ProductIDType>
+ <IDValue>9781742169347</IDValue>
+ </ProductIdentifier>
+ <ProductForm>BC</ProductForm>
+ <Title>
+ <TitleType>01</TitleType>
+ <TitleText>Property Is a Girl's Best Friend</TitleText>
+ </Title>
+ <Contributor>
+ <ContributorRole>A01</ContributorRole>
+ <PersonNameInverted>Küng, Hans</PersonNameInverted>
+ </Contributor>
+ <NumberOfPages>304</NumberOfPages>
+ <BICMainSubject>KF</BICMainSubject>
+ <Subject>
+ <SubjectSchemeIdentifier>12</SubjectSchemeIdentifier>
+ <SubjectCode>F</SubjectCode>
+ </Subject>
+ <AudienceCode>01</AudienceCode>
+ <Imprint>
+ <ImprintName>Wrightbooks</ImprintName>
+ </Imprint>
+ <Publisher>
+ <PublishingRole>01</PublishingRole>
+ <PublisherName>John Wiley &amp; Sons Australia</PublisherName>
+ </Publisher>
+ <PublishingStatus>02</PublishingStatus>
+ <PublicationDate>20091001</PublicationDate>
+ <SupplyDetail>
+ <SupplierName>John Wiley &amp; Sons Australia</SupplierName>
+ <SupplierRole>01</SupplierRole>
+ <ProductAvailability>10</ProductAvailability>
+ <Price>
+ <PriceAmount>32.95</PriceAmount>
+ </Price>
+ </SupplyDetail>
+ </Product>
+</ONIXMessage>
View
Binary file not shown.
View
@@ -105,14 +105,25 @@ def read_next
if @reader.name == "Header"
return ONIX::Header.from_xml(@reader.read_outer_xml.dup)
elsif @reader.name == "Product"
- return @product_klass.from_xml(@reader.read_outer_xml.dup)
+ str = normalise_string_encoding(@reader.read_outer_xml.dup)
+ return @product_klass.from_xml(str)
end
end
end
return nil
end
# If necessary, tag the provided string as UTF-8.
#
# The bytes are NOT transcoded: the string is only relabelled via
# String#force_encoding. NOTE(review): this presumes the XML reader has
# already produced UTF-8 bytes, whatever the source file's declared
# encoding was -- confirm against the reader's documentation.
#
# str - the string to normalise. It is never modified; a copy is returned
#       when relabelling happens.
#
# Returns a UTF-8 tagged copy of str, or str itself when it does not
# support per-string encodings (Ruby 1.8 strings, nil).
#
def normalise_string_encoding(str)
  # Capability check rather than comparing RUBY_VERSION strings, which
  # sort lexicographically and not numerically.
  return str unless str.respond_to?(:force_encoding)

  str.dup.force_encoding("utf-8")
end
+
# simple mapping of encoding constants to a string
#
def encoding_const_to_name(const)
View
@@ -11,6 +11,8 @@
@file1 = File.join(data_path, "9780194351898.xml")
@file2 = File.join(data_path, "two_products.xml")
@entity_file = File.join(data_path, "entities.xml")
+ @utf_16_file = File.join(data_path, "utf_16.xml")
+ @iso_8859_1_file = File.join(data_path, "iso_8859_1.xml")
end
specify "should initialize with a filename" do
@@ -76,4 +78,49 @@
products.first.record_reference.should eql("9780732287573")
products.first.titles.first.title_text.should eql("High Noon\342\200\223in Nimbin")
end
+
# Regression guard: reading a file containing more than 7 records has been
# observed to segfault, for reasons not yet understood.
specify "should correctly parse a file with more than 7 records in in" do
  counter = 0
  # Tally every product the reader yields.
  ONIX::Reader.new(@long_file).each { |_product| counter += 1 }

  counter.should eql(346)
end
+
specify "should transparently convert a iso-8859-1 file to utf-8" do
  # Walk the whole file, remembering only the final product yielded.
  last_product = nil
  ONIX::Reader.new(@iso_8859_1_file).each { |item| last_product = item }

  # An explicit check of the string's encoding is left disabled because
  # ROXML appears to munge the string encodings:
  #
  #   if RUBY_VERSION >= "1.9"
  #     utf8 = Encoding.find("utf-8")
  #     last_product.contributors[0].person_name_inverted.encoding.should eql(utf8)
  #   end

  author = last_product.contributors[0].person_name_inverted
  author.should eql("Küng, Hans")
end
+
specify "should transparently convert a utf-16 file to utf-8" do
  # Walk the whole file, remembering only the final product yielded.
  last_product = nil
  ONIX::Reader.new(@utf_16_file).each { |item| last_product = item }

  # An explicit check of the string's encoding is left disabled because
  # ROXML appears to munge the string encodings:
  #
  #   if RUBY_VERSION >= "1.9"
  #     utf8 = Encoding.find("utf-8")
  #     last_product.contributors[0].person_name_inverted.encoding.should eql(utf8)
  #   end

  author = last_product.contributors[0].person_name_inverted
  author.should eql("Küng, Hans")
end
end

0 comments on commit 1a6ce45

Please sign in to comment.