Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

executable file 131 lines (104 sloc) 3.657 kB
require 'rubygems'
require 'marc' # ruby gem for working with MARC data in Ruby
require 'block_mapper' # the generic mapper class
require 'marc_record_ext.rb' # our custom methods
require 'base64' # so we can base64 encode the marc21 record
# Maps MARC records onto flat documents (hashes keyed by field name).
#
# The real work is delegated to BlockMapper: this class registers one block
# per output field and then runs the mapper over a MARC::Reader.
class MARCMapper
  # Read +marc_file+ and map every record in it.
  #
  #   MARCMapper.from_marc_file('/path/to/data.mrc') do |mapped_doc|
  #     # do something here... logging etc..
  #   end
  #
  # marc_file:: handed straight to MARC::Reader.new (a path to a MARC file)
  # blk::       optional block, forwarded untouched to BlockMapper#run
  #
  # Returns whatever BlockMapper#run returns; according to the original
  # author's note that is an array of documents (hashes).
  #
  # NOTE(review): a record without a 001 field makes the :id mapping raise
  # NoMethodError (rec['001'] is presumably nil then) -- confirm whether such
  # records can occur in the input.
  def self.from_marc_file(marc_file, &blk)
    # The mapper is still stored on the class, exactly as before, in case
    # other code reads it; the local is only there for readability.
    mapper = @mapper = BlockMapper.new

    # add custom methods (values_for, extract, isbn, ...) to each marc record
    mapper.before_each_source_item do |rec, _index|
      rec.extend MARCRecordExt
    end

    # trailing run of ; / . , : and spaces
    trailing_punctuation = /( |;|\/|\.|,|:)+$/

    # Strings are cleaned; anything else (nil, numbers, documents, ...) is
    # handed back untouched.
    strip_trailing = lambda do |text|
      text.is_a?(String) ? text.gsub(trailing_punctuation, '') : text
    end

    # Executed for each mapped value, after it has been mapped.
    # Array elements get the same treatment as scalar values, so a nil or
    # other non-String element no longer aborts the run with NoMethodError.
    mapper.after_each_mapped_value do |_field, value|
      if value.is_a?(Array)
        value.map { |item| strip_trailing.call(item) }
      else
        strip_trailing.call(value)
      end
    end

    # id: the 001 value with spaces and slashes removed
    mapper.map :id do |rec, _index|
      rec['001'].value.gsub(" ", "").gsub("/", "")
    end

    # titles (text)
    mapper.map :title_t do |rec, _index|
      rec.values_for '245', 'a'
    end

    mapper.map :sub_title_t do |rec, _index|
      rec.values_for '245', 'b'
    end

    mapper.map :alt_titles_t do |rec, _index|
      rec.extract '240:b 700:t 710:t 711:t 440:a 490:a 505:a 830:a'
    end

    mapper.map :title_added_entry_t do |rec, _index|
      rec.values_for '700', 't'
    end

    # title sort
    mapper.map :title_sort do |rec, _index|
      rec.extract '245:a'
    end

    mapper.map :author_t do |rec, _index|
      rec.extract '100:a 110:a 111:a 130:a 700:a 710:a 711:a'
    end

    mapper.map :published_t do |rec, _index|
      rec.extract '260:a'
    end

    mapper.map :isbn_t do |rec, _index|
      rec.isbn # in MARCRecordExt module
    end

    mapper.map :material_type_t do |rec, _index|
      rec.values_for '300', 'a'
    end

    # subject (text)
    mapper.map :subject_t do |rec, _index|
      rec.extract '600:a 610:a 611:a 630:a 650:a 651:a 655:a 690:a'
    end

    # subject (facets)
    mapper.map :subject_era_facet do |rec, _index|
      rec.extract '650:d 650:y 651:y 655:y'
    end

    mapper.map :geographic_subject_facet do |rec, _index|
      rec.extract '650:c 650:z 651:a 651:x 651:z 655:z'
    end

    mapper.map :language_facet do |rec, _index|
      rec.languages # in MARCRecordExt module
    end

    # format fields
    mapper.map :format_facet do |rec, _index|
      rec.format # in MARCRecordExt module
    end

    # Downcased format with spaces converted to _ (a " _" pair is collapsed
    # to a single space first, so it yields one underscore, not two).
    # This can be used for the partial view mapping.
    mapper.map :format_code_t do |rec, _index|
      rec.format.to_s.downcase.gsub(/ _/, ' ').gsub(/ /, '_')
    end

    # grab some vernacular for demonstration purposes (e.g. "did you mean")
    mapper.map :vern_t do |rec, _index|
      rec.extract '880:a 880:b 880:c 880:e 880:f 880:p 880:t'
    end

    # _display is stored, but not indexed.
    # NOTE(review): the earlier comment here spoke of storing marc21 so it can
    # be read back into a MARC::Record; what is actually stored is the result
    # of rec.to_xml -- confirm which one the consumer expects.
    mapper.map :marc_display do |rec, _index|
      rec.to_xml
    end

    reader = MARC::Reader.new(marc_file)
    mapper.run(reader, &blk)
  end
end
Jump to Line
Something went wrong with that request. Please try again.