Skip to content

Commit

Permalink
BL-1742 Add extract_marc_subfield_limit
Browse files Browse the repository at this point in the history
  • Loading branch information
ebtoner committed Nov 30, 2023
1 parent f4f985e commit baf2e96
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 5 deletions.
11 changes: 6 additions & 5 deletions lib/cob_index/indexer_config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -87,21 +87,22 @@
711fgklnpst

}.join(":")), wrap_begin_end

# Subfield 0 / subfield 1 of 700, 710 and 711, taken only when subfield "t" is
# present (third argument true), i.e. the added entry names a title.
to_field "title_added_entry_authority_id_ms", extract_marc_subfield_limit("7000:7100:7110", "t", true)
to_field "title_added_entry_real_world_object_uri_ms", extract_marc_subfield_limit("7001:7101:7111", "t", true)

# Sort key: first matching 245 value only, without alternate-script (880) values.
to_field "title_sort", extract_marc("245abcfgknps", alternate_script: false, first: true)

# Creator/contributor fields
# Values listed in CORPORATE_NAMES are removed from the creator fields via delete_if.
to_field "creator_t", extract_marc("245c:100abcdejlmnopqrtu:110abcdelmnopt:111acdejlnopt:700abcdejqu:710abcde:711acdej", trim_punctuation: true), delete_if(CORPORATE_NAMES), wrap_begin_end
to_field "creator_authority_record_id_ms", extract_marc("1000:1100:1110")
to_field "creator_real_world_object_uri_ms", extract_marc("1001:1101:1111")



to_field "creator_facet", extract_marc("100abcdq:110abcd:111ancdj:700abcdq:710abcd:711ancdj", trim_punctuation: true), delete_if(CORPORATE_NAMES)
to_field "creator_display", extract_creator, delete_if(CORPORATE_NAMES)
# contributor_display values are JSON strings; the "name" key is what gets checked against CORPORATE_NAMES.
to_field "contributor_display", extract_contributor, delete_if(Proc.new { |v| CORPORATE_NAMES.include?(JSON.parse(v)["name"]) })
# NOTE(review): the next three rules write to Solr fields that are also written by
# extract_marc_subfield_limit rules in this section (title_added_entry_authority_id_ms
# above, the two contributor_* fields below). This looks like pre-change diff lines
# interleaved with their replacements -- confirm only one set exists in the real file.
to_field "contributor_authority_record_id_ms", extract_marc("7000:7100:7110")
to_field "contributor_real_world_object_uri_ms", extract_marc("7001:7101:7111")
to_field "title_added_entry_authority_id_ms", extract_marc("700t:710t:711t")
# Subfield 0 / subfield 1 of 700, 710 and 711, taken only when subfield "t" is
# absent (third argument false), i.e. the complement of the title_added_entry_* rules.
to_field "contributor_authority_record_id_ms", extract_marc_subfield_limit("7000:7100:7110", "t", false)
to_field "contributor_real_world_object_uri_ms", extract_marc_subfield_limit("7001:7101:7111", "t", false)

# Vernacular (alternate-script) counterparts, per the extractor names -- TODO confirm against their definitions.
to_field "creator_vern_display", extract_creator_vern, delete_if(CORPORATE_NAMES)
to_field "contributor_vern_display", extract_contributor_vern, delete_if(CORPORATE_NAMES)
Expand Down
26 changes: 26 additions & 0 deletions lib/cob_index/macros/custom.rb
Original file line number Diff line number Diff line change
Expand Up @@ -890,4 +890,30 @@ def extract_donor
end
end
end

# Builds a Traject indexing step that extracts +spec+ only from fields that do
# (or do not) carry a given subfield.
#
# spec           - a standard extract_marc spec string, e.g. "7000:7100:7110".
# subfield_limit - the subfield code (e.g. "t") that decides whether a field qualifies.
# boolean        - true:  extract only from fields that have a non-blank
#                         +subfield_limit+ subfield.
#                  false: extract only from fields that do not have one.
#                  Any other value (e.g. nil) extracts nothing.
#
# The limit is evaluated for each matching field individually. A record that
# holds one 700 with $t and another 700 without $t therefore contributes the
# first field's values when +boolean+ is true and the second field's values
# when it is false, instead of treating every 700 in the record the same way.
#
# Returns a lambda(rec, acc) that appends the extracted values to +acc+.
def extract_marc_subfield_limit(spec, subfield_limit, boolean)
  spec_array = spec.split(":")

  lambda do |rec, acc|
    # One extractor per spec segment keeps the output grouped in spec order.
    spec_array.each do |spec_subset|
      values = Traject::MarcExtractor.cached(spec_subset).collect_matching_lines(rec) do |field, matched_spec, extractor|
        # Strict comparison against true/false: a non-boolean flag selects nothing.
        next unless field[subfield_limit].present? == boolean

        extractor.collect_subfields(field, matched_spec)
      end

      # Skipped fields yield nil; drop those before accumulating.
      acc.concat(values.compact)
    end
  end
end
end
68 changes: 68 additions & 0 deletions spec/cob_index/macros/custom_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2517,4 +2517,72 @@
end
end
end

# extract_marc_subfield_limit with boolean = true: subfield 0 is indexed only
# when subfield "t" is present.
# In extract_marc_subfield_limit.xml the first record is empty, the second has
# a 700 with $t, and the third has a 700 without $t (presumably what
# records[0..2] map to -- `records` is defined outside this block).
describe "#extract_marc_subfield_limit for title_added_entry_authority_id_ms" do
  let(:path) { "extract_marc_subfield_limit.xml" }

  before do
    subject.instance_eval do
      to_field "title_added_entry_authority_id_ms", extract_marc_subfield_limit("7000:7100:7110", "t", true)

      settings do
        provide "marc_source.type", "xml"
      end
    end
  end

  context "no field" do
    it "does not error out" do
      expect(subject.map_record(records[0])).to eq({})
    end
  end

  context "subfield limit matches" do
    it "maps field" do
      expect(subject.map_record(records[1])).to eq("title_added_entry_authority_id_ms" => ["https://id.loc.gov/authorities/names/no95021615"])
    end
  end

  context "subfield limit does not match" do
    it "does not map field" do
      expect(subject.map_record(records[2])).to eq({})
    end
  end
end

# extract_marc_subfield_limit with boolean = false: subfield 0 is indexed only
# when subfield "t" is absent.
# In extract_marc_subfield_limit.xml the first record is empty, the second has
# a 700 with $t, and the third has a 700 without $t (presumably what
# records[0..2] map to -- `records` is defined outside this block).
describe "#extract_marc_subfield_limit for contributor_authority_record_id_ms" do
  let(:path) { "extract_marc_subfield_limit.xml" }

  before do
    subject.instance_eval do
      to_field "contributor_authority_record_id_ms", extract_marc_subfield_limit("7000:7100:7110", "t", false)

      settings do
        provide "marc_source.type", "xml"
      end
    end
  end

  context "no field" do
    it "does not error out" do
      expect(subject.map_record(records[0])).to eq({})
    end
  end

  context "subfield limit matches" do
    it "does not map field" do
      expect(subject.map_record(records[1])).to eq({})
    end
  end

  context "subfield limit does not match" do
    it "maps field" do
      expect(subject.map_record(records[2])).to eq("contributor_authority_record_id_ms" => ["https://id.loc.gov/authorities/names/no2017022085"])
    end
  end
end
end
24 changes: 24 additions & 0 deletions spec/fixtures/marc_files/extract_marc_subfield_limit.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Fixture for the extract_marc_subfield_limit specs: three records, addressed by position. -->
<collection xmlns='http://www.loc.gov/MARC21/slim' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd'>
<!-- Record 1: empty record, no fields at all. -->
<record>
</record>
<!-- Record 2: a 700 that includes subfield t together with subfield 0. -->
<record>
<datafield ind1="1" ind2="2" tag="700">
<subfield code="a">Mozart, Wolfgang Amadeus,</subfield>
<subfield code="d">1756-1791.</subfield>
<subfield code="t">Symphonies,</subfield>
<subfield code="n">K. 73,</subfield>
<subfield code="r">C major</subfield>
<subfield code="0">https://id.loc.gov/authorities/names/no95021615</subfield>
</datafield>
</record>
<!-- Record 3: a 700 with subfield 0 but no subfield t. -->
<record>
<datafield ind1="1" ind2=" " tag="700">
<subfield code="a">Jenkins, Barry,</subfield>
<subfield code="d">1979-</subfield>
<subfield code="0">https://id.loc.gov/authorities/names/no2017022085</subfield>
<subfield code="e">film director,</subfield>
<subfield code="e">screenwriter.</subfield>
</datafield>
</record>
</collection>

0 comments on commit baf2e96

Please sign in to comment.