<a href="https://colab.research.google.com/github/ysugiyama3/google_colab/blob/master/film_reels_marc_records.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **MARC Record Generator (Yale Film Studies Center)**
by Yukari Sugiyama (View source code on <a href='https://github.com/ysugiyama3/google_colab'>Github</a>)

This program generates brief MARC records for film reels from a CSV file provided monthly by the Yale Film Studies Center.

---


In [0]:
#@title 1. Install pymarc
!pip install pymarc

In [0]:
#@title 2. Upload an input CSV file
from google.colab import files
import pandas as pd
import pymarc
from IPython.display import HTML, display
import time
import re
import datetime

def progress(value, max=50000):
    """Build an HTML progress bar for display in the notebook.

    Args:
        value: Current progress value.
        max: Value at which the bar is full. The name shadows the builtin but
            is kept because it is part of the function's signature.

    Returns:
        An IPython ``HTML`` object holding a ``<progress>`` element followed
        by a ``value/max`` counter.
    """
    # The attribute list previously carried a stray comma after `max` and the
    # counter line ended with an invalid `</br>` closing tag; both are fixed
    # here without changing what is rendered.
    return HTML("""
        <progress
            value='{value}'
            max='{max}'
            style='width: 40%'
        >
            {value}
        </progress>
        <br>{value}/{max}<br>
    """.format(value=value, max=max))

def check_period(value):
    """Return ``str(value)`` with a trailing period appended if it lacks one.

    Args:
        value: Any object; it is converted with ``str()`` first.

    Returns:
        The string form of ``value``, guaranteed to end with ``'.'``.
    """
    text = str(value)
    return text if text.endswith('.') else text + '.'

def add_field_008(date):
    """Add the 008 control field to the record held in the global ``item_load``.

    The field data is today's date as ``yymmdd``, then either ``'s'`` plus the
    given date or ``'n'`` plus ``'uuuu'`` when the date is missing, then a
    fixed tail of codes.

    Args:
        date: Date value from the input row; ``None`` or NaN means unknown.
    """
    entered = datetime.datetime.now().strftime('%y%m%d')
    tail = '    xx            --- m|und d'
    if date is None or pd.isnull(date):
        code = entered + 'n' + 'uuuu' + tail
    else:
        # When the column contains blanks pandas reads it as float, so a year
        # arrives as 1950.0; str() of that would add two characters and shift
        # every later position in this fixed-layout field.
        if isinstance(date, float) and date.is_integer():
            date = int(date)
        code = entered + 's' + str(date) + tail
    field_008 = pymarc.Field(tag='008', data=code)
    item_load.add_field(field_008)


def add_field_099(callno):
    """Add a 099 field to the global ``item_load`` record.

    The field has blank indicators and a single subfield ``a`` containing
    ``callno``.
    """
    item_load.add_field(
        pymarc.Field(tag='099', indicators=[' ', ' '], subfields=['a', callno])
    )

def add_field_245(title, medium):
    """Add the 245 (title) field to the global ``item_load`` record.

    The second indicator is the number of characters taken up by a leading
    English article ("The ", "A ", "An "), or ``'0'`` when there is none.

    Args:
        title: Title string, written to subfield ``a``.
        medium: Medium statement, written to subfield ``h`` with a trailing
            period. When it is ``None`` or NaN (a blank CSV cell) the
            subfield is left out rather than being written as "nan.".
    """
    ind_2 = '0'
    for article, skip in (('The ', '4'), ('A ', '2'), ('An ', '3')):
        if title.startswith(article):
            ind_2 = skip
            break

    subfields = ['a', title]
    if medium is not None and not pd.isnull(medium):
        subfields += ['h', check_period(medium)]

    field_245 = pymarc.Field(
        tag='245',
        indicators=['0', ind_2],
        subfields=subfields,
    )
    item_load.add_field(field_245)

def add_field_260(date):
    """Add a 260 field with the date in subfield ``c`` to ``item_load``.

    Args:
        date: Date value from the input row. When it is ``None`` or NaN
            (a blank CSV cell) no field is added, instead of writing the
            literal text "nan.".
    """
    if date is None or pd.isnull(date):
        return
    # A date column containing blanks is read by pandas as float; turn
    # 1950.0 back into 1950 so the subfield reads "1950." not "1950.0.".
    if isinstance(date, float) and date.is_integer():
        date = int(date)
    field_260 = pymarc.Field(
        tag='260',
        indicators=[' ', ' '],
        subfields=['c', check_period(date)],
    )
    item_load.add_field(field_260)

def add_field_300(reel_count, gauge):
    """Add a 300 field (subfields ``a`` and ``c``) to ``item_load``.

    Args:
        reel_count: Extent statement such as "2 reels"; any ';' is removed
            and "reels" becomes "reel" when the count is exactly 1.
        gauge: Written to subfield ``c`` with a trailing period.

    A value that is ``None`` or NaN (a blank CSV cell) has its subfield left
    out; when both are missing no field is added at all.
    """
    has_count = reel_count is not None and not pd.isnull(reel_count)
    has_gauge = gauge is not None and not pd.isnull(gauge)
    if not has_count and not has_gauge:
        return

    subfields = []
    if has_count:
        extent = str(reel_count).replace(';', '')
        # Singularise only for a count of exactly 1. A plain startswith('1')
        # also matched "10 reels", "12 reels", "100 reels", and so on.
        if re.match(r'1(?!\d)', extent):
            extent = extent.replace('reels', 'reel')
        # The " ;" separator introduces subfield c, so it is only added when
        # a gauge follows.
        if has_gauge:
            extent = extent + ' ;'
        subfields += ['a', extent]
    if has_gauge:
        subfields += ['c', check_period(gauge)]

    field_300 = pymarc.Field(
        tag='300',
        indicators=[' ', ' '],
        subfields=subfields,
    )
    item_load.add_field(field_300)

def add_field_500(note):
    """Add a 500 field with ``note`` in subfield ``a`` to ``item_load``.

    Args:
        note: Note text; a trailing period is added if absent. When it is
            ``None`` or NaN (a blank CSV cell) no field is added, instead of
            writing the literal text "nan.".
    """
    if note is None or pd.isnull(note):
        return
    field_500 = pymarc.Field(
        tag='500',
        indicators=[' ', ' '],
        subfields=['a', check_period(note)],
    )
    item_load.add_field(field_500)

def add_field_506(usage):
    """Add a 506 field with ``usage`` in subfield ``a`` to ``item_load``.

    Args:
        usage: Usage statement; a trailing period is added if absent. When it
            is ``None`` or NaN (a blank CSV cell) no field is added, instead
            of writing the literal text "nan.".
    """
    if usage is None or pd.isnull(usage):
        return
    field_506 = pymarc.Field(
        tag='506',
        indicators=[' ', ' '],
        subfields=['a', check_period(usage)],
    )
    item_load.add_field(field_506)

def add_field_590(condition):
    """Add a 590 field with ``condition`` in subfield ``a`` to ``item_load``.

    Args:
        condition: Condition note; a trailing period is added if absent.
            When it is ``None`` or NaN (a blank CSV cell) no field is added,
            instead of writing the literal text "nan.".
    """
    if condition is None or pd.isnull(condition):
        return
    field_590 = pymarc.Field(
        tag='590',
        indicators=[' ', ' '],
        subfields=['a', check_period(condition)],
    )
    item_load.add_field(field_590)

def add_field_700(director):
    """Add a 700 field with ``director`` in subfield ``a`` to ``item_load``.

    Args:
        director: Name string; a trailing period is added if absent. When it
            is ``None`` or NaN (a blank CSV cell) no field is added, instead
            of writing a heading for the literal text "nan.".
    """
    if director is None or pd.isnull(director):
        return
    field_700 = pymarc.Field(
        tag='700',
        indicators=['1', ' '],
        subfields=['a', check_period(director)],
    )
    item_load.add_field(field_700)

def add_field_852(callno):
    """Add one 852 field to ``item_load`` per reel listed for ``callno``.

    Reels are read from the global ``item_dict``, whose entry for ``callno``
    maps reel number to barcode. Each field carries the fixed location code
    'fsfilm' (b), the call number (h), "reel N" (t) and the barcode (x).
    """
    reels = item_dict[callno]
    for reel_no in reels:
        barcode = reels[reel_no]
        item_load.add_field(pymarc.Field(
            tag='852',
            indicators=['8', '1'],
            subfields=[
                'b', 'fsfilm',
                'h', callno,
                't', 'reel %s' % reel_no,
                'x', str(barcode),
            ],
        ))

def edit_leader():
    """Overwrite leader positions 5-7 with 'ngm' and 17-18 with '5a'.

    Operates on the record held in the global ``item_load``; all other leader
    positions are kept as they are.
    """
    leader = item_load.leader
    pieces = (leader[:5], 'ngm', leader[8:17], '5a', leader[19:])
    item_load.leader = ''.join(pieces)

# Upload an input CSV file
# files.upload() is the google.colab helper imported above; whatever it
# returns is kept in `uploaded`.
uploaded = files.upload()

In [0]:
#@title 3. Select a file

# Select an input CSV file
input_name = input('Enter a file name: ')

# Read the input CSV file into a pandas DataFrame
input_df = pd.read_csv(input_name)

# Delete blank rows if all values are NA.
# Only `how` is passed: pandas 2.x raises TypeError when `how` and `thresh`
# are both supplied, even as thresh=None. The other arguments were defaults.
input_df = input_df.dropna(how='all')

# Delete the first row (the row with index label 0)
input_df = input_df.drop([0])

# Convert Barcode and Reel Number to int64
input_df['Barcode'] = input_df['Barcode'].astype('int64')
input_df['Reel Number'] = input_df['Reel Number'].astype('int64')

# Create dictionary {callno: {reel_number: barcode}}
item_dict = dict()
df = input_df.ffill(axis=0)  # Fill down the missing values
for index, row in df.iterrows():  # Update dictionary
    # Columns are addressed by position, so use .iloc; plain row[0] on a
    # Series indexed by column labels is deprecated positional access.
    callno = row.iloc[0]
    item = row.iloc[10]
    barcode = row.iloc[11]
    item_dict.setdefault(callno, {})[item] = barcode

# Delete blank rows if 'Call #' value is NA
input_df = input_df.dropna(subset=['Call #'])

# Number of rows
total = len(input_df.index)

# Create an output mrc file.
# The name is built by dropping the last four characters of the input name,
# which assumes an extension such as ".csv". The handle stays open here and
# is written to and closed by the conversion cell.
output_name = input_name[:-4] + '_output.mrc'
outputfile = open(output_name, 'wb')

In [0]:
#@title 4. Convert CSV file to MARC records

count = 0

out = display(progress(0, total), display_id=True)

# try/finally makes sure the output file is flushed and closed even when a
# record fails part-way through, so records already written are not lost.
try:
    for index, row in input_df.iterrows():

        count += 1
        time.sleep(0.02)
        out.update(progress(count, total))

        # The add_field_* helpers and edit_leader() work on this global.
        item_load = pymarc.Record(to_unicode=True, force_utf8=True)

        # The first ten columns are read by position. .iloc is used because
        # plain row[0] on a Series indexed by column labels is deprecated
        # positional access.
        (callno, title, medium, date, reel_count,
         gauge, note, usage, condition, director) = row.iloc[:10]

        add_field_008(date)
        add_field_099(callno)
        add_field_245(title, medium)
        add_field_260(date)
        add_field_300(reel_count, gauge)
        add_field_500(note)
        add_field_506(usage)
        add_field_590(condition)
        add_field_700(director)
        add_field_852(callno)
        edit_leader()

        outputfile.write(item_load.as_marc())
finally:
    outputfile.close()
print('Done!\U0001f44D')