<a href="https://colab.research.google.com/github/ysugiyama3/google_colab/blob/master/yale_dup_search.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **Search Voyager records by ISBN**
This program automatically searches Voyager records by ISBN found in a base spreadsheet.

(Contact yukari.sugiyama@yale.edu if there is any issue with the program)

---

### **What you need**
An Excel spreadsheet whose first column contains the ISBNs. The spreadsheet can have as many columns as necessary and must have column headers.

---

In [1]:
#@title <--- Click the play button

from pandas.core.common import index_labels_to_array
from google.colab import files
import pandas as pd
import requests
import json
from IPython.display import HTML, display
import time
import re

!pip install --upgrade xlrd

def progress(value, max=50000):
    """Build an HTML progress bar showing ``value`` out of ``max``.

    Args:
        value: Current progress count.
        max: Count at which the bar is full. The name shadows the builtin
            but is kept so existing callers keep working.

    Returns:
        An ``IPython.display.HTML`` object containing a ``<progress>``
        element followed by a ``value/max`` text counter.
    """
    # HTML attributes are separated by whitespace only; the comma that used
    # to follow the max attribute was parsed as a bogus extra attribute.
    return HTML("""
        <progress
            value='{value}'
            max='{max}'
            style='width: 40%'
        >
            {value}
        </progress>
        <br>{value}/{max}</br>
    """.format(value=value, max=max))

def clean_isbn(isbn):
    """Normalize a raw spreadsheet cell into a bare ISBN string.

    Everything from the first ``(``, ``:``, ``.`` or ``|`` onward is
    discarded (for example a trailing "(pbk.)" qualifier, or the ".0" that
    ``str()`` produces for a float-valued cell). Every remaining character
    other than a digit or ``X``/``x`` is then removed.

    Args:
        isbn: The raw cell value (string, number, None or NaN).

    Returns:
        The cleaned ISBN string, or None when the cell is missing or no
        ISBN characters remain after cleaning.
    """
    if isbn is None or pd.isnull(isbn):
        return None

    text = str(isbn)
    text = re.sub(r'[\(|\:|\.].*', '', text)
    text = re.sub(r'[^0-9Xx]', '', text)

    # A value such as "n/a" cleans down to an empty string. Report it as
    # missing so callers do not run a search with a blank ISBN.
    return text or None

def get_item_info(item):
    """Pull the holding and item details out of one item entry.

    Args:
        item: Mapping with at least 'mfhdid', 'callno' and 'itemid'; when
            'itemid' is not 'NA' it must also provide 'itemenum',
            'locname' and 'itemstatus'.

    Returns:
        A 7-tuple ``(note, mfhdid, callno, itemid, enum, locname, status)``.
        When the item id is 'NA' the note is 'Held (but no item records)'
        and the last three fields are empty strings; otherwise the note
        is 'Held'.
    """
    holding_id = item['mfhdid']
    call_number = item['callno']
    item_id = item['itemid']

    # 'NA' marks a holding without item records: nothing more to read.
    if item_id == 'NA':
        return ('Held (but no item records)', holding_id, call_number,
                item_id, '', '', '')

    return (
        'Held',
        holding_id,
        call_number,
        item_id,
        item['itemenum'],
        item['locname'],
        item['itemstatus'],
    )

def check_records(records):
    """Append one output row per item found on each matching record.

    Reads the module-level ``input_df`` and the current loop ``index``, and
    rebinds the module-level ``output_df``. For every item of every record,
    a copy of the current input row is appended and its result columns
    ([NOTE], [BIB_ID], [TITLE], ...) are filled in. If a record cannot be
    processed, a single row noted 'Check manually' is appended instead.

    A record whose item list is empty adds no row.

    Args:
        records: Iterable of record mappings; each must have 'bibid' and is
            expected to have 'title' and 'items'.

    Raises:
        KeyError: If a record has no 'bibid' (read outside the guarded
            section, as before).
    """
    global output_df
    result_columns = ['[NOTE]', '[BIB_ID]', '[TITLE]', '[MFHD_ID]', '[CALL_NO]',
                      '[ITEM_ID]', '[ENUM]', '[LOCATION]', '[STATUS]']
    for record in records:
        bibid = record['bibid']
        try:
            title = record['title']
            items = record['items']
            for item in items:
                note, mfhdid, callno, itemid, enum, locname, status = get_item_info(item)
                # DataFrame.append was removed in pandas 2.0; concatenating
                # a one-row frame is the supported equivalent.
                output_df = pd.concat([output_df, input_df.iloc[[index]]], ignore_index=True)
                current_index_loc = output_df.index.size - 1
                output_df.loc[current_index_loc, result_columns] = [
                    note, bibid, title, mfhdid, callno, itemid, enum, locname, status]
        except Exception:
            # Best effort: flag the row for manual checking rather than
            # losing it. Unlike a bare except, KeyboardInterrupt and
            # SystemExit still propagate.
            note = 'Check manually'
            output_df = pd.concat([output_df, input_df.iloc[[index]]], ignore_index=True)
            current_index_loc = output_df.index.size - 1
            output_df.loc[current_index_loc, '[NOTE]'] = note

def search_opac(isbn):
    """Look up one ISBN in the Voyager search service and record the result.

    Reads the module-level ``input_df`` and the current loop ``index``, and
    rebinds the module-level ``output_df``. When ``isbn`` is missing, a copy
    of the current input row is appended with the note 'No ISBN'. Otherwise
    the service is queried and the returned records are passed to
    ``check_records``.

    Args:
        isbn: Cleaned ISBN string, or None/NaN when the row has no ISBN.

    Raises:
        requests.exceptions.RequestException: If the request fails or times
            out.
        ValueError: If the response body is not valid JSON.
        KeyError: If the response has no 'record' entry.
    """
    global output_df
    if isbn is None or pd.isnull(isbn):
        # DataFrame.append was removed in pandas 2.0; concatenating a
        # one-row frame is the supported equivalent.
        output_df = pd.concat([output_df, input_df.iloc[[index]]], ignore_index=True)
        current_index_loc = output_df.index.size - 1
        output_df.loc[current_index_loc, '[NOTE]'] = 'No ISBN'
        return

    url = 'http://libapp-test.library.yale.edu/VoySearch/GetBibItem?isxn=' + str(isbn)
    # Without a timeout, one unresponsive request would hang the whole run.
    r = requests.get(url, timeout=30)
    result = r.json()
    check_records(result['record'])

# Driver: upload a spreadsheet, look up every row's ISBN, and write the
# results next to the input as "<name>_output.xlsx" (or .csv as a fallback).
# NOTE: input_df, output_df and the loop variable `index` are module-level
# names read by search_opac() and check_records(); do not rename them.
print('\n')
# Upload an input Excel file
uploaded = files.upload()
input_name = str(list(uploaded.keys())[0])

# Read an input Excel file into a pandas DataFrame
input_df = pd.read_excel(input_name)

# Create an output Excel file based on input excel file
output_name = input_name.rsplit( ".", 1 )[0] + "_output.xlsx"

# Create an output DataFrame: the input columns followed by the result columns
output_df = pd.DataFrame(columns=input_df.columns)
output_df = pd.concat([output_df, pd.DataFrame(columns = ['[NOTE]', '[BIB_ID]', '[TITLE]', '[MFHD_ID]', '[CALL_NO]', '[ITEM_ID]', '[ENUM]', '[LOCATION]', '[STATUS]'])], sort=False)

# Total number of rows, used as the progress bar maximum
total = len(input_df.index)

out = display(progress(0, total), display_id=True)

for count, (index, row) in enumerate(input_df.iterrows(), start=1):
    # Short pause on every row (presumably to pace the lookups - confirm).
    time.sleep(0.02)
    out.update(progress(count, total))
    # The ISBN is always the first column. Select it by position explicitly:
    # row[0] relied on the deprecated positional fallback of Series[...].
    isbn = clean_isbn(row.iloc[0])
    try:
        search_opac(isbn)
    except Exception as e:
        # Report the failure and continue with the next row.
        print(e)

try:
    output_df.to_excel(output_name, index=False)
except Exception as e:
    # Fall back to CSV, and say so, so the user knows which file was written.
    csv_output_name = output_name.rsplit( ".", 1 )[0] + '.csv'
    print('Could not write {} ({}); writing {} instead.'.format(output_name, e, csv_output_name))
    output_df.to_csv(csv_output_name, index=False, encoding='utf-8')

print('\nDone!')

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting xlrd
  Downloading xlrd-2.0.1-py2.py3-none-any.whl (96 kB)
[K     |████████████████████████████████| 96 kB 2.2 MB/s 
[?25hInstalling collected packages: xlrd
  Attempting uninstall: xlrd
    Found existing installation: xlrd 1.1.0
    Uninstalling xlrd-1.1.0:
      Successfully uninstalled xlrd-1.1.0
Successfully installed xlrd-2.0.1




Saving Sample_Backlog_List.xlsx to Sample_Backlog_List.xlsx



Done!
