# The Problem

We are librarians tasked with identifying wasteful spending on expensive databases. We want to ensure that we only remove databases that have a low number of regular uses together with a high number of interlibrary loan (ILL) uses.

In [1]:
import pandas as pd

In [3]:
# Sample usage counts per journal from the main source.
data_main = {
    "journal": [
        "Anthropology Weekly",
        "History Monthly",
        "Science Quarterly",
    ],
    "quantity": [18, 20, 200],
}

# Sample ILL usage counts per journal ("Science Quarterly" has no ILL entry).
data_ill = {
    "journal": [
        "Anthropology Weekly",
        "History Monthly",
    ],
    "quantity": [2, 500],
}

In [6]:
# Build the main-usage table from the raw dictionary, then display it.
df_main = pd.DataFrame(data=data_main)
df_main

Unnamed: 0,journal,quantity
0,Anthropology Weekly,18
1,History Monthly,20
2,Science Quarterly,200


In [7]:
# Build the ILL-usage table from the raw dictionary, then display it.
df_ill = pd.DataFrame(data=data_ill)
df_ill

Unnamed: 0,journal,quantity
0,Anthropology Weekly,2
1,History Monthly,500


# The Solution

## Identify the Areas of Overlap

In [9]:
# Pull the journal titles out of each table as plain Python lists.
main_journals = df_main["journal"].tolist()
ill_journals = df_ill["journal"].tolist()

In [12]:
# Journals that appear in both the main list and the ILL list.
main_journal_set = set(main_journals)
overlap = main_journal_set.intersection(ill_journals)
print(overlap)

{'Anthropology Weekly', 'History Monthly'}


## Isolate the Rows that are Important for Each DataFrame

In [62]:
# Keep only the journals present in both sources, then line up each journal's
# regular usage ("quantity") with its ILL usage ("ill_quantity") in one frame.
#
# A single merge on "journal" replaces the previous row-by-row loop, which
# relied on DataFrame.append (removed in pandas 2.0) and wrote ill_quantity
# using df_ill's row index, so it was only correct when both frames happened
# to list the overlapping journals in the same positions.
main_overlap = df_main[df_main["journal"].isin(overlap)]
ill_overlap = (
    df_ill[df_ill["journal"].isin(overlap)]
    .rename(columns={"quantity": "ill_quantity"})  # avoid clashing with the main "quantity"
)
final_main = main_overlap.merge(ill_overlap, on="journal", how="inner")

# Show which journals were matched (rows follow df_main's order).
for journal in final_main["journal"]:
    print(journal)

Anthropology Weekly
History Monthly


In [63]:
# Display the combined table of regular and ILL usage per journal.
final_main

Unnamed: 0,journal,quantity,ill_quantity
0,Anthropology Weekly,18.0,2
1,History Monthly,20.0,500


## Calculate the Ratio

In [70]:
# Ratio of regular uses to ILL uses, stored as a new "result" column.
final_main["result"] = final_main["quantity"].div(final_main["ill_quantity"])

In [71]:
# Display the table again, now including the "result" column.
final_main

Unnamed: 0,journal,quantity,ill_quantity,result
0,Anthropology Weekly,18.0,2,9.0
1,History Monthly,20.0,500,0.04


## Sort the Data so that the Items with the Lowest Ratio Come First

In [72]:
# Ascending sort on "result": the smallest values are listed first.
final_main.sort_values(by="result", ascending=True)

Unnamed: 0,journal,quantity,ill_quantity,result
1,History Monthly,20.0,500,0.04
0,Anthropology Weekly,18.0,2,9.0
