In [75]:
# Import needed python libraries.
from IPython.display import HTML
from jinja2 import Environment, FileSystemLoader
import pandas as pd
import os
import textwrap
import webbrowser

In [76]:
# Placeholder substitution for titles/authors that have no value.
def default_value(value):
    """Return "Unknown" for a null or empty-string value; otherwise return the value unchanged."""
    is_missing = pd.isnull(value) or value == ''
    return "Unknown" if is_missing else value

In [77]:
# Function to extract year, if possible, from mixed string and date values.
def extract_year(date_str):
    """Reduce a date value to its year where possible.

    Args:
        date_str: A raw date value; may be null, an empty string, a parseable
            date, or free text.

    Returns:
        "(date unknown)" when the value is null or an empty string; the year
        when pandas can parse the value as a date; otherwise the original
        value, unchanged.
    """
    if pd.isnull(date_str) or date_str == '':
        return "(date unknown)"
    try:
        parsed = pd.to_datetime(date_str)
        # Text such as "NaT" parses successfully to NaT, whose .year is NaN;
        # keep the original text instead of emitting a NaN "year".
        if pd.isnull(parsed):
            return date_str
        return parsed.year
    except Exception:
        # Unparseable free text is kept as-is. Catching Exception (not a bare
        # except) lets KeyboardInterrupt/SystemExit propagate.
        return date_str

In [78]:
# Decorate all-digit dates: parentheses for CC0/PD works, a copyright entity otherwise.
def format_date(row):
    """Return the display form of a row's 'Date' value.

    An all-digit date becomes "(date)" when the row's 'IP Status' is 'CC0' or
    'PD', and "&copy; date" for any other status. A date that is not all
    digits is returned unchanged.
    """
    date_value = row['Date']
    if not str(date_value).isdigit():
        return date_value
    if row['IP Status'] in ['CC0', 'PD']:
        return f"({date_value})"
    return f"&copy; {date_value}"

In [79]:
# Get IP statements.
# Clear any value left over from an earlier run first, so that a failed load
# cannot silently fall back to stale data in the cells that follow.
ipstatements = None
try:
    ipstatements = pd.read_csv('ipstatements.csv')
except FileNotFoundError:
    print("Error: The file 'ipstatements.csv' was not found.")
except pd.errors.EmptyDataError:
    print("Error: The file 'ipstatements.csv' is empty.")
except Exception as e:
    # Catch any other exceptions.
    print(f"An unexpected error occurred: {e}")

In [80]:
# Get works, joining with IP statements on the "IP Status" column.
# Clear any value left over from an earlier run first: later cells modify
# `works` in place, so a failed load must not leave a stale frame behind.
works = None
try:
    # 'Date' is read as text so mixed year/free-text values are not coerced.
    # The inner join keeps only works whose "IP Status" has a matching statement.
    works = pd.merge(
        pd.read_csv('works.csv', dtype={'Date': str}),
        ipstatements,
        on='IP Status',
        how='inner',
    )
except FileNotFoundError:
    print("Error: The file 'works.csv' was not found.")
except pd.errors.EmptyDataError:
    print("Error: The file 'works.csv' is empty.")
except Exception as e:
    # Catch any other exceptions.
    print(f"An unexpected error occurred: {e}")

In [81]:
# Fill in the default placeholder for any title or author that has no value.
try:
    for column in ('Title', 'Author'):
        works[column] = works[column].apply(default_value)
except Exception as e:
    print(f"An error occurred while adding default values to titles/authors: {e}")

In [82]:
# Reduce each date to its year where it can be parsed; other values keep their original text.
try:
    works['Date'] = works['Date'].map(extract_year)
except Exception as e:
    print(f"An error occurred while converting the dates to years: {e}")

In [83]:
# Use function above to format dates.
try:
    # DataFrame.apply(axis=1) on a frame with no rows can hand back a
    # DataFrame instead of a Series, which cannot be assigned to the single
    # 'Date' column. With no rows there is nothing to format, so skip the call.
    if len(works) > 0:
        works['Date'] = works.apply(format_date, axis=1)
except Exception as e:
    print(f"An error occurred while formatting dates column: {e}")

In [84]:
# Order the works by Format, then by Title within each Format.
try:
    works = works.sort_values(['Format', 'Title'])
except Exception as e:
    print(f"An error occurred while sorting works: {e}")

In [85]:
# Turn the works DataFrame into a list of per-row dictionaries, replacing
# missing (NaN) values with None so that Jinja2 can handle them.
try:
    works_dict = works.to_dict(orient='records')
    works_dict = [
        {column: (None if pd.isnull(value) else value) for column, value in record.items()}
        for record in works_dict
    ]
except Exception as e:
    print(f"An error occurred while converting works DataFrame to a dictionary: {e}")

In [86]:
# Report number of works and statements.
try:
    works_input = pd.read_csv('works.csv')
    works_generated = len(works_dict)
    formats_unique = len(works['Format'].unique())
    # The generated rows are divided by the number of distinct formats to
    # estimate the number of works (presumably one row per work per format).
    # When the join produced no rows there are no formats either, so report 0
    # instead of dividing by zero -- this is exactly the mismatch the message
    # below is meant to flag.
    works_generated_unique = round(works_generated / formats_unique) if formats_unique else 0
    output = f"The works.csv file has {works_input.shape[0]} rows and BOAT has generated license/attribution statements for {works_generated_unique} works (in various formats). If these numbers do not match, it is probably because one or more of your works does not have an IP status value that matches a value in the ipstatements.csv file."
    wrapped_output = textwrap.fill(output, width=80)
    print(wrapped_output)
except Exception as e:
    # Any failure here (missing file, earlier cell failed, missing column) is
    # reported rather than raised, matching the other cells; the cause is
    # included so the problem can be diagnosed.
    print(f"Error calculating how many works were supplied and how many statements were generated by BOAT: {e}")

The works.csv file has 5 rows and BOAT has generated license/attribution
statements for 5 works (in various formats). If these numbers do not match, it
is probably because one or more of your works does not have an IP status value
that matches a value in the ipstatements.csv file.


In [87]:
# Render the works through the Jinja2 HTML template.
try:
    template = './template.html'
    # Split the path into the directory Jinja2 loads from and the template file name.
    template_dir, template_name = os.path.split(template)
    env = Environment(
        loader=FileSystemLoader(template_dir),
        trim_blocks=True,
        lstrip_blocks=True,
    )
    template_obj = env.get_template(template_name)
    content = template_obj.render(works=works_dict)
except Exception as e:
    print(f"An error occurred while rendering the template: {e}")

In [88]:
# Show the rendered HTML as this cell's output.
try:
    rendered_page = HTML(content)
    display(rendered_page)
except Exception as e:
    print(f"An error occurred while displaying the HTML content: {e}")